Checkpoint 8

2026-05-12 22:41:03 +01:00
parent a01a5c9cef
commit 5c2ee4bba5
31 changed files with 3189 additions and 380 deletions
@@ -0,0 +1,84 @@
+"""Benchmark LiDAR perception improvements.
+
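+Measures success rate and mean steps for demo collection across multiple
+seeds, and reports elapsed wall-clock time, for one configuration per
+invocation. To compare configurations, re-run with different flags or a
+different ``HERDING_WORLD``.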
+
+Usage::
+
+    python -m tools.benchmark_lidar --n-sheep 5 --seeds 15
+    HERDING_WORLD=field_round python -m tools.benchmark_lidar --n-sheep 5
+"""
+
+from __future__ import annotations
+
+import argparse
+import time
+from collections import Counter
+
+from training.bc.collect import collect_one
+from herding.control.universal import compute_action
+
+
+def run_benchmark(n_sheep: int, n_seeds: int, max_steps: int = 100000,
+                  drive_mode: str = "differential"):
+    """Collect one demo episode per seed and summarise the outcomes.
+
+    Seeds ``0 .. n_seeds - 1`` are each passed to ``collect_one`` together
+    with the ``compute_action`` controller. A status line is printed per
+    seed, followed by an aggregate summary.
+
+    Args:
+        n_sheep: Number of sheep, forwarded to ``collect_one``.
+        n_seeds: How many consecutive seeds to run, starting at 0. A value
+            of 0 or less runs nothing and yields an all-zero summary.
+        max_steps: Step budget forwarded to ``collect_one``.
+        drive_mode: Drive mode forwarded to ``collect_one``.
+
+    Returns:
+        A dict with keys ``n_sheep``, ``n_seeds``, ``success_rate`` (in
+        percent), ``n_success``, ``mean_steps_success``, ``mean_steps_all``
+        and ``elapsed_s``. The means are 0 when there is nothing to average.
+    """
+    results = []
+    # perf_counter is monotonic, so the elapsed time cannot be skewed by
+    # system clock adjustments during a long benchmark.
+    t0 = time.perf_counter()
+    for seed in range(n_seeds):
+        # NOTE(review): the positional 5 is passed through unchanged; its
+        # meaning is defined by collect_one -- confirm there.
+        obs, _actions, success, steps = collect_one(
+            n_sheep, seed, max_steps, 5, compute_action,
+            frame_stack=1, privileged=False, drive_mode=drive_mode,
+        )
+        results.append({
+            "seed": seed,
+            "success": success,
+            "steps": steps,
+            "logged": len(obs),
+        })
+        tag = "+" if success else "x"
+        print(f"  [{tag}] seed={seed:>2d}  steps={steps:>6d}")
+    elapsed = time.perf_counter() - t0
+
+    n_runs = len(results)
+    successes = [r for r in results if r["success"]]
+    n_ok = len(successes)
+
+    # Guard every division: n_seeds <= 0 produces no results at all, and a
+    # run where every seed fails produces no successes.
+    rate = (100.0 * n_ok / n_runs) if n_runs else 0.0
+    mean_steps_ok = (sum(r["steps"] for r in successes) / n_ok) if n_ok else 0
+    mean_steps_all = (
+        sum(r["steps"] for r in results) / n_runs
+    ) if n_runs else 0
+
+    print(f"\n  Results: {n_ok}/{n_runs} success ({rate:.0f}%)")
+    print(f"  Mean steps (success): {mean_steps_ok:>8.0f}")
+    print(f"  Mean steps (all):     {mean_steps_all:>8.0f}")
+    print(f"  Elapsed: {elapsed:.0f}s")
+    return {
+        "n_sheep": n_sheep,
+        "n_seeds": n_seeds,
+        "success_rate": rate,
+        "n_success": n_ok,
+        "mean_steps_success": mean_steps_ok,
+        "mean_steps_all": mean_steps_all,
+        "elapsed_s": elapsed,
+    }
+
+
+def main():
+    """Parse the command line, run the benchmark once, print its summary.
+
+    Flags: ``--n-sheep``, ``--seeds``, ``--max-steps`` and ``--drive-mode``
+    (``differential`` or ``mecanum``).
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--n-sheep", type=int, default=5)
+    cli.add_argument("--seeds", type=int, default=15)
+    cli.add_argument("--max-steps", type=int, default=100000)
+    cli.add_argument(
+        "--drive-mode",
+        default="differential",
+        choices=["differential", "mecanum"],
+    )
+    opts = cli.parse_args()
+
+    # Kept as a function-local import that runs only after argument
+    # parsing has succeeded.
+    from herding.world.geometry import FIELD_SHAPE
+
+    banner = (
+        f"[bench] world={FIELD_SHAPE}  n_sheep={opts.n_sheep}  "
+        f"seeds={opts.seeds}  drive={opts.drive_mode}"
+    )
+    print(banner)
+    print()
+    summary = run_benchmark(
+        opts.n_sheep, opts.seeds, opts.max_steps, opts.drive_mode,
+    )
+    print()
+    print("[bench] summary:", summary)
+
+
+# Entry point when run as a script, e.g. ``python -m tools.benchmark_lidar``.
+if __name__ == "__main__":
+    main()
@@ -5,38 +5,109 @@
 # then execs Webots on it.
 #
 # Usage:
-#   tools/run_webots.sh [N] [MODE]
-#     N    : number of active sheep (1..10), default 10
-#     MODE : "bc" | "rl" | "strombom" | "sequential", default "bc"
+#   tools/run_webots.sh [N] [MODE] [DRIVE] [WORLD]
+#     N     : number of active sheep (1..10), default 10
+#     MODE  : "bc" | "rl" | "strombom" | "sequential" | "universal", default "bc"
+#     DRIVE : "differential" | "mecanum", default "differential"
+#     WORLD : base world name (without .wbt), default "field"
+#             Supported: "field" (rectangular), "field_round" (circular)
 #
 # Examples:
-#   tools/run_webots.sh 10 bc         # behaviour-cloned MLP, 10 sheep
-#   tools/run_webots.sh 10 rl         # KL-PPO fine-tune of bc, 10 sheep
-#   tools/run_webots.sh 5 sequential  # single-target analytic baseline
-#   tools/run_webots.sh 3 strombom    # canonical Strömbom analytic
+#   tools/run_webots.sh 10 bc                     # behaviour-cloned MLP, diff drive
+#   tools/run_webots.sh 10 rl mecanum             # KL-PPO fine-tune, mecanum wheels
+#   tools/run_webots.sh 5 sequential differential field_round  # analytic baseline, round field
+#   tools/run_webots.sh 3 strombom mecanum field_round  # Strömbom, mecanum, round
 #
 # Notes:
 # * bc loads training/runs/bc/policy.zip, rl loads training/runs/rl.
 #   Override via HERDING_POLICY_DIR=/path/to/run env var.
 # * Conda env "tir" must be active (provides stable-baselines3 + torch).
+#
+# Headless-ish (no 3D view, fast sim, no modal dialogs):
+#   WEBOTS_HEADLESS=1 make webots N=10 MODE=rl DRIVE=mecanum
+#   WEBOTS_HEADLESS=1 tools/run_webots.sh 10 rl mecanum
+# This passes --no-rendering --minimize --mode=fast --batch to webots.
+# Webots still needs a display (Qt); on a machine without one use e.g.:
+#   xvfb-run -a env WEBOTS_HEADLESS=1 tools/run_webots.sh 10 rl mecanum
+# Optional extra CLI tokens (space-separated):
+#   WEBOTS_EXTRA_ARGS="--stdout --stderr" WEBOTS_HEADLESS=1 tools/run_webots.sh 10 rl

 set -e
 N=${1:-10}
 MODE=${2:-bc}
+DRIVE=${3:-differential}
+WORLD=${4:-field}

 if (( N < 1 || N > 10 )); then
    echo "N must be 1..10, got $N" >&2; exit 1
 fi
 case "$MODE" in
-    bc|rl|strombom|sequential) ;;
-    *) echo "MODE must be bc|rl|strombom|sequential, got '$MODE'" >&2; exit 1 ;;
+    bc|rl|strombom|sequential|universal) ;;
+    *) echo "MODE must be bc|rl|strombom|sequential|universal, got '$MODE'" >&2; exit 1 ;;
+esac
+case "$DRIVE" in
+    differential|mecanum) ;;
+    *) echo "DRIVE must be differential|mecanum, got '$DRIVE'" >&2; exit 1 ;;
 esac

 ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
-SRC="$ROOT/worlds/field.wbt"
-DST="$ROOT/worlds/field_test.wbt"
+SRC="$ROOT/worlds/${WORLD}.wbt"
+if [[ ! -f "$SRC" ]]; then
+    echo "World file not found: $SRC" >&2; exit 1
+fi
+DST="$ROOT/worlds/${WORLD}_test.wbt"
+
+# Resolve the policy directory once, up front. An explicit
+# HERDING_POLICY_DIR always wins; otherwise prefer a drive-specific run
+# directory and fall back to the legacy (drive-agnostic) one.
+if [[ -n "${HERDING_POLICY_DIR:-}" ]]; then
+    RESOLVED_POLICY_DIR="$HERDING_POLICY_DIR"
+else
+    # Try drive-mode-specific path first, then legacy path.
+    # Only MODE=rl selects the rl* runs; every other mode (including the
+    # analytic ones) resolves to the bc* directories.
+    if [[ "$MODE" == "rl" ]]; then
+        DRIVED="$ROOT/training/runs/rl_${DRIVE}"
+        LEGACY="$ROOT/training/runs/rl"
+    else
+        DRIVED="$ROOT/training/runs/bc_${DRIVE}"
+        LEGACY="$ROOT/training/runs/bc"
+    fi
+    # The legacy path is used as-is; its existence is not checked here.
+    if [[ -d "$DRIVED" ]]; then
+        RESOLVED_POLICY_DIR="$DRIVED"
+    else
+        RESOLVED_POLICY_DIR="$LEGACY"
+    fi
+fi

 cp "$SRC" "$DST"
+
+# Swap robot proto based on drive mode.
+# Base worlds reference ShepherdDog (diff-drive). For mecanum we swap in
+# ShepherdDogMecanum and inject mecanum contact properties.
+if [[ "$DRIVE" == "mecanum" ]]; then
+    sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum.proto"|' "$DST"
+    sed -i 's|^ShepherdDog {|ShepherdDogMecanum {|' "$DST"
+    # Append a MecanumWheel ContactProperties node inside the existing
+    # contactProperties [ ... ] list, just before its closing bracket.
+    # The script is fed through a quoted heredoc so no shell escaping is
+    # needed inside the Python source; "$DST" arrives as sys.argv[1].
+    python3 - "$DST" <<'PY'
+import re
+import sys
+
+path = sys.argv[1]
+with open(path, 'r') as f:
+    txt = f.read()
+
+mec = '''
+    ContactProperties {
+      material1 "MecanumWheel"
+      coulombFriction [
+        2
+      ]
+      bounce 0
+      forceDependentSlip [
+        10
+      ]
+      softCFM 0.0001
+    }'''
+
+
+def warn(msg):
+    print('[run_webots] warning: ' + msg, file=sys.stderr)
+
+
+start = re.search(r'contactProperties\s*\[', txt)
+if start is None:
+    # Best effort, as before: leave the world untouched, but say so.
+    warn('no contactProperties [ ... ] list in ' + path
+         + '; mecanum contact properties not injected')
+    sys.exit(0)
+
+# Walk forward tracking bracket depth so that nested arrays such as
+# "coulombFriction [ ... ]" inside existing ContactProperties nodes are
+# skipped. The first ']' after the opening bracket is not necessarily the
+# one that closes the list.
+# NOTE(review): brackets inside quoted strings or comments are not
+# special-cased -- confirm the base worlds contain none in this block.
+depth = 1
+pos = start.end()
+while pos < len(txt) and depth:
+    ch = txt[pos]
+    if ch == '[':
+        depth += 1
+    elif ch == ']':
+        depth -= 1
+    pos += 1
+
+if depth:
+    warn('unbalanced contactProperties [ ... ] list in ' + path
+         + '; mecanum contact properties not injected')
+    sys.exit(0)
+
+close = pos - 1  # index of the ']' that closes the list
+txt = txt[:close] + mec + txt[close:]
+with open(path, 'w') as f:
+    f.write(txt)
+PY
+fi
+
 # Comment out sheep N+1..10 by prefixing the matching Sheep { ... } line.
 for i in $(seq $((N+1)) 10); do
    sed -i "s|^Sheep .* \"sheep${i}\".*|# &|" "$DST"
@@ -46,20 +117,24 @@ active=$(grep -c '^Sheep' "$DST")
 echo "------------------------------------------------------------"
 echo "World      : $DST"
 echo "Mode       : $MODE"
+echo "Drive      : $DRIVE"
 echo "Sheep      : $active active"
-echo "Policy dir : ${HERDING_POLICY_DIR:-$ROOT/training/runs/bc}"
+echo "Policy dir : $RESOLVED_POLICY_DIR"
 echo "------------------------------------------------------------"

 # Webots strips HERDING_* env vars from controller subprocesses in some
 # setups, so we also write a runtime config file the controller reads.
-RESOLVED_POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc}"
 cat > "$ROOT/herding_runtime.cfg" <<EOF
 HERDING_MODE=$MODE
 HERDING_POLICY_DIR=$RESOLVED_POLICY_DIR
+HERDING_DRIVE=$DRIVE
+HERDING_WORLD=$WORLD
 EOF

 export HERDING_MODE="$MODE"
 export HERDING_POLICY_DIR="$RESOLVED_POLICY_DIR"
+export HERDING_DRIVE="$DRIVE"
+export HERDING_WORLD="$WORLD"

 # The controller writes this sentinel when all GT sheep are penned. We
 # poll for it and kill Webots so the run finishes cleanly instead of
@@ -68,7 +143,14 @@ DONE_FILE="$ROOT/training/.run_done"
 mkdir -p "$(dirname "$DONE_FILE")"
 rm -f "$DONE_FILE"

-webots "$DST" &
+# WEBOTS_HEADLESS=1 adds the no-rendering/minimize/fast/batch flags; any
+# other value (or unset) launches Webots normally. Both branches start
+# Webots in the background.
+if [[ "${WEBOTS_HEADLESS:-}" == "1" ]]; then
+    echo "[run_webots] headless flags: --no-rendering --minimize --mode=fast --batch"
+    # WEBOTS_EXTRA_ARGS is left unquoted on purpose so that it word-splits
+    # into separate CLI tokens (hence the SC2086 suppression).
+    # shellcheck disable=SC2086
+    webots --no-rendering --minimize --mode=fast --batch ${WEBOTS_EXTRA_ARGS:-} "$DST" &
+else
+    # Same deliberate word-splitting of WEBOTS_EXTRA_ARGS as above.
+    # shellcheck disable=SC2086
+    webots ${WEBOTS_EXTRA_ARGS:-} "$DST" &
+fi
 WEBOTS_PID=$!

 cleanup() {