Checkpoint 3

2026-05-10 12:46:14 +01:00
parent 1bb9415414
commit 2a6db038df
16 changed files with 305 additions and 662 deletions
@@ -1,22 +0,0 @@
-"""
-Viewpoint inspector — prints position, orientation and FOV to the console
-once per second.  Attach as the controller of a dummy supervisor robot to
-copy-paste exact camera values into field.wbt.
-"""
-
-from controller import Supervisor
-
-robot    = Supervisor()
-timestep = int(robot.getBasicTimeStep())
-vp       = robot.getFromDef("VIEWPOINT")
-
-step = 0
-while robot.step(timestep) != -1:
-    if step % 60 == 0:
-        pos = vp.getField("position").getSFVec3f()
-        ori = vp.getField("orientation").getSFRotation()
-        fov = vp.getField("fieldOfView").getSFFloat()
-        print(f"position:    {pos[0]:.3f} {pos[1]:.3f} {pos[2]:.3f}")
-        print(f"orientation: {ori[0]:.3f} {ori[1]:.3f} {ori[2]:.3f} {ori[3]:.3f}")
-        print(f"fieldOfView: {fov:.3f}\n")
-    step += 1
@@ -27,11 +27,19 @@ if _PROJECT_ROOT not in sys.path:
 import numpy as np

 from herding.geometry import PEN_ENTRY
-from herding.sequential import compute_action
+from herding.sequential import compute_action as sequential_action
+from herding.strombom import compute_action as strombom_action
 from training.herding_env import HerdingEnv


-def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int):
+TEACHERS = {
+    "sequential": sequential_action,
+    "strombom": strombom_action,
+}
+
+
+def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int,
+                teacher_fn):
    env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
                    difficulty=1.0, seed=seed)
    obs, _ = env.reset(seed=seed)
@@ -41,7 +49,7 @@ def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int):
                     for i in range(env.n_sheep) if not env.sheep_penned[i]}
        if not positions:
            break
-        vx, vy, _mode = compute_action(
+        vx, vy, _mode = teacher_fn(
            (env.dog_x, env.dog_y), positions, PEN_ENTRY,
        )
        action = np.array([vx, vy], dtype=np.float32)
@@ -70,7 +78,12 @@ def main():
                        help="Keep every Nth (obs, action) pair.")
    parser.add_argument("--keep-failures", action="store_true",
                        help="Include partial-success trajectories. Default off.")
+    parser.add_argument("--teacher", default="sequential",
+                        choices=list(TEACHERS.keys()),
+                        help="Which analytic teacher to demonstrate.")
    args = parser.parse_args()
+    teacher_fn = TEACHERS[args.teacher]
+    print(f"[demos] teacher: {args.teacher}")

    n_sheep_list = [int(x) for x in args.n_sheep_list.split(",")]
    print(f"[demos] grid: n_sheep={n_sheep_list}, seeds={args.seeds_per_n}, "
@@ -83,7 +96,7 @@ def main():
    for n in n_sheep_list:
        for seed in range(args.seeds_per_n):
            obs, actions, success, total_steps = collect_one(
-                n, seed, args.max_steps, args.subsample,
+                n, seed, args.max_steps, args.subsample, teacher_fn,
            )
            n_total += 1
            if success:
@@ -15,7 +15,7 @@
 #   tools/run_webots.sh 3 strombom    # canonical baseline, 3 sheep
 #
 # Notes:
-# * The RL mode loads training/runs/bc_pretrained/policy.zip by default.
+# * The RL mode loads training/runs/bc_solo/policy.zip by default.
 #   Override via HERDING_POLICY_DIR=/path/to/run env var.
 # * Conda env "tir" must be active (provides stable-baselines3 + torch).

@@ -46,12 +46,12 @@ echo "------------------------------------------------------------"
 echo "World      : $DST"
 echo "Mode       : $MODE"
 echo "Sheep      : $active active"
-echo "Policy dir : ${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_pretrained}"
+echo "Policy dir : ${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_solo}"
 echo "------------------------------------------------------------"

 # Webots strips HERDING_* env vars from controller subprocesses in some
 # setups, so we also write a runtime config file the controller reads.
-RESOLVED_POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_pretrained}"
+RESOLVED_POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_solo}"
 cat > "$ROOT/herding_runtime.cfg" <<EOF
 HERDING_MODE=$MODE
 HERDING_POLICY_DIR=$RESOLVED_POLICY_DIR