Checkpoint 4

2026-05-11 00:42:52 +01:00
parent 2a6db038df
commit 6688325d89
26 changed files with 2018 additions and 503 deletions
@@ -26,12 +26,16 @@ if _PROJECT_ROOT not in sys.path:

 import numpy as np

+from herding.active_scan import ActiveScanTeacher
 from herding.geometry import PEN_ENTRY
 from herding.sequential import compute_action as sequential_action
 from herding.strombom import compute_action as strombom_action
 from training.herding_env import HerdingEnv


+# Base analytic teachers (no scanning). By default, demo collection
+# wraps the chosen one in ActiveScanTeacher so that, under LiDAR, the
+# teacher acts on the same partial observations as the student.
 TEACHERS = {
    "sequential": sequential_action,
    "strombom": strombom_action,
@@ -39,19 +43,34 @@ TEACHERS = {


 def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int,
-                teacher_fn):
+                teacher_fn, frame_stack: int = 1, privileged: bool = False):
    env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
-                    difficulty=1.0, seed=seed)
+                    difficulty=1.0, seed=seed, frame_stack=frame_stack)
    obs, _ = env.reset(seed=seed)
    obs_list, action_list = [], []
+    # Active-scan wrapper: scan first, then run the base teacher on the
+    # tracker dict. Reset state per episode so the opening scan kicks in.
+    scan_teacher = ActiveScanTeacher(teacher_fn)
    for step in range(max_steps):
-        positions = {f"s{i}": (float(env.sheep_x[i]), float(env.sheep_y[i]))
-                     for i in range(env.n_sheep) if not env.sheep_penned[i]}
-        if not positions:
-            break
-        vx, vy, _mode = teacher_fn(
-            (env.dog_x, env.dog_y), positions, PEN_ENTRY,
-        )
+        if privileged:
+            # Asymmetric "learning by cheating": teacher reads GT, student
+            # gets LiDAR obs. Kept available for ablation; default off.
+            positions = {f"s{i}": (float(env.sheep_x[i]), float(env.sheep_y[i]))
+                         for i in range(env.n_sheep) if not env.sheep_penned[i]}
+            if not positions:
+                break
+            vx, vy, _mode = teacher_fn(
+                (env.dog_x, env.dog_y), positions, PEN_ENTRY,
+            )
+        else:
+            # Matched-perception teacher: it sees what the student sees
+            # (the tracker dict), with active scanning to fill the
+            # tracker before driving.
+            positions = env.perceived_positions()
+            vx, vy, _mode = scan_teacher(
+                (env.dog_x, env.dog_y), env.dog_heading,
+                positions, PEN_ENTRY,
+            )
        action = np.array([vx, vy], dtype=np.float32)
        if step % subsample == 0:
            obs_list.append(obs.copy())
@@ -81,6 +100,14 @@ def main():
    parser.add_argument("--teacher", default="sequential",
                        choices=list(TEACHERS.keys()),
                        help="Which analytic teacher to demonstrate.")
+    parser.add_argument("--frame-stack", type=int, default=1,
+                        help="K — concatenate the last K env obs into a "
+                             "single (32·K)-D vector. Lets a memoryless "
+                             "MLP recover temporal info under partial "
+                             "LiDAR observability.")
+    parser.add_argument("--privileged", action="store_true",
+                        help="Teacher reads ground truth (asymmetric BC). "
+                             "Default: matched-perception with active scan.")
    args = parser.parse_args()
    teacher_fn = TEACHERS[args.teacher]
    print(f"[demos] teacher: {args.teacher}")
@@ -97,6 +124,7 @@ def main():
        for seed in range(args.seeds_per_n):
            obs, actions, success, total_steps = collect_one(
                n, seed, args.max_steps, args.subsample, teacher_fn,
+                frame_stack=args.frame_stack, privileged=args.privileged,
            )
            n_total += 1
            if success: