From fce0e0c786e5e0afd1b0274ea47cb6d6ade4b425 Mon Sep 17 00:00:00 2001 From: Johnny Fernandes Date: Mon, 11 May 2026 10:35:48 +0100 Subject: [PATCH] Checkpoint 6 --- README.md | 76 ++-- controllers/sheep/flocking.py | 4 +- controllers/sheep/sheep.py | 6 +- controllers/shepherd_dog/shepherd_dog.py | 352 +++++------------- herding/control/__init__.py | 0 herding/{ => control}/active_scan.py | 2 +- herding/{control.py => control/modulation.py} | 0 herding/{ => control}/sequential.py | 2 +- herding/{ => control}/strombom.py | 2 +- herding/obs.py | 2 +- herding/perception/__init__.py | 0 herding/{ => perception}/lidar_perception.py | 4 +- herding/{ => perception}/lidar_sim.py | 0 herding/{ => perception}/sheep_tracker.py | 2 +- herding/world/__init__.py | 0 herding/{ => world}/diffdrive.py | 0 herding/{ => world}/flocking_sim.py | 2 +- herding/{ => world}/geometry.py | 0 tests/__init__.py | 0 {training => tests}/parity_test.py | 4 +- tools/auto_dagger.sh | 166 --------- tools/collect_demos.py | 8 +- tools/dagger_merge_train.py | 135 ------- tools/run_webots.sh | 22 +- training/README.md | 89 ++--- training/eval.py | 6 +- training/herding_env.py | 14 +- 27 files changed, 194 insertions(+), 704 deletions(-) create mode 100644 herding/control/__init__.py rename herding/{ => control}/active_scan.py (98%) rename herding/{control.py => control/modulation.py} (100%) rename herding/{ => control}/sequential.py (98%) rename herding/{ => control}/strombom.py (98%) create mode 100644 herding/perception/__init__.py rename herding/{ => perception}/lidar_perception.py (98%) rename herding/{ => perception}/lidar_sim.py (100%) rename herding/{ => perception}/sheep_tracker.py (99%) create mode 100644 herding/world/__init__.py rename herding/{ => world}/diffdrive.py (100%) rename herding/{ => world}/flocking_sim.py (99%) rename herding/{ => world}/geometry.py (100%) create mode 100644 tests/__init__.py rename {training => tests}/parity_test.py (96%) delete mode 100755 tools/auto_dagger.sh delete mode 100644 tools/dagger_merge_train.py diff --git a/README.md b/README.md index d1da54b..f03254c 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ gate into an external pen. The dog has three deployable modes: | `rl` | KL-regularised PPO fine-tune of `bc` | Reward-driven refinement | `sequential` (single-target pin-and-push) is kept as an alternative -analytic baseline. `dagger` is a data-collection mode, not deployment. +analytic baseline. ## Perception @@ -28,13 +28,13 @@ control step: (`herding/sheep_tracker.py`). **LiDAR validation** (intermediate-goal item v from `docs/project.md`): -run the dog controller in `HERDING_MODE=diag` mode to capture 80 -real Webots scans plus the ground-truth sheep positions in -`training/dagger/diag_.npz`. Comparing detections against GT in -that file showed clustered centroids match GT positions within 0.15 m -after the +SHEEP_RADIUS surface-to-centre correction — i.e. the -LiDAR pipeline produces correct sheep-position estimates from the -real Webots scan, validating the sensor for the herding task. +during development a diagnostic-dump controller captured 80 real +Webots scans plus the ground-truth sheep positions. Comparing +detections against GT showed clustered centroids match GT positions +within 0.15 m after the +SHEEP_RADIUS surface-to-centre correction — +i.e. the LiDAR pipeline produces correct sheep-position estimates +from the real Webots scan, validating the sensor for the herding +task. The tracker outputs a `{name: (x, y)}` dict shaped exactly like the prior receiver-based one, so Strömbom, Sequential, and the BC obs @@ -53,7 +53,7 @@ Privileged ground-truth perception is available for ablation — pip install -r training/requirements.txt # 2. Smoke test -python -m training.parity_test +python -m tests.parity_test # 3. Reproduce the BC policy (~10 min on CPU: ~5 min demos + ~3 min BC) python -m tools.collect_demos --teacher strombom \ @@ -61,21 +61,17 @@ python -m tools.collect_demos --teacher strombom \ python -m training.bc_pretrain --demos training/demos.npz \ --out training/runs/bc --epochs 60 --net-arch 512,512 -# 4. Optional: DAgger from inside Webots if sim-trained doesn't transfer -tools/auto_dagger.sh 3 60 -python -m tools.dagger_merge_train --out training/runs/bc_dagger - -# 5. Evaluate (env) -python -m training.eval --policy training/runs/bc \ - --max-flock 10 --max-steps 8000 --n-seeds 5 - -# 6. Optional RL fine-tune of the BC policy (~40 min on CPU, 1 M steps) +# 4. KL-PPO fine-tune of the BC policy (~30 min on CPU, 1 M steps) python -m training.train_ppo \ --bc training/runs/bc \ --out training/runs/rl \ --total-timesteps 1000000 -# 7. Run in Webots +# 5. Evaluate (env) +python -m training.eval --policy training/runs/rl \ + --max-flock 10 --max-steps 15000 --n-seeds 10 + +# 6. Run in Webots tools/run_webots.sh 10 bc # behaviour-cloned MLP tools/run_webots.sh 10 rl # KL-PPO fine-tune tools/run_webots.sh 10 strombom # analytic baseline @@ -84,22 +80,25 @@ tools/run_webots.sh 10 strombom # analytic baseline ## Layout ``` -herding/ — single source of truth (env + Webots both import) - geometry.py — field/pen constants, robot specs - flocking_sim.py — Reynolds-style sheep dynamics - diffdrive.py — differential-drive kinematics - control.py — shared near-sheep speed-modulation helper +herding/ — perception / control / world primitives obs.py — 32-D order-invariant observation builder - strombom.py — canonical CoM-drive teacher - sequential.py — single-target "pin-and-push" teacher - active_scan.py — wraps a base teacher with opening rotation + - walk-to-centre + speed modulation - lidar_sim.py — fast 2D raycast for the env (sheep + walls + posts) - lidar_perception.py — scan → world-frame cluster centroids + filters - sheep_tracker.py — multi-target NN tracker with FOV memory + world/ — environment-side physics & geometry + geometry.py field/pen constants, robot specs + diffdrive.py differential-drive kinematics + flocking_sim.py Reynolds + Strömbom 2014 sheep dynamics + perception/ — LiDAR → tracked-sheep pipeline + lidar_sim.py fast 2D raycast for the env + lidar_perception.py scan → world-frame cluster centroids + filters + sheep_tracker.py multi-target NN tracker with FOV memory + control/ — every dog mode's action source + strombom.py canonical CoM collect/drive heuristic + sequential.py single-target "pin-and-push" alternative + active_scan.py wraps a base teacher with opening rotation + + walk-to-centre fallback + modulation.py shared near-sheep speed-modulation helper controllers/ - sheep/sheep.py — Webots sheep controller (uses herding.flocking_sim) + sheep/sheep.py — Webots sheep controller (uses herding.world.flocking_sim) shepherd_dog/ shepherd_dog.py — Webots dog controller, mode-switched policy_loader.py — lazy SB3 policy loader (auto-detects frame stack) @@ -107,16 +106,17 @@ controllers/ training/ herding_env.py — Gymnasium env (LiDAR + tracker by default) bc_pretrain.py — supervised BC of (obs, action) demos into MLP - eval.py — analytic + BC policy comparison harness - parity_test.py — shape / determinism smoke test + train_ppo.py — KL-regularised PPO fine-tune of BC + eval.py — analytic + learned policy comparison harness runs/ — checkpoints (whitelisted in .gitignore) requirements.txt +tests/ + parity_test.py — shape / determinism / baseline smoke test + tools/ collect_demos.py — sim demos via the active-scan teacher - dagger_merge_train.py — merge Webots-collected DAgger demos and retrain run_webots.sh — launch Webots with N sheep + chosen mode - auto_dagger.sh — headless DAgger collection across many runs worlds/ field.wbt — main world (3 m gate, external pen) @@ -127,8 +127,8 @@ docs/project.md — original project goals ## Shared low-level control -Every dog mode (RL, Strömbom, Sequential, the DAgger teacher) routes -its action through `herding/control.py:modulate_speed_near_sheep`, +Every dog mode (Strömbom, Sequential, BC, RL) routes its action +through `herding/control/modulation.py:modulate_speed_near_sheep`, which scales action magnitude down when within ~2.5 m of the nearest tracked sheep. This stops the dog from charging in at full speed and scattering the flock. Direction (intent) is preserved. diff --git a/controllers/sheep/flocking.py b/controllers/sheep/flocking.py index eb26f4f..f12af07 100644 --- a/controllers/sheep/flocking.py +++ b/controllers/sheep/flocking.py @@ -11,14 +11,14 @@ _PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", "..")) if _PROJECT_ROOT not in sys.path: sys.path.insert(0, _PROJECT_ROOT) -from herding.flocking_sim import ( # noqa: F401 +from herding.world.flocking_sim import ( # noqa: F401 MAX_SPEED, FLEE_SPEED, WANDER_SPEED, WALL_MARGIN, WALL_HARD_MARGIN, WALL_HARD_GAIN, FLEE_DIST, SEPARATION_DIST, COHESION_DIST, PEN_MARGIN, compute_heading_speed, ) -from herding.geometry import ( # noqa: F401 +from herding.world.geometry import ( # noqa: F401 FIELD_X, FIELD_Y, PEN_X, PEN_Y, in_pen, ) diff --git a/controllers/sheep/sheep.py b/controllers/sheep/sheep.py index 9ce6b75..7db8369 100644 --- a/controllers/sheep/sheep.py +++ b/controllers/sheep/sheep.py @@ -24,9 +24,9 @@ if _PROJECT_ROOT not in sys.path: from controller import Supervisor -from herding.diffdrive import heading_speed_to_wheels -from herding.flocking_sim import MAX_SPEED, compute_heading_speed -from herding.geometry import ( +from herding.world.diffdrive import heading_speed_to_wheels +from herding.world.flocking_sim import MAX_SPEED, compute_heading_speed +from herding.world.geometry import ( SHEEP_MAX_WHEEL_OMEGA, is_penned_position, ) diff --git a/controllers/shepherd_dog/shepherd_dog.py b/controllers/shepherd_dog/shepherd_dog.py index a6d466b..1823255 100644 --- a/controllers/shepherd_dog/shepherd_dog.py +++ b/controllers/shepherd_dog/shepherd_dog.py @@ -4,52 +4,39 @@ Mode is selected by ``HERDING_MODE`` (env var, or via the ``herding_runtime.cfg`` file the launcher writes since Webots strips env vars on some setups): - strombom → canonical Strömbom collect/drive heuristic. - sequential → single-target "pin and push" — drives the sheep - closest to the pen. - bc → behaviour-cloned MLP, trained on Strömbom demos via - sim. Default policy directory: training/runs/bc. - rl → KL-regularised PPO fine-tune of the BC policy. Same - obs/action space as bc; refines time-to-pen via - environment reward while staying anchored to bc. - Default policy directory: training/runs/rl. - dagger → DAgger data collection. Reads sheep ground-truth - via the receiver, computes the active-scan teacher's - recommended action at every step, drives with either - the teacher (HERDING_DAGGER_DRIVER=teacher, default) - or the loaded student (=student), and logs each - (lidar_stacked_obs, teacher_action) pair. On exit - dumps to ``training/dagger/dagger_.npz`` for - ``tools.dagger_merge_train`` to consume. + strombom → canonical Strömbom (2014) collect/drive heuristic + wrapped in ActiveScanTeacher (opening rotation + + walk-to-centre when the tracker briefly empties). + sequential → single-target "pin-and-push", same wrapper. + bc → behaviour-cloned MLP, trained on Strömbom demos. + Default policy: training/runs/bc/policy.zip. + rl → KL-regularised PPO fine-tune of bc. Same obs/action + space as bc; refines time-to-pen via reward while + staying anchored to bc. + Default policy: training/runs/rl/policy.zip. Sheep perception ---------------- -The dog now perceives sheep through its **front-mounted 140° LiDAR** -(``protos/ShepherdDog.proto``: 180 rays, 12 m max range). Each step -the controller: +The dog perceives sheep through its **front-mounted 140° LiDAR** +(``protos/ShepherdDog.proto``: 180 rays, 12 m max range). Each step: 1. Reads ``lidar.getRangeImage()``. - 2. Runs ``herding.lidar_perception.detections_from_scan`` to cluster - returns into world-frame ``(x, y)`` sheep estimates. - 3. Folds those into a ``herding.sheep_tracker.SheepTracker`` which - maintains last-seen positions for sheep currently out of the - FOV and latches "penned" once a track disappears near the gate. + 2. Runs ``herding.perception.lidar_perception.detections_from_scan`` + to cluster returns into world-frame ``(x, y)`` sheep estimates. + 3. Folds those into a ``SheepTracker`` which maintains last-seen + positions for sheep currently out of FOV and latches "penned" + once a track crosses the gate plane south. -The output of step 3 is a ``{name: (x, y)}`` dict shaped exactly like -the receiver-based one we used to consume — so Strömbom, Sequential -and the BC obs builder run unchanged. The sheep→dog Emitter/Receiver -link is still up (kept passively for compatibility) but its messages -are *not* used for control. +Sheep ``emitter`` messages are read **for diagnostic logging only** +(GT_penned counter + auto-finish sentinel); they are never used to +drive the policy. Perception for control comes entirely from LiDAR. -All modes share the same low-level differential-drive controller -(``herding.diffdrive.velocity_to_wheels`` with cos(err)-clamped forward -speed), so switching modes does not retune actuation. - -A safety supervisor enforces the "dog stays out of the pen" invariant: -if the action would push the dog past ``DOG_SOUTH_LIMIT`` it is -overridden with a north-driving correction. RL fallback: if the policy -zip can't be loaded (SB3 missing, file missing), the controller drops -to strombom mode automatically. +Auto-finish +----------- +When the dog observes (via GT, read off the receiver) that all sheep +are penned, it writes ``training/.run_done`` and the launcher +(``tools/run_webots.sh``) detects it and closes Webots. This keeps +batch evaluation runs bounded. """ import math @@ -62,26 +49,27 @@ _PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", "..")) if _PROJECT_ROOT not in sys.path: sys.path.insert(0, _PROJECT_ROOT) +import numpy as np + from controller import Robot -from herding.active_scan import ActiveScanTeacher -from herding.control import modulate_speed_near_sheep -from herding.diffdrive import velocity_to_wheels -from herding.geometry import ( +from herding.control.active_scan import ActiveScanTeacher +from herding.control.modulation import modulate_speed_near_sheep +from herding.control.sequential import compute_action as sequential_action +from herding.control.strombom import compute_action as strombom_action +from herding.obs import build_obs +from herding.perception.lidar_perception import detections_from_scan +from herding.perception.sheep_tracker import SheepTracker +from herding.world.diffdrive import velocity_to_wheels +from herding.world.geometry import ( DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA, DOG_SOUTH_LIMIT, DOG_WHEEL_RADIUS, PEN_ENTRY, is_penned_position, ) -from herding.lidar_perception import detections_from_scan -from herding.obs import OBS_DIM, build_obs -from herding.sequential import compute_action_debug as sequential_action_debug -from herding.sheep_tracker import SheepTracker -from herding.strombom import compute_action as strombom_action -from herding.strombom import compute_action_debug as strombom_action_debug # --------------------------------------------------------------------------- -# Mode selection +# Mode + policy resolution # --------------------------------------------------------------------------- def _load_runtime_config(): @@ -122,8 +110,8 @@ def _resolve_policy_dir(mode: str) -> str: 1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points to a real directory. 2. Mode-specific default: - bc → training/runs/bc (Strömbom-imitated MLP) - rl → training/runs/rl (KL-PPO fine-tune of bc) + bc → training/runs/bc (Strömbom-imitated MLP) + rl → training/runs/rl (KL-PPO fine-tune of bc) 3. Fall back to bc. All checkpoints are frame-stacked K = 4; ``policy_loader`` reads the stacking factor from the policy's observation space. @@ -135,60 +123,41 @@ def _resolve_policy_dir(mode: str) -> str: mode_default = { "bc": os.path.join(_PROJECT_ROOT, "training", "runs", "bc"), "rl": os.path.join(_PROJECT_ROOT, "training", "runs", "rl"), - "dagger": os.path.join(_PROJECT_ROOT, "training", "runs", "bc"), } primary = mode_default.get(mode, mode_default["bc"]) if os.path.isdir(primary): return primary - # Fall back to BC if the requested checkpoint isn't there yet - # (e.g., user asked for `rl` before training the fine-tune). fallback = mode_default["bc"] if os.path.isdir(fallback): return fallback return env_dir or primary -_VALID_MODES = ("bc", "rl", "strombom", "sequential", "dagger", "diag") -# Back-compat: an old config saying HERDING_MODE=rl meant "the BC policy". -# We now use `rl` strictly for the KL-PPO fine-tune. If the rl -# directory isn't present, _resolve_policy_dir below silently falls -# back to bc, preserving the old behaviour. +_VALID_MODES = ("bc", "rl", "strombom", "sequential") if MODE not in _VALID_MODES: print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.") MODE = "strombom" -DAGGER_DRIVER = (os.environ.get("HERDING_DAGGER_DRIVER") - or _runtime_cfg.get("HERDING_DAGGER_DRIVER") - or "teacher").lower() -if DAGGER_DRIVER not in ("teacher", "student"): - DAGGER_DRIVER = "teacher" - POLICY_DIR = _resolve_policy_dir(MODE) policy_handle = None -if MODE in ("bc", "rl", "dagger"): +if MODE in ("bc", "rl"): print(f"[dog] resolved POLICY_DIR={POLICY_DIR} exists={os.path.isdir(POLICY_DIR)}") try: from policy_loader import load as _load_policy policy_handle = _load_policy(POLICY_DIR) print(f"[dog] policy loaded from {POLICY_DIR}") except Exception as exc: - if MODE in ("bc", "rl"): - print(f"[dog] policy load failed ({exc!r}); falling back to strombom.") - MODE = "strombom" - else: - # In dagger mode, no policy is fine if driver=teacher. - print(f"[dog] policy load failed ({exc!r}); dagger driver forced to teacher.") - policy_handle = None -print(f"[dog] running in mode={MODE}" - + (f" driver={DAGGER_DRIVER}" if MODE == "dagger" else "")) + print(f"[dog] policy load failed ({exc!r}); falling back to strombom.") + MODE = "strombom" +print(f"[dog] running in mode={MODE}") # --------------------------------------------------------------------------- -# Action smoothing + safety supervisor +# Control parameters # --------------------------------------------------------------------------- -ACTION_SMOOTH = 0.55 # was 0.35; bumped for less frame-to-frame action jitter -prev_action = (0.0, 0.0) +ACTION_SMOOTH = 0.55 # EMA on (vx, vy) — kills frame-to-frame jitter +RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done") def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple: @@ -202,10 +171,6 @@ def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple: return (vx, vy) -# --------------------------------------------------------------------------- -# Driving -# --------------------------------------------------------------------------- - def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: float): if math.hypot(vx, vy) < 1e-3: left_motor.setVelocity(0.0) @@ -245,12 +210,9 @@ receiver = robot.getDevice("receiver"); receiver.enable(timestep) emitter = robot.getDevice("emitter") lidar = robot.getDevice("lidar"); lidar.enable(timestep) -# The receiver channel from sheep is no longer consumed for perception -# (kept enabled in case any peripheral tooling reads it). Sheep -# positions come exclusively from the LiDAR + tracker pipeline below. tracker = SheepTracker() -# Cosmetic ear motors — ignored by control logic but keep them animated. +# Cosmetic ear motors — animated; not used by control. left_ear = robot.getDevice("left ear motor") right_ear = robot.getDevice("right ear motor") left_ear.setPosition(float("inf")) @@ -266,75 +228,26 @@ EAR_RATE = 8.0 # Main loop # --------------------------------------------------------------------------- -# Active sheep positions come from the LiDAR-fed tracker each step; -# penned_set is the tracker's ``get_penned_set()`` call. We drain the -# receiver queue without consuming it, so the small backlog of sheep -# pings can't grow unbounded. -step_count = 0 +# Analytic-teacher wrapper (instantiated lazily so RL/BC modes don't pay +# the import-time cost). Each gets the same ActiveScanTeacher treatment: +# rotate-on-empty, walk-to-centre, near-sheep speed modulation. +analytic_teacher = None +if MODE in ("strombom", "sequential"): + base_fn = strombom_action if MODE == "strombom" else sequential_action + analytic_teacher = ActiveScanTeacher(base_fn) -import atexit -import time -import numpy as _np - -# DAgger state ---------------------------------------------------------- -# Logged each step in dagger mode: (stacked_lidar_obs, teacher_action). -DAGGER_LOG_OBS: list = [] -DAGGER_LOG_ACT: list = [] -# Diagnostic mode buffer (one dict per step). -DIAG_BUF: list = [] -# Frame stack buffer the controller maintains itself when dagger mode is -# active — the stacked obs we log must match what the policy sees so the -# downstream BC consumes (stacked_obs, teacher_action) pairs cleanly. -_FRAME_STACK = (policy_handle.frame_stack if policy_handle is not None else 4) -_dagger_buffer: list = [] -# Active-scan teacher operates on GT (read from receiver). -_dagger_teacher = ActiveScanTeacher(strombom_action) if MODE == "dagger" else None -# GT positions accumulated from the receiver (sheep emit their xy each step). +# GT positions from sheep emitters — used **only** for the auto-finish +# sentinel and the GT_penned diagnostic line. Never fed into control. _gt_sheep: dict = {} +_run_done = False - -_DAGGER_RUN_TS = int(time.time()) # one file per controller run -_DAGGER_DUMPED = False -# Sentinel that the auto-collection script polls — empty file written -# when this controller decides the run is "done" (all sheep penned, by -# GT). The launcher then kills Webots and moves on without waiting out -# its timeout. Honoured only in dagger mode. -_DAGGER_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", "dagger", ".DONE") - - -def _dump_dagger_log(): - """Save accumulated (obs, teacher_action) pairs to disk on exit. - - Webots may SIGKILL the controller, so the loop also calls this every - DAGGER_FLUSH_STEPS so we lose at most a few seconds of data per run. - Idempotent — repeated calls overwrite the same file with the latest - accumulated buffer. - """ - global _DAGGER_DUMPED - if MODE != "dagger" or not DAGGER_LOG_OBS: - return - out_dir = os.path.join(_PROJECT_ROOT, "training", "dagger") - os.makedirs(out_dir, exist_ok=True) - out_path = os.path.join(out_dir, f"dagger_{_DAGGER_RUN_TS}.npz") - obs_arr = _np.stack(DAGGER_LOG_OBS).astype(_np.float32) - act_arr = _np.stack(DAGGER_LOG_ACT).astype(_np.float32) - _np.savez(out_path, obs=obs_arr, actions=act_arr) - if not _DAGGER_DUMPED: - print(f"[dog dagger] wrote {len(DAGGER_LOG_OBS)} pairs → {out_path}") - _DAGGER_DUMPED = True - - -DAGGER_FLUSH_STEPS = 500 - - -atexit.register(_dump_dagger_log) - +prev_action = (0.0, 0.0) +step_count = 0 while robot.step(timestep) != -1: step_count += 1 - # Drain receiver. In every mode we capture GT for the diagnostic - # log line — perception still comes from LiDAR, the GT is read-only. + # Drain sheep emitter messages → GT (diagnostic only). while receiver.getQueueLength() > 0: msg = receiver.getString() receiver.nextPacket() @@ -350,115 +263,28 @@ while robot.step(timestep) != -1: n = compass.getValues() dog_heading = math.atan2(n[0], n[1]) - # ---- LiDAR perception → tracker → sheep_positions dict ---- - ranges = _np.asarray(lidar.getRangeImage(), dtype=_np.float32) + # ---- LiDAR perception → tracker → active sheep positions ---- + ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32) detections = detections_from_scan(ranges, dog_xy[0], dog_xy[1], dog_heading) sheep_positions = tracker.update(detections) - penned_set = tracker.get_penned_set() - # ---- Diagnostic mode: dump the first DIAG_STEPS scans + GT to disk. - if MODE == "diag": - DIAG_STEPS = 80 - if step_count <= DIAG_STEPS: - DIAG_BUF.append(dict( - step=step_count, - ranges=ranges.copy(), - dog_x=dog_xy[0], dog_y=dog_xy[1], dog_h=dog_heading, - gt_sheep=dict(_gt_sheep), - detections=list(detections), - )) - if step_count == DIAG_STEPS: - _diag_path = os.path.join(_PROJECT_ROOT, "training", "dagger", - f"diag_{int(time.time())}.npz") - os.makedirs(os.path.dirname(_diag_path), exist_ok=True) - _np.savez( - _diag_path, - ranges=_np.stack([d["ranges"] for d in DIAG_BUF]), - dog_xy=_np.array([[d["dog_x"], d["dog_y"]] for d in DIAG_BUF], - dtype=_np.float32), - dog_h=_np.array([d["dog_h"] for d in DIAG_BUF], dtype=_np.float32), - # Per-step GT serialised: max-pad to 10 sheep. - gt_xy=_np.array([ - [list(d["gt_sheep"].get(f"sheep{i}", (1e9, 1e9))) - for i in range(1, 11)] - for d in DIAG_BUF - ], dtype=_np.float32), - detections=_np.array([ - len(d["detections"]) for d in DIAG_BUF - ], dtype=_np.int32), - ) - print(f"[dog diag] wrote {DIAG_STEPS} scans → {_diag_path}") - - # Build the single-frame LiDAR obs (matches what the env produces). sheep_xy_list = list(sheep_positions.values()) sheep_penned_list = [False] * len(sheep_xy_list) single_obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list) - # Maintain our own frame stack so logged obs == what policy sees. - if not _dagger_buffer: - _dagger_buffer = [single_obs.copy() for _ in range(_FRAME_STACK)] - else: - _dagger_buffer.append(single_obs) - if len(_dagger_buffer) > _FRAME_STACK: - _dagger_buffer = _dagger_buffer[-_FRAME_STACK:] - stacked_obs = _np.concatenate(_dagger_buffer, axis=0).astype(_np.float32) # ---- Action selection ---- - if MODE == "diag": - # Diagnostic mode: rotate in place so the captured scans cover - # all 360° of view from one position. Target = heading + π → - # cos(err) clamps forward to ~0, the dog spins. - _t = dog_heading + math.pi - vx, vy = math.cos(_t), math.sin(_t) - elif MODE == "dagger": - # Teacher: active-scan + Strömbom on GT (active sheep only). - gt_active = {name: xy for name, xy in _gt_sheep.items() - if not is_penned_position(xy[0], xy[1])} - t_vx, t_vy, _mode_str = _dagger_teacher( - dog_xy, dog_heading, gt_active, PEN_ENTRY, - ) - # Student (if a policy is loaded). - s_vx, s_vy = None, None - if policy_handle is not None: - action = policy_handle.predict(stacked_obs) - s_vx, s_vy = float(action[0]), float(action[1]) - # Drive selection. - if DAGGER_DRIVER == "student" and policy_handle is not None: - vx, vy = s_vx, s_vy - else: - vx, vy = t_vx, t_vy - # Always log the teacher action (this is the supervision signal). - DAGGER_LOG_OBS.append(stacked_obs.copy()) - DAGGER_LOG_ACT.append(_np.array([t_vx, t_vy], dtype=_np.float32)) - elif MODE in ("bc", "rl") and policy_handle is not None: - # Pass the single-frame obs; the policy_loader maintains its own - # frame stack internally. Both bc and rl use the same control - # interface — the only difference is which checkpoint loaded. + if MODE in ("bc", "rl") and policy_handle is not None: action = policy_handle.predict(single_obs) vx, vy = float(action[0]), float(action[1]) - elif MODE in ("strombom", "sequential"): - # Wrap the analytic teacher in ActiveScanTeacher so the dog - # rotates / walks-to-centre when the tracker briefly empties, - # instead of going idle. Without this wrapper, the first 2 s - # of LiDAR-blind operation kills the run because Strömbom and - # Sequential both return (0, 0) when there are no positions. - if "_analytic_teacher" not in globals(): - from herding.sequential import compute_action as sequential_action - _analytic_teacher = ActiveScanTeacher( - strombom_action if MODE == "strombom" else sequential_action - ) - vx, vy, _mode_str = _analytic_teacher( + else: + vx, vy, _mode_str = analytic_teacher( dog_xy, dog_heading, sheep_positions, PEN_ENTRY, ) - # Shared post-process: speed modulation near sheep. Applies to bc, - # rl, strombom, sequential — every mode where the action source is - # nominally unit-magnitude. In dagger mode the active-scan teacher - # has already modulated, and the diag mode action is hand-built for - # rotation; both skip. - if MODE not in ("dagger", "diag"): - vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions) + # Near-sheep speed modulation (shared by every mode). + vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions) - # EMA smoothing — reduces oscillation from policy or Strömbom flips. + # EMA smoothing — kills frame-to-frame action jitter. vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy @@ -469,7 +295,7 @@ while robot.step(timestep) != -1: drive(vx, vy, left_motor, right_motor, compass, MOTOR_MAX) emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}") - # Cosmetic ear wiggle — purely visual. + # Cosmetic ear wiggle. ear_phase += 0.12 ear_pos = EAR_AMPLITUDE * math.sin(ear_phase) left_ear.setVelocity(EAR_RATE) @@ -477,38 +303,26 @@ while robot.step(timestep) != -1: left_ear.setPosition(ear_pos) right_ear.setPosition(-ear_pos) - # --- Early-stop when all GT sheep are penned (all modes) --- - # The dog isn't a Supervisor so it can't call simulationQuit() — - # instead we write a sentinel file the launcher polls for and uses - # to kill the Webots process. Bounded by `_gt_sheep` so we don't + # Auto-finish: when all GT sheep are penned, write the sentinel. + # The launcher polls for it and closes Webots so batch evals don't + # hang after the task is done. Bounded by `_gt_sheep` so we don't # fire during the first few steps while the receiver fills. - if _gt_sheep and not os.path.exists(_DAGGER_DONE_FILE): - gt_active_count = sum(1 for x, y in _gt_sheep.values() - if not is_penned_position(x, y)) - if gt_active_count == 0: - if MODE == "dagger": - _dump_dagger_log() - os.makedirs(os.path.dirname(_DAGGER_DONE_FILE), exist_ok=True) - open(_DAGGER_DONE_FILE, "w").close() + if _gt_sheep and not _run_done: + gt_active = sum(1 for x, y in _gt_sheep.values() + if not is_penned_position(x, y)) + if gt_active == 0: + os.makedirs(os.path.dirname(RUN_DONE_FILE), exist_ok=True) + open(RUN_DONE_FILE, "w").close() + _run_done = True print(f"[dog] all {len(_gt_sheep)} sheep penned at step " - f"{step_count} — wrote {_DAGGER_DONE_FILE}, " - f"launcher will close Webots") - - if MODE == "dagger" and step_count % DAGGER_FLUSH_STEPS == 0 and DAGGER_LOG_OBS: - _dump_dagger_log() + f"{step_count} — wrote sentinel, launcher will close Webots") if step_count % 200 == 0: gt_penned = sum(1 for x, y in _gt_sheep.values() if is_penned_position(x, y)) gt_total = len(_gt_sheep) - extra = "" - if MODE == "dagger": - extra = f" logged={len(DAGGER_LOG_OBS)}" print(f"[dog mode={MODE}] step={step_count} " f"GT_penned={gt_penned}/{gt_total} " f"tracks_active={tracker.n_active()} " f"tracks_penned={tracker.n_penned()} " - f"detections={len(detections)} action=({vx:+.2f}, {vy:+.2f}){extra}") - -# Loop ended (Webots told us to quit). Flush any remaining DAgger log. -_dump_dagger_log() + f"detections={len(detections)} action=({vx:+.2f}, {vy:+.2f})") diff --git a/herding/control/__init__.py b/herding/control/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/herding/active_scan.py b/herding/control/active_scan.py similarity index 98% rename from herding/active_scan.py rename to herding/control/active_scan.py index 68b24be..a378ce0 100644 --- a/herding/active_scan.py +++ b/herding/control/active_scan.py @@ -24,7 +24,7 @@ from __future__ import annotations import math -from herding.control import modulate_speed_near_sheep +from herding.control.modulation import modulate_speed_near_sheep INITIAL_SCAN_STEPS = 80 # ≈1.3 s at dt=16 ms — full rotation at the +π turn target. diff --git a/herding/control.py b/herding/control/modulation.py similarity index 100% rename from herding/control.py rename to herding/control/modulation.py diff --git a/herding/sequential.py b/herding/control/sequential.py similarity index 98% rename from herding/sequential.py rename to herding/control/sequential.py index 3fd1cf0..d694e2c 100644 --- a/herding/sequential.py +++ b/herding/control/sequential.py @@ -24,7 +24,7 @@ flock size and works up to at least n=10 within a 15 000-step budget. import math -from herding.geometry import GATE_Y, PEN_ENTRY, in_pen +from herding.world.geometry import GATE_Y, PEN_ENTRY, in_pen DELTA_DRIVE = 1.5 # standoff behind the target sheep diff --git a/herding/strombom.py b/herding/control/strombom.py similarity index 98% rename from herding/strombom.py rename to herding/control/strombom.py index 767da9b..1756d04 100644 --- a/herding/strombom.py +++ b/herding/control/strombom.py @@ -9,7 +9,7 @@ Reference: Strömbom et al. 2014, "Solving the shepherding problem". import math -from herding.geometry import PEN_ENTRY, GATE_Y, in_pen +from herding.world.geometry import PEN_ENTRY, GATE_Y, in_pen # Algorithm parameters. DELTA_DRIVE / DELTA_COLLECT were tightened from # the original (4.0 / 2.5) because the new external pen sits ~26 m from diff --git a/herding/obs.py b/herding/obs.py index 117cf4e..4bb7238 100644 --- a/herding/obs.py +++ b/herding/obs.py @@ -31,7 +31,7 @@ Layout (all components normalised so values stay roughly in [-1, 1]): import math import numpy as np -from herding.geometry import ( +from herding.world.geometry import ( FIELD_X, FIELD_Y, PEN_ENTRY, MAX_SHEEP, ) diff --git a/herding/perception/__init__.py b/herding/perception/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/herding/lidar_perception.py b/herding/perception/lidar_perception.py similarity index 98% rename from herding/lidar_perception.py rename to herding/perception/lidar_perception.py index eceb337..5607920 100644 --- a/herding/lidar_perception.py +++ b/herding/perception/lidar_perception.py @@ -29,8 +29,8 @@ import math import numpy as np -from herding.geometry import FIELD_X, FIELD_Y, GATE_Y, PEN_X, PEN_Y -from herding.lidar_sim import ( +from herding.world.geometry import FIELD_X, FIELD_Y, GATE_Y, PEN_X, PEN_Y +from herding.perception.lidar_sim import ( LIDAR_FOV, LIDAR_MAX_RANGE, LIDAR_N_RAYS, SHEEP_RADIUS, ray_angles, ) diff --git a/herding/lidar_sim.py b/herding/perception/lidar_sim.py similarity index 100% rename from herding/lidar_sim.py rename to herding/perception/lidar_sim.py diff --git a/herding/sheep_tracker.py b/herding/perception/sheep_tracker.py similarity index 99% rename from herding/sheep_tracker.py rename to herding/perception/sheep_tracker.py index 392810e..b225976 100644 --- a/herding/sheep_tracker.py +++ b/herding/perception/sheep_tracker.py @@ -26,7 +26,7 @@ from __future__ import annotations import math -from herding.geometry import MAX_SHEEP, in_pen, is_penned_position +from herding.world.geometry import MAX_SHEEP, in_pen, is_penned_position GATE_M = 2.5 # m — primary NN gate (recent tracks) diff --git a/herding/world/__init__.py b/herding/world/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/herding/diffdrive.py b/herding/world/diffdrive.py similarity index 100% rename from herding/diffdrive.py rename to herding/world/diffdrive.py diff --git a/herding/flocking_sim.py b/herding/world/flocking_sim.py similarity index 99% rename from herding/flocking_sim.py rename to herding/world/flocking_sim.py index b0d22aa..c0d593c 100644 --- a/herding/flocking_sim.py +++ b/herding/world/flocking_sim.py @@ -51,7 +51,7 @@ is a defensible engineering adaptation of Strömbom's qualitative import math import random -from herding.geometry import ( +from herding.world.geometry import ( FIELD_X, FIELD_Y, PEN_X, PEN_Y, GATE_X, diff --git a/herding/geometry.py b/herding/world/geometry.py similarity index 100% rename from herding/geometry.py rename to herding/world/geometry.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/training/parity_test.py b/tests/parity_test.py similarity index 96% rename from training/parity_test.py rename to tests/parity_test.py index 57b6c9d..3abf423 100644 --- a/training/parity_test.py +++ b/tests/parity_test.py @@ -21,9 +21,9 @@ if _PROJECT_ROOT not in sys.path: import numpy as np -from herding.geometry import MAX_SHEEP, PEN_ENTRY +from herding.world.geometry import MAX_SHEEP, PEN_ENTRY from herding.obs import OBS_DIM -from herding.strombom import compute_action +from herding.control.strombom import compute_action from training.herding_env import HerdingEnv diff --git a/tools/auto_dagger.sh b/tools/auto_dagger.sh deleted file mode 100755 index 4b58ee6..0000000 --- a/tools/auto_dagger.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash -# tools/auto_dagger.sh — automated DAgger collection across many headless -# Webots runs. -# -# For each (flock_size, run_index) combination, generates a world with N -# active sheep at randomised positions, launches Webots in fast/headless -# mode, lets the controller log (lidar_obs, teacher_action) pairs for up -# to RUN_SEC seconds, kills the run, and moves on. The dog controller's -# 500-step periodic flush means each run produces a complete .npz even -# when killed by timeout. -# -# Usage: -# tools/auto_dagger.sh [RUNS_PER_FLOCK] [SECONDS_PER_RUN] -# RUNS_PER_FLOCK : how many randomised runs per flock size (default 3) -# SECONDS_PER_RUN: wall-clock cap per Webots run (default 60) -# -# Env-var overrides: -# HERDING_POLICY_DIR : policy the controller loads (only used when -# HERDING_DAGGER_DRIVER=student). Default bc. -# HERDING_DAGGER_DRIVER : "teacher" (default) or "student". -# HEADLESS=1 : force --no-rendering (default on). -# FLOCKS="1 3 5 8 10" : space-separated flock sizes to iterate over. -# -# Output: -# training/dagger/dagger_.npz — one per Webots run. -# -# After collection, run: -# python -m tools.dagger_merge_train --out training/runs/bc_dagger - -set -e - -RUNS_PER_FLOCK=${1:-3} -RUN_SEC=${2:-60} -FLOCKS=${FLOCKS:-"1 3 5 8 10"} -HEADLESS=${HEADLESS:-1} - -ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )" -SRC="$ROOT/worlds/field.wbt" -DST="$ROOT/worlds/field_test.wbt" -POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc}" -DRIVER="${HERDING_DAGGER_DRIVER:-teacher}" -DONE_FILE="$ROOT/training/dagger/.DONE" -WEBOTS_PID="" - -cleanup() { - echo "Caught interrupt — killing Webots (pid=$WEBOTS_PID) and exiting." - [[ -n "$WEBOTS_PID" ]] && kill "$WEBOTS_PID" 2>/dev/null - wait "$WEBOTS_PID" 2>/dev/null || true - exit 1 -} -trap cleanup INT TERM - -webots_args=(--mode=fast --batch --minimize) -if [[ "$HEADLESS" == "1" ]]; then - webots_args+=(--no-rendering) -fi - -echo "Auto-dagger collection" -echo " flock sizes : $FLOCKS" -echo " runs per size : $RUNS_PER_FLOCK" -echo " seconds per run : $RUN_SEC" -echo " policy dir : $POLICY_DIR (used only when driver=student)" -echo " driver : $DRIVER" -echo " webots flags : ${webots_args[*]}" -echo - -# Runtime config — re-written before each run anyway, but written once -# here so a manual webots launch at the same time would also pick it up. -cat > "$ROOT/herding_runtime.cfg" </dev/null | wc -l || echo 0) - -run_idx=0 -total_runs=0 -for f in $FLOCKS; do total_runs=$((total_runs + RUNS_PER_FLOCK)); done - -for flock in $FLOCKS; do - for run in $(seq 1 "$RUNS_PER_FLOCK"); do - run_idx=$((run_idx + 1)) - seed=$((1000 * flock + run)) - echo "=== [$run_idx/$total_runs] flock=$flock run=$run seed=$seed ===" - - # Generate randomised world. - cp "$SRC" "$DST" - for i in $(seq $((flock + 1)) 10); do - sed -i "s|^Sheep .* \"sheep${i}\".*|# &|" "$DST" - done - # Inline Python: jitter sheep1..flock translations. - python3 - "$DST" "$flock" "$seed" <<'PYEOF' -import re, random, sys -path, n_str, seed = sys.argv[1], sys.argv[2], sys.argv[3] -n = int(n_str); random.seed(int(seed)) -with open(path) as f: - txt = f.read() -def rand_pos(): - while True: - x = random.uniform(-12.0, 12.0) - y = random.uniform(-10.0, 12.0) # avoid the gate strip - if x * x + y * y > 9.0: # at least 3 m from dog spawn - return x, y -for i in range(1, n + 1): - x, y = rand_pos() - pat = re.compile( - r'Sheep \{ translation\s+\S+\s+\S+\s+(\S+)\s+name "sheep' + str(i) + r'"' - ) - txt = pat.sub(rf'Sheep {{ translation {x:.2f} {y:.2f} \g<1> name "sheep{i}"', txt, count=1) -with open(path, "w") as f: - f.write(txt) -PYEOF - - # Run Webots in the background; poll for the .DONE sentinel or - # the wall-clock timeout, whichever comes first. - rm -f "$DONE_FILE" - webots "${webots_args[@]}" "$DST" \ - > /tmp/webots_dagger_run.log 2>&1 & - WEBOTS_PID=$! - - # Give the controller 10 s to start before polling the sentinel, - # otherwise a sheep that spawns already penned triggers an instant - # false-positive kill. - elapsed=0 - grace=10 - while kill -0 "$WEBOTS_PID" 2>/dev/null; do - if (( elapsed >= grace )) && [[ -f "$DONE_FILE" ]]; then - echo " sentinel .DONE detected — killing Webots early" - kill "$WEBOTS_PID" 2>/dev/null - wait "$WEBOTS_PID" 2>/dev/null || true - break - fi - if (( elapsed >= RUN_SEC )); then - echo " timeout ($RUN_SEC s) — killing Webots" - kill "$WEBOTS_PID" 2>/dev/null - wait "$WEBOTS_PID" 2>/dev/null || true - break - fi - sleep 2 - elapsed=$((elapsed + 2)) - done - WEBOTS_PID="" - - # Quick sanity from the log: did the controller actually run? - if grep -q "running in mode=dagger" /tmp/webots_dagger_run.log; then - new_pairs=$(tail -50 /tmp/webots_dagger_run.log | grep -oE 'logged=[0-9]+' | tail -1) - echo " controller ran ($new_pairs)" - else - echo " WARNING: controller may not have started (see /tmp/webots_dagger_run.log)" - fi - done -done - -after_count=$(ls -1 "$ROOT/training/dagger"/dagger_*.npz 2>/dev/null | wc -l || echo 0) -new_files=$((after_count - before_count)) - -echo -echo "Done." -echo " new dagger files : $new_files" -echo " total in dir : $after_count" -echo -echo "Next:" -echo " python -m tools.dagger_merge_train --out training/runs/bc_dagger" diff --git a/tools/collect_demos.py b/tools/collect_demos.py index 52e8ccb..10221c4 100644 --- a/tools/collect_demos.py +++ b/tools/collect_demos.py @@ -26,10 +26,10 @@ if _PROJECT_ROOT not in sys.path: import numpy as np -from herding.active_scan import ActiveScanTeacher -from herding.geometry import PEN_ENTRY -from herding.sequential import compute_action as sequential_action -from herding.strombom import compute_action as strombom_action +from herding.control.active_scan import ActiveScanTeacher +from herding.world.geometry import PEN_ENTRY +from herding.control.sequential import compute_action as sequential_action +from herding.control.strombom import compute_action as strombom_action from training.herding_env import HerdingEnv diff --git a/tools/dagger_merge_train.py b/tools/dagger_merge_train.py deleted file mode 100644 index 00f3f9a..0000000 --- a/tools/dagger_merge_train.py +++ /dev/null @@ -1,135 +0,0 @@ -"""Merge Webots DAgger demos with sim demos and retrain the BC policy. - -The dog controller in ``HERDING_MODE=dagger`` writes per-run files to -``training/dagger/dagger_.npz`` containing ``(obs, actions)`` pairs -where: - -* ``obs`` is the **stacked LiDAR observation** as built by the live - Webots tracker — exactly the input distribution the deployed - controller sees. -* ``actions`` is the **active-scan-teacher action computed from - ground-truth sheep positions** (read off the sheep emitter). - -Combined with the existing sim demos (``training/demos.npz`` by -default), this gives the BC student a training set that includes the -real Webots false-positive distribution — closing the sim-to-real -perception gap that the all-sim pipeline couldn't bridge. - -Usage:: - - # Iteration 1 — merge all dagger files with sim demos, retrain - python -m tools.dagger_merge_train \\ - --sim training/demos.npz \\ - --out training/runs/bc_dagger1 - - # Iteration 2 — drop the sim baseline, train only on Webots data - python -m tools.dagger_merge_train --no-sim --out training/runs/bc_dagger2 - -The new policy is saved as ``/policy.zip`` and is auto-loaded by -the controller's resolution priority on the next Webots run. -""" - -from __future__ import annotations - -import argparse -import glob -import os -import subprocess -import sys -from pathlib import Path - -_HERE = os.path.dirname(os.path.abspath(__file__)) -_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..")) -if _PROJECT_ROOT not in sys.path: - sys.path.insert(0, _PROJECT_ROOT) - -import numpy as np - - -def main() -> None: - parser = argparse.ArgumentParser() - parser.add_argument("--sim", default="training/demos.npz", - help="Sim demo file to mix with the Webots data. " - "Pass --no-sim to train only on dagger data.") - parser.add_argument("--no-sim", action="store_true", - help="Skip the sim demos entirely.") - parser.add_argument("--dagger-glob", default="training/dagger/dagger_*.npz", - help="Glob for Webots-collected dagger files.") - parser.add_argument("--merged-out", default="training/demos_dagger.npz", - help="Where to write the merged demo file.") - parser.add_argument("--out", default="training/runs/bc_dagger", - help="Where to write the BC policy.") - parser.add_argument("--epochs", type=int, default=60) - parser.add_argument("--batch-size", type=int, default=256) - parser.add_argument("--net-arch", default="512,512") - parser.add_argument("--cos-weight", type=float, default=1.0) - args = parser.parse_args() - - # --- Gather Webots files --- - dagger_paths = sorted(glob.glob(args.dagger_glob)) - if not dagger_paths: - raise SystemExit(f"No dagger files found at {args.dagger_glob} — " - "run Webots in HERDING_MODE=dagger first.") - - chunks_obs: list[np.ndarray] = [] - chunks_act: list[np.ndarray] = [] - total_dagger = 0 - for p in dagger_paths: - data = np.load(p) - obs = data["obs"].astype(np.float32) - act = data["actions"].astype(np.float32) - chunks_obs.append(obs) - chunks_act.append(act) - total_dagger += len(obs) - print(f" + {p}: {obs.shape[0]} pairs (obs dim {obs.shape[1]})") - print(f"[merge] total dagger pairs: {total_dagger}") - - obs_dim = chunks_obs[0].shape[1] - if any(c.shape[1] != obs_dim for c in chunks_obs): - raise SystemExit( - "Dagger files have inconsistent obs dims — they were collected " - "with different frame_stack settings. Either rerun with a " - "consistent setting or filter the glob." - ) - - # --- Optionally include sim demos --- - if not args.no_sim: - sim = np.load(args.sim) - sim_obs = sim["obs"].astype(np.float32) - sim_act = sim["actions"].astype(np.float32) - if sim_obs.shape[1] != obs_dim: - raise SystemExit( - f"Sim demos have obs dim {sim_obs.shape[1]} but dagger demos " - f"have {obs_dim}. Recollect sim demos at the same frame_stack." - ) - chunks_obs.append(sim_obs) - chunks_act.append(sim_act) - print(f"[merge] + sim demos: {sim_obs.shape[0]} pairs from {args.sim}") - - obs_all = np.concatenate(chunks_obs, axis=0) - act_all = np.concatenate(chunks_act, axis=0) - # Empty meta — bc_pretrain doesn't actually use it but the file format - # has it. - meta = np.zeros((0, 5), dtype=np.int32) - - Path(args.merged_out).parent.mkdir(parents=True, exist_ok=True) - np.savez(args.merged_out, obs=obs_all, actions=act_all, meta=meta) - print(f"[merge] wrote {len(obs_all)} pairs → {args.merged_out}") - print(f"[merge] obs shape {obs_all.shape}, action shape {act_all.shape}") - - # --- Run BC training --- - cmd = [ - sys.executable, "-m", "training.bc_pretrain", - "--demos", args.merged_out, - "--out", args.out, - "--epochs", str(args.epochs), - "--batch-size", str(args.batch_size), - "--net-arch", args.net_arch, - "--cos-weight", str(args.cos_weight), - ] - print(f"\n[merge] launching: {' '.join(cmd)}") - subprocess.run(cmd, check=True, cwd=_PROJECT_ROOT) - - -if __name__ == "__main__": - main() diff --git a/tools/run_webots.sh b/tools/run_webots.sh index 97c7cfa..8df26f9 100755 --- a/tools/run_webots.sh +++ b/tools/run_webots.sh @@ -7,19 +7,17 @@ # Usage: # tools/run_webots.sh [N] [MODE] # N : number of active sheep (1..10), default 10 -# MODE : "bc" | "rl" | "strombom" | "sequential" | "dagger", default "bc" +# MODE : "bc" | "rl" | "strombom" | "sequential", default "bc" # # Examples: -# tools/run_webots.sh 10 bc # BC-trained policy, 10 sheep +# tools/run_webots.sh 10 bc # behaviour-cloned MLP, 10 sheep # tools/run_webots.sh 10 rl # KL-PPO fine-tune of bc, 10 sheep -# tools/run_webots.sh 5 sequential # the analytic teacher, 5 sheep -# tools/run_webots.sh 3 strombom # canonical baseline, 3 sheep +# tools/run_webots.sh 5 sequential # single-target analytic baseline +# tools/run_webots.sh 3 strombom # canonical Strömbom analytic # # Notes: -# * The RL mode loads the latest BC policy by default — priority -# the BC policy (bc/policy.zip) (the controller resolves it). -# (LiDAR-perception, frame-stack K=4). Override via -# HERDING_POLICY_DIR=/path/to/run env var. +# * bc loads training/runs/bc/policy.zip, rl loads training/runs/rl. +# Override via HERDING_POLICY_DIR=/path/to/run env var. # * Conda env "tir" must be active (provides stable-baselines3 + torch). set -e @@ -30,10 +28,9 @@ if (( N < 1 || N > 10 )); then echo "N must be 1..10, got $N" >&2; exit 1 fi case "$MODE" in - bc|rl|strombom|sequential|dagger) ;; - *) echo "MODE must be bc|rl|strombom|sequential|dagger, got '$MODE'" >&2; exit 1 ;; + bc|rl|strombom|sequential) ;; + *) echo "MODE must be bc|rl|strombom|sequential, got '$MODE'" >&2; exit 1 ;; esac -DAGGER_DRIVER=${HERDING_DAGGER_DRIVER:-teacher} ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )" SRC="$ROOT/worlds/field.wbt" @@ -59,7 +56,6 @@ RESOLVED_POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc}" cat > "$ROOT/herding_runtime.cfg" <