diff --git a/.gitignore b/.gitignore index 923d7e2..8851b64 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ # Python __pycache__/ +*.pyc +.pytest_cache/ # Training artefacts: ignore all run outputs except deployable policies training/runs/** @@ -8,8 +10,22 @@ training/runs/** !training/runs/*/ !training/runs/*/policy.zip -# Webots launcher scratch +# BC demo blobs — these get regenerated by `python -m training.bc.collect` +# and are too large to track. Keep them out of git. +training/bc/*.npz +training/bc/v1/ + +# Webots launcher scratch (the _test.wbt files are emitted on every run) worlds/** !worlds/field.wbt !worlds/field_round.wbt herding_runtime.cfg + +# Runtime logs — all of these are produced by training/eval/webots runs +# and are not useful to track in git. Keep summary docs/markdown only. +*.log +calibrate_mecanum.log +training/.run_done + +# Tooling +.claude/ diff --git a/README.md b/README.md index 2860859..c307841 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,18 @@ Group G25 — *Diogo Costa, Johnny Fernandes, Nelson Neto* -A differential-drive shepherd dog that herds 1–10 sheep through a 3 m -gate into an external pen. The dog has three deployable modes: +A shepherd dog that herds 1–10 sheep through a 3 m gate into an +external pen. Two worlds (`field` rectangular, `field_round` circular), +two drives (`differential`, `mecanum`), and four deployable control +modes: | Mode | Source | Role | |---|---|---| | `strombom` | Strömbom et al. (2014) collect/drive heuristic | Analytic baseline | -| `bc` | Behaviour cloning of the Strömbom teacher | Imitation learning result | +| `sequential` | Single-target "pin-and-push" | Alternative analytic baseline | +| `bc` | Behaviour cloning of the universal teacher | Imitation learning result | | `rl` | KL-regularised PPO fine-tune of `bc` | Reward-driven refinement | -`sequential` (single-target pin-and-push) is kept as an alternative -analytic baseline. 
- ## Perception The dog perceives sheep **only through its front-mounted 140° LiDAR** @@ -52,27 +52,39 @@ Privileged ground-truth perception is available for ablation — # 1. Set up the Python env (any venv with PyTorch + SB3) pip install -r training/requirements.txt -# 2. Smoke test (70 pytest cases, < 1 s) +# 2. Smoke test (126 pytest cases, < 1 s) make test -# 3. Reproduce the full pipeline (~30–60 min CPU) -make # demos -> bc -> rl -> eval +# 3. Reproduce a full pipeline (DRIVE+WORLD specific, ~1 h CPU) +make DRIVE=differential WORLD=field # demos -> bc -> rl -> eval +make DRIVE=differential WORLD=field_round +make DRIVE=mecanum WORLD=field # see note below +make train_all # all 4 combos sequentially # Individual stages (each rebuilds upstream artefacts if missing): -make bc_demos # sim demos -make bc # behaviour clone -make rl # KL-PPO fine-tune -make eval # 10-seed env eval of rl +make DRIVE=differential WORLD=field bc_demos # sim demos +make DRIVE=differential WORLD=field bc # behaviour clone +make DRIVE=differential WORLD=field rl # KL-PPO fine-tune +make DRIVE=differential WORLD=field eval # 10-seed env eval # 4. Run in Webots -make webots N=10 MODE=bc # behaviour-cloned MLP -make webots N=10 MODE=rl # KL-PPO fine-tune -make webots N=10 MODE=strombom # analytic baseline -# (or invoke directly: tools/run_webots.sh 10 rl) +tools/run_webots.sh 10 bc differential field # BC, diff, rect field +tools/run_webots.sh 10 rl differential field_round # RL, diff, round field +tools/run_webots.sh 5 strombom differential field # analytic baseline +HERDING_USE_GT=1 tools/run_webots.sh 5 strombom differential field + # GT bypass for ablation ``` -`make help` lists every target and the overridable hyperparameters -(e.g. `make rl PPO_STEPS=2000000 KL=0.02`). +`make help` lists every target and the overridable hyperparameters. + +**Mecanum note**: the `ShepherdDogMecanum.proto` uses physical roller +hinges in Webots (committed 2026-05-16). 
The Webots calibration shows +a ~60% strafe efficiency and ~28% backward bleed compared to textbook +mecanum; the gym kinematics in `HERDING_MEC_WEBOTS` are tuned to +match. **Mecanum BC/RL policies need to be retrained against this +preset** — see `mecanum_proto_gap.md` in `memory/` for the 3-command +flow. The v1 policies in `training/runs/{bc,rl}_mecanum_*` predate the +proto rewrite and will not herd reliably in Webots until retrained. ## Documentation map @@ -87,56 +99,67 @@ make webots N=10 MODE=strombom # analytic baseline ``` herding/ — perception / control / world primitives - world/ — environment-side physics & geometry - geometry.py field/pen constants, robot specs - diffdrive.py differential-drive kinematics + config.py — frozen dataclasses for all tunable parameters; + named presets HERDING_DEFAULT / HERDING_WEBOTS / + HERDING_MEC_WEBOTS + world/ + geometry.py field/pen constants, world-shape switch + diffdrive.py differential + mecanum kinematics flocking_sim.py Reynolds + Strömbom 2014 sheep dynamics - perception/ — LiDAR → tracked-sheep pipeline - lidar_sim.py fast 2D raycast for the env + perception/ + lidar_sim.py fast 2D raycast for the gym env lidar_perception.py scan → world-frame cluster centroids + filters sheep_tracker.py multi-target NN tracker with FOV memory + and the consensus-promotion stage obs.py 32-D order-invariant observation builder - control/ — every dog mode's action source + control/ strombom.py canonical CoM collect/drive heuristic + (round-world aware) sequential.py single-target "pin-and-push" alternative - active_scan.py wraps a base teacher with opening rotation + - walk-to-centre fallback + universal.py teacher used for BC demo collection + (Strömbom + mecanum omega + straggler recovery) + active_scan.py rotate-on-empty + walk-to-centre fallback modulation.py shared near-sheep speed-modulation helper controllers/ - sheep/sheep.py — Webots sheep controller (uses herding.world.flocking_sim) + sheep/sheep.py — Webots sheep 
controller shepherd_dog/ shepherd_dog.py — Webots dog controller, mode-switched - policy_loader.py — lazy SB3 policy loader (auto-detects frame stack) + policy_loader.py — SB3 PPO / RecurrentPPO loader with frame stack training/ herding_env.py — Gymnasium env (LiDAR + tracker by default) bc/collect.py — sim demos via the active-scan teacher - bc/pretrain.py — supervised BC of (obs, action) demos into MLP + bc/pretrain.py — supervised BC into MLP rl/train.py — KL-regularised PPO fine-tune of BC + rl/train_lstm.py — RecurrentPPO variant (ablation) eval.py — analytic + learned policy comparison harness - bc/demos.npz — collected demonstrations (gitignored) runs/ — checkpoints (whitelisted in .gitignore) requirements.txt -tests/ - conftest.py — pytest setup (adds project root to sys.path) - test_geometry.py — geometric predicates + constants - test_diffdrive.py — kinematics and (vx, vy) → wheel-speed map - test_obs.py — observation builder (shape, normalisation, order) - test_control.py — speed modulation + analytic teachers + active scan - test_perception.py — LiDAR sim + clustering + tracker - test_env.py — Gymnasium contract + determinism + reward +tests/ — 126 pytest cases, < 1 s on CPU tools/ - run_webots.sh — launch Webots with N sheep + chosen mode + run_webots.sh — launch Webots with N sheep + chosen mode + world + webots_sweep.sh — headless sweep across modes × drives × worlds + webots_sweep_gt.sh — same with HERDING_USE_GT=1 (perfect perception) + calibrate_mecanum.sh — measure mecanum body velocity vs gym prediction + gen_mecanum_wheels.py — regenerate the 32 mecanum roller hinges + benchmark_lidar.py — tracker quality benchmark -Makefile — pipeline orchestrator (make / make rl / make test / …) +Makefile — pipeline orchestrator + (make DRIVE=… WORLD=… rl, make train_all, …) worlds/ - field.wbt — main world (3 m gate, external pen) + field.wbt — rectangular world (3 m gate, external pen) + field_round.wbt — circular world (radius 15 m, same pen) + +protos/ + 
Sheep.proto — sheep robot + ShepherdDog.proto — diff-drive dog, 140° LiDAR + ShepherdDog360.proto — diff-drive dog, 360° LiDAR (ablation) + ShepherdDogMecanum.proto — 4-wheel mecanum with physical roller hinges -protos/ — Sheep / ShepherdDog robot definitions docs/project.md — original course proposal/goals ``` @@ -151,48 +174,57 @@ scattering the flock. Direction (intent) is preserved. All modes also share the same EMA action smoother in `controllers/shepherd_dog/shepherd_dog.py:ACTION_SMOOTH = 0.55`. -## Results — env eval, 10 seeds × n=1..10 +## Results — Webots end-to-end, canonical 140° LiDAR -`max_steps=15000`, full-field spawn distribution. Success rate per -flock size, then mean steps over successful seeds. +Each cell is the step at which the dog penned all N sheep in a single +trial, with `HERDING_USE_GT=0` (LiDAR perception, no ground-truth bypass) +and the default consensus tracker. -### Success rate (%) +### Differential drive -| n | Strömbom | `bc` | `rl` | -|---:|---:|---:|---:| -| 1 | 30 | 80 | **90** | -| 2 | 90 | 50 | **90** | -| 3 | 60 | 90 | **90** | -| 4 | 40 | 80 | **90** | -| 5 | 60 | 70 | **100** | -| 6 | 30 | 80 | 80 | -| 7 | 70 | 80 | **100** | -| 8 | 30 | 100 | **100** | -| 9 | 40 | 90 | **100** | -| 10 | 50 | 100 | **100** | +| Mode | World | n=5 | n=10 | +|---|---|---:|---:| +| Strömbom | field | 7528 | 11620 | +| Strömbom | field_round | 8611 | 10339 | +| Sequential | field | 7135 | 16843 | +| Sequential | field_round | 6019 | 8494 | +| BC | field | 11698 | 15079 | +| BC | field_round | 7234 | 11320 | +| RL | field | 10039 | 13954 | +| RL | field_round | 5803 | 9151 | -### Mean penned per episode (out of n) +RL is **strictly faster than BC** on every comparable cell. 
-| n | Strömbom | `bc` | `rl` | -|---:|---:|---:|---:| -| 1 | 0.30 | 0.80 | **0.90** | -| 5 | 3.90 | 4.10 | **5.00** | -| 8 | 4.20 | 8.00 | **8.00** | -| 10 | 7.40 | 10.00 | **10.00** | +### LiDAR vs GT bypass (diff drive) -### Takeaways +GT bypass replaces the LiDAR tracker with perfect emitter positions. +LiDAR is the default; GT is a perception ablation +(`HERDING_USE_GT=1`): -- **BC clearly beats Strömbom** under realistic LiDAR conditions (full - field, partial observability). Strömbom struggles on small flocks - where a single sheep can spawn beyond the LiDAR's 12 m range; BC - learned active perception from the demos. -- **RL refines BC** without regressing on any cell. Ties or beats BC - at every flock size; biggest gains at n=1 and n=4 where BC's - imitation of Strömbom's drive heuristic was sub-optimal. -- **Aggressive reward shaping doesn't help** — a more aggressive - variant (β=0.02, W_TIME=-0.1, W_IMITATE=0, 3 M steps) trained as - an ablation was strictly worse than the conservative tune shipped - here (β=0.05, W_IMITATE=0.5, 1 M steps). +| Mode | World | n=5 LiDAR | n=5 GT | n=10 LiDAR | n=10 GT | +|---|---|---:|---:|---:|---:| +| Strömbom | field | 7528 | **5254** | 11620 | **7342** | +| Strömbom | field_round | 8611 | **3631** | 10339 | **7084** | +| Sequential | field | **7135** | 11092 | 16843 | **8698** | +| Sequential | field_round | 6019 | **3454** | 8494 | **7324** | + +GT is generally faster (perfect perception → fewer wasted steps). +Sequential n=5 / field is the one cell where GT is *slower* — its +straggler heuristic appears to over-correct when the dog has full +information. + +### Mecanum (differential is the headline) + +The `ShepherdDogMecanum.proto` was rewritten on 2026-05-16 with 32 +physical roller hinges, giving true omnidirectional motion in Webots +(`tools/calibrate_mecanum.sh` confirms the X-pattern). 
The mecanum +calibration shows ~60% strafe efficiency vs textbook (vs ~89% on +forward), so v1 mecanum BC/RL policies trained on textbook gym +mecanum no longer herd reliably. The fix is staged but not run: +the gym now has `HERDING_MEC_WEBOTS` which matches Webots' physical +mecanum, and `training/bc/collect.py` / `training/rl/train.py` +auto-select this preset for mecanum runs. Retraining (≈ 2 h per combo, +4 combos) is the documented future step. ## License diff --git a/herding/control/sequential.py b/herding/control/sequential.py index 6347156..39a3614 100644 --- a/herding/control/sequential.py +++ b/herding/control/sequential.py @@ -80,48 +80,3 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY): ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1]) return ax, ay, mode - - -def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY): - """``compute_action`` plus a debug dict.""" - active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)] - if not active: - return 0.0, 0.0, "idle", { - "n_active": 0, "phase": "idle", "radius": 0.0, "threshold": 0.0, - "com_x": 0.0, "com_y": 0.0, - "target_x": dog_xy[0], "target_y": dog_xy[1], - } - - n = len(active) - com_x = sum(p[0] for p in active) / n - com_y = sum(p[1] for p in active) / n - dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active] - radius = max(dists) - threshold = F_FACTOR * math.sqrt(n) - - if n <= STRAGGLER_THRESHOLD: - sx, sy = min(active, - key=lambda p: math.hypot(p[0] - pen_target[0], - p[1] - pen_target[1])) - ux, uy = _unit(sx - pen_target[0], sy - pen_target[1]) - tx, ty = sx + DELTA_TARGET * ux, sy + DELTA_TARGET * uy - mode = "targeted" - - elif radius > threshold: - idx = max(range(n), key=lambda i: dists[i]) - sx, sy = active[idx] - ux, uy = _unit(sx - com_x, sy - com_y) - tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy - mode = "collect" - - else: - ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1]) - tx, ty = com_x + 
DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy - mode = "drive" - - ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1]) - return ax, ay, mode, { - "n_active": n, "phase": mode, "radius": radius, "threshold": threshold, - "com_x": com_x, "com_y": com_y, - "target_x": tx, "target_y": ty, - } diff --git a/herding/control/strombom.py b/herding/control/strombom.py index 5ba46d1..fa77be4 100644 --- a/herding/control/strombom.py +++ b/herding/control/strombom.py @@ -76,40 +76,3 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY): ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1]) return ax, ay, mode - - -def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY): - """``compute_action`` plus a small debug dict (CoM, target, radius).""" - active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)] - if not active: - return 0.0, 0.0, "idle", { - "n_active": 0, "radius": 0.0, "threshold": 0.0, - "com_x": 0.0, "com_y": 0.0, - "target_x": dog_xy[0], "target_y": dog_xy[1], - } - - n = len(active) - com_x = sum(p[0] for p in active) / n - com_y = sum(p[1] for p in active) / n - dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active] - radius = max(dists) - threshold = F_FACTOR * math.sqrt(n) - - if radius > threshold: - idx = max(range(n), key=lambda i: dists[i]) - sx, sy = active[idx] - ux, uy = _unit(sx - com_x, sy - com_y) - tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy - mode = "collect" - else: - ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1]) - tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy - mode = "drive" - - ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1]) - dbg = { - "n_active": n, "radius": radius, "threshold": threshold, - "com_x": com_x, "com_y": com_y, - "target_x": tx, "target_y": ty, - } - return ax, ay, mode, dbg diff --git a/herding/control/universal.py b/herding/control/universal.py index 8edf22a..564beb1 100644 --- a/herding/control/universal.py +++ b/herding/control/universal.py 
@@ -207,17 +207,3 @@ def compute_action(dog_xy, dog_heading, sheep_positions, omega = max(-1.0, min(1.0, OMEGA_GAIN * err / math.pi)) return ax, ay, omega, mode - - -def compute_action_diff(dog_xy, dog_heading, sheep_positions, - pen_target=PEN_ENTRY): - """Compatibility wrapper returning ``(vx, vy, mode)`` — same as Strömbom. - - Use this when plugging into existing differential-drive code that - doesn't expect omega. - """ - vx, vy, _omega, mode = compute_action( - dog_xy, dog_heading, sheep_positions, pen_target, - drive_mode="differential", - ) - return vx, vy, mode diff --git a/stage1_train.log b/stage1_train.log deleted file mode 100644 index 210fc8d..0000000 --- a/stage1_train.log +++ /dev/null @@ -1,7 +0,0 @@ -make[1]: Entering directory '/run/host/home/johnnyf/Documents/Projects/TIR/project' -make DRIVE=differential WORLD=field -make[2]: Entering directory '/run/host/home/johnnyf/Documents/Projects/TIR/project' -python -m training.eval --policy training/runs/rl_differential_field \ - --max-flock 10 --max-steps 15000 --n-seeds 10 \ - --drive-mode differential --world field -make[2]: Leaving directory '/run/host/home/johnnyf/Documents/Projects/TIR/project' diff --git a/training/bc/demos.npz b/training/bc/demos.npz deleted file mode 100644 index b7738c1..0000000 Binary files a/training/bc/demos.npz and /dev/null differ diff --git a/training/runs/bc_dagger1_differential_field/policy.zip b/training/runs/bc_dagger1_differential_field/policy.zip deleted file mode 100644 index e04d8a3..0000000 Binary files a/training/runs/bc_dagger1_differential_field/policy.zip and /dev/null differ diff --git a/training/runs/bc_dagger2_differential_field/policy.zip b/training/runs/bc_dagger2_differential_field/policy.zip deleted file mode 100644 index 3ae78fa..0000000 Binary files a/training/runs/bc_dagger2_differential_field/policy.zip and /dev/null differ