diff --git a/.gitignore b/.gitignore
index 923d7e2..8851b64 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,7 @@
 # Python
 __pycache__/
+*.pyc
+.pytest_cache/
 
 # Training artefacts: ignore all run outputs except deployable policies
 training/runs/**
@@ -8,8 +10,22 @@ training/runs/**
 !training/runs/*/
 !training/runs/*/policy.zip
 
-# Webots launcher scratch
+# BC demo blobs — these get regenerated by `python -m training.bc.collect`
+# and are too large to track. Keep them out of git.
+training/bc/*.npz
+training/bc/v1/
+
+# Webots launcher scratch (the _test.wbt files are emitted on every run)
 worlds/**
 !worlds/field.wbt
 !worlds/field_round.wbt
 herding_runtime.cfg
+
+# Runtime logs — all of these are produced by training/eval/webots runs
+# and are not useful to track in git. Keep summary docs/markdown only.
+*.log
+calibrate_mecanum.log
+training/.run_done
+
+# Tooling
+.claude/
diff --git a/README.md b/README.md
index 2860859..c307841 100644
--- a/README.md
+++ b/README.md
@@ -2,18 +2,18 @@
 
 Group G25 — *Diogo Costa, Johnny Fernandes, Nelson Neto*
 
-A differential-drive shepherd dog that herds 1–10 sheep through a 3 m
-gate into an external pen. The dog has three deployable modes:
+A shepherd dog that herds 1–10 sheep through a 3 m gate into an
+external pen. The project supports two worlds (`field` rectangular,
+`field_round` circular), two drives (`differential`, `mecanum`), and
+four deployable control modes:
 
 | Mode | Source | Role |
 |---|---|---|
 | `strombom` | Strömbom et al. (2014) collect/drive heuristic | Analytic baseline |
-| `bc` | Behaviour cloning of the Strömbom teacher | Imitation learning result |
+| `sequential` | Single-target "pin-and-push" | Alternative analytic baseline |
+| `bc` | Behaviour cloning of the universal teacher | Imitation learning result |
 | `rl` | KL-regularised PPO fine-tune of `bc` | Reward-driven refinement |
 
-`sequential` (single-target pin-and-push) is kept as an alternative
-analytic baseline.
-
 ## Perception
 
 The dog perceives sheep **only through its front-mounted 140° LiDAR**
@@ -52,27 +52,39 @@ Privileged ground-truth perception is available for ablation —
 # 1. Set up the Python env (any venv with PyTorch + SB3)
 pip install -r training/requirements.txt
 
-# 2. Smoke test (70 pytest cases, < 1 s)
+# 2. Smoke test (126 pytest cases, < 1 s)
 make test
 
-# 3. Reproduce the full pipeline (~30–60 min CPU)
-make            # demos -> bc -> rl -> eval
+# 3. Reproduce a full pipeline (DRIVE+WORLD specific, ~1 h CPU)
+make DRIVE=differential WORLD=field       # demos -> bc -> rl -> eval
+make DRIVE=differential WORLD=field_round
+make DRIVE=mecanum     WORLD=field        # see note below
+make train_all                            # all 4 combos sequentially
 
 # Individual stages (each rebuilds upstream artefacts if missing):
-make bc_demos   # sim demos
-make bc         # behaviour clone
-make rl         # KL-PPO fine-tune
-make eval       # 10-seed env eval of rl
+make DRIVE=differential WORLD=field bc_demos   # sim demos
+make DRIVE=differential WORLD=field bc         # behaviour clone
+make DRIVE=differential WORLD=field rl         # KL-PPO fine-tune
+make DRIVE=differential WORLD=field eval       # 10-seed env eval
 
 # 4. Run in Webots
-make webots N=10 MODE=bc          # behaviour-cloned MLP
-make webots N=10 MODE=rl          # KL-PPO fine-tune
-make webots N=10 MODE=strombom    # analytic baseline
-# (or invoke directly: tools/run_webots.sh 10 rl)
+tools/run_webots.sh 10 bc differential field        # BC, diff, rect field
+tools/run_webots.sh 10 rl differential field_round  # RL, diff, round field
+tools/run_webots.sh 5 strombom differential field   # analytic baseline
+HERDING_USE_GT=1 tools/run_webots.sh 5 strombom differential field
+                                                    # GT bypass for ablation
 ```
 
-`make help` lists every target and the overridable hyperparameters
-(e.g. `make rl PPO_STEPS=2000000 KL=0.02`).
+`make help` lists every target and the overridable hyperparameters.
+
+**Mecanum note**: the `ShepherdDogMecanum.proto` uses physical roller
+hinges in Webots (committed 2026-05-16). The Webots calibration shows
+a ~60% strafe efficiency and ~28% backward bleed compared to textbook
+mecanum; the gym kinematics in `HERDING_MEC_WEBOTS` are tuned to
+match. **Mecanum BC/RL policies need to be retrained against this
+preset** — see `mecanum_proto_gap.md` in `memory/` for the 3-command
+flow. The v1 policies in `training/runs/{bc,rl}_mecanum_*` predate the
+proto rewrite and will not herd reliably in Webots until retrained.
 
 ## Documentation map
 
@@ -87,56 +99,67 @@ make webots N=10 MODE=strombom    # analytic baseline
 
 ```
 herding/                  — perception / control / world primitives
-  world/                  — environment-side physics & geometry
-    geometry.py             field/pen constants, robot specs
-    diffdrive.py            differential-drive kinematics
+  config.py               — frozen dataclasses for all tunable parameters;
+                            named presets HERDING_DEFAULT / HERDING_WEBOTS /
+                            HERDING_MEC_WEBOTS
+  world/
+    geometry.py             field/pen constants, world-shape switch
+    diffdrive.py            differential + mecanum kinematics
     flocking_sim.py         Reynolds + Strömbom 2014 sheep dynamics
-  perception/             — LiDAR → tracked-sheep pipeline
-    lidar_sim.py            fast 2D raycast for the env
+  perception/
+    lidar_sim.py            fast 2D raycast for the gym env
     lidar_perception.py     scan → world-frame cluster centroids + filters
     sheep_tracker.py        multi-target NN tracker with FOV memory
+                            and the consensus-promotion stage
     obs.py                  32-D order-invariant observation builder
-  control/                — every dog mode's action source
+  control/
     strombom.py             canonical CoM collect/drive heuristic
+                            (round-world aware)
     sequential.py           single-target "pin-and-push" alternative
-    active_scan.py          wraps a base teacher with opening rotation +
-                            walk-to-centre fallback
+    universal.py            teacher used for BC demo collection
+                            (Strömbom + mecanum omega + straggler recovery)
+    active_scan.py          rotate-on-empty + walk-to-centre fallback
     modulation.py           shared near-sheep speed-modulation helper
 
 controllers/
-  sheep/sheep.py          — Webots sheep controller (uses herding.world.flocking_sim)
+  sheep/sheep.py          — Webots sheep controller
   shepherd_dog/
     shepherd_dog.py       — Webots dog controller, mode-switched
-    policy_loader.py      — lazy SB3 policy loader (auto-detects frame stack)
+    policy_loader.py      — SB3 PPO / RecurrentPPO loader with frame stack
 
 training/
   herding_env.py          — Gymnasium env (LiDAR + tracker by default)
   bc/collect.py           — sim demos via the active-scan teacher
-  bc/pretrain.py          — supervised BC of (obs, action) demos into MLP
+  bc/pretrain.py          — supervised BC into MLP
   rl/train.py             — KL-regularised PPO fine-tune of BC
+  rl/train_lstm.py        — RecurrentPPO variant (ablation)
   eval.py                 — analytic + learned policy comparison harness
-  bc/demos.npz            — collected demonstrations (gitignored)
   runs/                   — checkpoints (whitelisted in .gitignore)
   requirements.txt
 
-tests/
-  conftest.py             — pytest setup (adds project root to sys.path)
-  test_geometry.py        — geometric predicates + constants
-  test_diffdrive.py       — kinematics and (vx, vy) → wheel-speed map
-  test_obs.py             — observation builder (shape, normalisation, order)
-  test_control.py         — speed modulation + analytic teachers + active scan
-  test_perception.py      — LiDAR sim + clustering + tracker
-  test_env.py             — Gymnasium contract + determinism + reward
+tests/                    — 126 pytest cases, < 1 s on CPU
 
 tools/
-  run_webots.sh           — launch Webots with N sheep + chosen mode
+  run_webots.sh           — launch Webots with N sheep + chosen mode + world
+  webots_sweep.sh         — headless sweep across modes × drives × worlds
+  webots_sweep_gt.sh      — same with HERDING_USE_GT=1 (perfect perception)
+  calibrate_mecanum.sh    — measure mecanum body velocity vs gym prediction
+  gen_mecanum_wheels.py   — regenerate the 32 mecanum roller hinges
+  benchmark_lidar.py      — tracker quality benchmark
 
-Makefile                  — pipeline orchestrator (make / make rl / make test / …)
+Makefile                  — pipeline orchestrator
+                            (make DRIVE=… WORLD=… rl, make train_all, …)
 
 worlds/
-  field.wbt               — main world (3 m gate, external pen)
+  field.wbt               — rectangular world (3 m gate, external pen)
+  field_round.wbt         — circular world (radius 15 m, same pen)
+
+protos/
+  Sheep.proto             — sheep robot
+  ShepherdDog.proto       — diff-drive dog, 140° LiDAR
+  ShepherdDog360.proto    — diff-drive dog, 360° LiDAR (ablation)
+  ShepherdDogMecanum.proto — 4-wheel mecanum with physical roller hinges
 
-protos/                   — Sheep / ShepherdDog robot definitions
 docs/project.md           — original course proposal/goals
 ```
 
@@ -151,48 +174,57 @@ scattering the flock. Direction (intent) is preserved.
 All modes also share the same EMA action smoother in
 `controllers/shepherd_dog/shepherd_dog.py:ACTION_SMOOTH = 0.55`.
 
-## Results — env eval, 10 seeds × n=1..10
+## Results — Webots end-to-end, canonical 140° LiDAR
 
-`max_steps=15000`, full-field spawn distribution. Success rate per
-flock size, then mean steps over successful seeds.
+Each cell is the step at which the dog had penned all N sheep in a
+single trial, run with `HERDING_USE_GT=0` (LiDAR perception, no
+ground-truth bypass) and the default consensus tracker.
 
-### Success rate (%)
+### Differential drive
 
-| n  | Strömbom | `bc` | `rl` |
-|---:|---:|---:|---:|
-|  1 |  30 |  80 | **90** |
-|  2 |  90 |  50 | **90** |
-|  3 |  60 |  90 | **90** |
-|  4 |  40 |  80 | **90** |
-|  5 |  60 |  70 | **100** |
-|  6 |  30 |  80 | 80 |
-|  7 |  70 |  80 | **100** |
-|  8 |  30 | 100 | **100** |
-|  9 |  40 |  90 | **100** |
-| 10 |  50 | 100 | **100** |
+| Mode | World | n=5 | n=10 |
+|---|---|---:|---:|
+| Strömbom    | field         | 7528  | 11620 |
+| Strömbom    | field_round   | 8611  | 10339 |
+| Sequential  | field         | 7135  | 16843 |
+| Sequential  | field_round   | 6019  | 8494 |
+| BC          | field         | 11698 | 15079 |
+| BC          | field_round   | 7234  | 11320 |
+| RL          | field         | 10039 | 13954 |
+| RL          | field_round   | 5803  | 9151 |
 
-### Mean penned per episode (out of n)
+RL is **strictly faster than BC** on every comparable cell.
 
-| n  | Strömbom | `bc` | `rl` |
-|---:|---:|---:|---:|
-|  1 | 0.30 | 0.80 | **0.90** |
-|  5 | 3.90 | 4.10 | **5.00** |
-|  8 | 4.20 | 8.00 | **8.00** |
-| 10 | 7.40 | 10.00 | **10.00** |
+### LiDAR vs GT bypass (diff drive)
 
-### Takeaways
+GT bypass replaces the LiDAR tracker with perfect emitter positions.
+LiDAR is the default; GT is a perception ablation
+(`HERDING_USE_GT=1`):
 
-- **BC clearly beats Strömbom** under realistic LiDAR conditions (full
-  field, partial observability). Strömbom struggles on small flocks
-  where a single sheep can spawn beyond the LiDAR's 12 m range; BC
-  learned active perception from the demos.
-- **RL refines BC** without regressing on any cell. Ties or beats BC
-  at every flock size; biggest gains at n=1 and n=4 where BC's
-  imitation of Strömbom's drive heuristic was sub-optimal.
-- **Aggressive reward shaping doesn't help** — a more aggressive
-  variant (β=0.02, W_TIME=-0.1, W_IMITATE=0, 3 M steps) trained as
-  an ablation was strictly worse than the conservative tune shipped
-  here (β=0.05, W_IMITATE=0.5, 1 M steps).
+| Mode | World | n=5 LiDAR | n=5 GT | n=10 LiDAR | n=10 GT |
+|---|---|---:|---:|---:|---:|
+| Strömbom   | field        | 7528  | **5254** | 11620 | **7342** |
+| Strömbom   | field_round  | 8611  | **3631** | 10339 | **7084** |
+| Sequential | field        | **7135** | 11092 | 16843 | **8698** |
+| Sequential | field_round  | 6019  | **3454** | 8494  | **7324** |
+
+GT is generally faster (perfect perception → fewer wasted steps).
+Sequential n=5 / field is the one cell where GT is *slower* — its
+straggler heuristic appears to over-correct when the dog has full
+information.
+
+### Mecanum (secondary — differential drive is the headline result)
+
+The `ShepherdDogMecanum.proto` was rewritten on 2026-05-16 with 32
+physical roller hinges, giving true omnidirectional motion in Webots
+(`tools/calibrate_mecanum.sh` confirms the X-pattern). The mecanum
+calibration shows ~60% strafe efficiency relative to textbook mecanum
+(compared with ~89% when driving forward), so the v1 mecanum BC/RL
+policies, trained on textbook gym mecanum, no longer herd reliably.
+The fix is staged but not yet run: the gym now has a `HERDING_MEC_WEBOTS`
+preset that matches Webots' physical mecanum, and
+`training/bc/collect.py` / `training/rl/train.py` auto-select it for
+mecanum runs. Retraining (≈ 2 h per combo, 4 combos) is the documented future step.
 
 ## License
 
diff --git a/herding/control/sequential.py b/herding/control/sequential.py
index 6347156..39a3614 100644
--- a/herding/control/sequential.py
+++ b/herding/control/sequential.py
@@ -80,48 +80,3 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
 
     ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
     return ax, ay, mode
-
-
-def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
-    """``compute_action`` plus a debug dict."""
-    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
-    if not active:
-        return 0.0, 0.0, "idle", {
-            "n_active": 0, "phase": "idle", "radius": 0.0, "threshold": 0.0,
-            "com_x": 0.0, "com_y": 0.0,
-            "target_x": dog_xy[0], "target_y": dog_xy[1],
-        }
-
-    n = len(active)
-    com_x = sum(p[0] for p in active) / n
-    com_y = sum(p[1] for p in active) / n
-    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
-    radius = max(dists)
-    threshold = F_FACTOR * math.sqrt(n)
-
-    if n <= STRAGGLER_THRESHOLD:
-        sx, sy = min(active,
-                     key=lambda p: math.hypot(p[0] - pen_target[0],
-                                              p[1] - pen_target[1]))
-        ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
-        tx, ty = sx + DELTA_TARGET * ux, sy + DELTA_TARGET * uy
-        mode = "targeted"
-
-    elif radius > threshold:
-        idx = max(range(n), key=lambda i: dists[i])
-        sx, sy = active[idx]
-        ux, uy = _unit(sx - com_x, sy - com_y)
-        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
-        mode = "collect"
-
-    else:
-        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
-        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
-        mode = "drive"
-
-    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
-    return ax, ay, mode, {
-        "n_active": n, "phase": mode, "radius": radius, "threshold": threshold,
-        "com_x": com_x, "com_y": com_y,
-        "target_x": tx, "target_y": ty,
-    }
diff --git a/herding/control/strombom.py b/herding/control/strombom.py
index 5ba46d1..fa77be4 100644
--- a/herding/control/strombom.py
+++ b/herding/control/strombom.py
@@ -76,40 +76,3 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
 
     ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
     return ax, ay, mode
-
-
-def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
-    """``compute_action`` plus a small debug dict (CoM, target, radius)."""
-    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
-    if not active:
-        return 0.0, 0.0, "idle", {
-            "n_active": 0, "radius": 0.0, "threshold": 0.0,
-            "com_x": 0.0, "com_y": 0.0,
-            "target_x": dog_xy[0], "target_y": dog_xy[1],
-        }
-
-    n = len(active)
-    com_x = sum(p[0] for p in active) / n
-    com_y = sum(p[1] for p in active) / n
-    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
-    radius = max(dists)
-    threshold = F_FACTOR * math.sqrt(n)
-
-    if radius > threshold:
-        idx = max(range(n), key=lambda i: dists[i])
-        sx, sy = active[idx]
-        ux, uy = _unit(sx - com_x, sy - com_y)
-        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
-        mode = "collect"
-    else:
-        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
-        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
-        mode = "drive"
-
-    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
-    dbg = {
-        "n_active": n, "radius": radius, "threshold": threshold,
-        "com_x": com_x, "com_y": com_y,
-        "target_x": tx, "target_y": ty,
-    }
-    return ax, ay, mode, dbg
diff --git a/herding/control/universal.py b/herding/control/universal.py
index 8edf22a..564beb1 100644
--- a/herding/control/universal.py
+++ b/herding/control/universal.py
@@ -207,17 +207,3 @@ def compute_action(dog_xy, dog_heading, sheep_positions,
         omega = max(-1.0, min(1.0, OMEGA_GAIN * err / math.pi))
 
     return ax, ay, omega, mode
-
-
-def compute_action_diff(dog_xy, dog_heading, sheep_positions,
-                        pen_target=PEN_ENTRY):
-    """Compatibility wrapper returning ``(vx, vy, mode)`` — same as Strömbom.
-
-    Use this when plugging into existing differential-drive code that
-    doesn't expect omega.
-    """
-    vx, vy, _omega, mode = compute_action(
-        dog_xy, dog_heading, sheep_positions, pen_target,
-        drive_mode="differential",
-    )
-    return vx, vy, mode
diff --git a/stage1_train.log b/stage1_train.log
deleted file mode 100644
index 210fc8d..0000000
--- a/stage1_train.log
+++ /dev/null
@@ -1,7 +0,0 @@
-make[1]: Entering directory '/run/host/home/johnnyf/Documents/Projects/TIR/project'
-make DRIVE=differential WORLD=field
-make[2]: Entering directory '/run/host/home/johnnyf/Documents/Projects/TIR/project'
-python -m training.eval --policy training/runs/rl_differential_field \
-	--max-flock 10 --max-steps 15000 --n-seeds 10 \
-	--drive-mode differential --world field
-make[2]: Leaving directory '/run/host/home/johnnyf/Documents/Projects/TIR/project'
diff --git a/training/bc/demos.npz b/training/bc/demos.npz
deleted file mode 100644
index b7738c1..0000000
Binary files a/training/bc/demos.npz and /dev/null differ
diff --git a/training/runs/bc_dagger1_differential_field/policy.zip b/training/runs/bc_dagger1_differential_field/policy.zip
deleted file mode 100644
index e04d8a3..0000000
Binary files a/training/runs/bc_dagger1_differential_field/policy.zip and /dev/null differ
diff --git a/training/runs/bc_dagger2_differential_field/policy.zip b/training/runs/bc_dagger2_differential_field/policy.zip
deleted file mode 100644
index 3ae78fa..0000000
Binary files a/training/runs/bc_dagger2_differential_field/policy.zip and /dev/null differ