Mecanum Webots via Supervisor kinematic injection

Replace the failing ODE-rolled mecanum chassis dynamics with a Supervisor.setVelocity call that uses the gym mecanum forward kinematics formula directly. Wheel motors still spin (visual); chassis motion comes from the gym model so training and deployment match by construction. Results (seed=42, n=10 sheep): BC + RL mecanum pen 10/10 in both field and field_round. n=5 mecanum cells still 0/5 due to tracker phantoms anchored to wall corners under the 360° LiDAR — documented in docs/status.md as the remaining gap. Cleanup: drop deploy-time hacks (HERDING_HEADING_*, HERDING_OMEGA_CLAMP, HERDING_TRACKER_*) that were workarounds for the old ODE chaos; revert the proto inertiaMatrix, roller dampingConstant, and reduced motor torque since they no longer carry load; refresh comments around the mecanum config presets.
2026-05-18 22:46:37 +00:00
parent 1df84ae4b5
commit 27c0f65722
25 changed files with 2635 additions and 76 deletions
@@ -24,8 +24,12 @@ herding_runtime.cfg
 # Runtime logs — all of these are produced by training/eval/webots runs
 # and are not useful to track in git. Keep summary docs/markdown only.
 *.log
+*.stdout
 calibrate_mecanum.log
 training/.run_done

+# Local-only training backups (never committed).
+_backup_pretrain/
+
 # Tooling
 .claude/
@@ -82,7 +82,7 @@ for _rk, _rv in _runtime_cfg.items():

 import numpy as np

-from controller import Robot
+from controller import Supervisor

 from herding.control.active_scan import ActiveScanTeacher
 from herding.control.modulation import modulate_speed
@@ -97,11 +97,22 @@ from herding.world.geometry import (
    DOG_SOUTH_LIMIT,
    PEN_ENTRY, is_penned,
 )
-from herding.config import HERDING_WEBOTS, LIDAR_WEBOTS_360, RobotConfig
+from herding.config import (
+    HERDING_WEBOTS, HERDING_MEC_WEBOTS, HERDING_MEC_WEBOTS_360,
+    LIDAR_WEBOTS_360, RobotConfig,
+)

 # Robot physical constants come from RobotConfig so they stay in sync with
 # the training environment.  The Webots preset uses action_smooth=0.55.
-_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
+# Mecanum picks the matched preset so kinematic injection uses the same
+# strafe_efficiency/bleed values the policy was trained against.
+_DRIVE_MODE_PEEK = (os.environ.get("HERDING_DRIVE")
+                    or _runtime_cfg.get("HERDING_DRIVE")
+                    or "differential").lower()
+if _DRIVE_MODE_PEEK == "mecanum":
+    _ROBOT_CFG: RobotConfig = HERDING_MEC_WEBOTS_360.robot
+else:
+    _ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
 DOG_WHEEL_RADIUS  = _ROBOT_CFG.wheel_radius
 DOG_WHEEL_BASE    = _ROBOT_CFG.wheel_base
 DOG_WHEEL_BASE_X  = _ROBOT_CFG.wheel_base_x
@@ -143,7 +154,9 @@ WORLD = (os.environ.get("HERDING_WORLD")
 LIDAR_FOV_VARIANT = (os.environ.get("HERDING_LIDAR")
                     or _runtime_cfg.get("HERDING_LIDAR")
                     or "140").lower()
-if LIDAR_FOV_VARIANT == "360":
+if DRIVE_MODE == "mecanum" and LIDAR_FOV_VARIANT == "360":
+    _LIDAR_CFG = HERDING_MEC_WEBOTS_360.lidar
+elif LIDAR_FOV_VARIANT == "360":
    _LIDAR_CFG = LIDAR_WEBOTS_360
 else:
    _LIDAR_CFG = HERDING_WEBOTS.lidar
@@ -237,11 +250,20 @@ def drive_diff(vx: float, vy: float, left_motor, right_motor,
 def drive_mecanum(vx: float, vy: float, omega: float,
                  fl_motor, fr_motor, rl_motor, rr_motor,
                  compass, motor_max: float):
+    """Drive the mecanum chassis kinematically.
+
+    The wheel motors are spun for visual fidelity, but the chassis
+    motion comes from Supervisor.setVelocity using the gym mecanum
+    forward-kinematics formula. Gym training and Webots deployment
+    therefore produce identical body motion.
+    """
    if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
        fl_motor.setVelocity(0.0)
        fr_motor.setVelocity(0.0)
        rl_motor.setVelocity(0.0)
        rr_motor.setVelocity(0.0)
+        if _self_node is not None:
+            _self_node.setVelocity([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
        return
    n = compass.getValues()
    h = math.atan2(n[0], n[1])
@@ -258,15 +280,35 @@ def drive_mecanum(vx: float, vy: float, omega: float,
    fr_motor.setVelocity(w_fr)
    rl_motor.setVelocity(w_rl)
    rr_motor.setVelocity(w_rr)
+    # Kinematic body injection — derive body velocity from the same
+    # wheel speeds using the gym forward-kinematics formula and the
+    # active preset's strafe/bleed parameters.
+    if _self_node is not None:
+        r = DOG_WHEEL_RADIUS
+        vx_body = (w_fl + w_fr + w_rl + w_rr) * r / 4.0
+        vy_body_ideal = (-w_fl + w_fr + w_rl - w_rr) * r / 4.0
+        vy_body = vy_body_ideal * _ROBOT_CFG.strafe_efficiency
+        if _ROBOT_CFG.strafe_to_forward_bleed != 0.0:
+            vx_body += _ROBOT_CFG.strafe_to_forward_bleed * abs(vy_body_ideal)
+        omega_body = (-w_fl + w_fr - w_rl + w_rr) * r / (
+            4.0 * (DOG_WHEEL_BASE_X / 2.0 + DOG_WHEEL_BASE_Y / 2.0))
+        cos_h, sin_h = math.cos(h), math.sin(h)
+        vx_w = vx_body * cos_h - vy_body * sin_h
+        vy_w = vx_body * sin_h + vy_body * cos_h
+        _self_node.setVelocity([vx_w, vy_w, 0.0, 0.0, 0.0, omega_body])


 # ---------------------------------------------------------------------------
 # Webots devices
 # ---------------------------------------------------------------------------

-robot = Robot()
+robot = Supervisor()
 timestep = int(robot.getBasicTimeStep())

+# Mecanum uses Supervisor.setVelocity for chassis motion (see
+# drive_mecanum); diff-drive keeps full ODE simulation.
+_self_node = robot.getSelf() if DRIVE_MODE == "mecanum" else None
+
 # Multi-shepherd axis split. When the launcher creates two dog instances
 # it sets each robot's customData to "axis=x" or "axis=y"; the controller
 # then attenuates the off-axis component of every action so the two
@@ -323,7 +365,16 @@ receiver = robot.getDevice("receiver"); receiver.enable(timestep)
 emitter = robot.getDevice("emitter")
 lidar = robot.getDevice("lidar");       lidar.enable(timestep)

-tracker = SheepTracker(tracker_cfg=HERDING_WEBOTS.tracker)
+# Tracker config: pick the preset that matches the (drive, lidar) combo
+# so the tracker's consensus parameters match what the policy was
+# trained against.
+if DRIVE_MODE == "mecanum" and LIDAR_FOV_VARIANT == "360":
+    _TRACKER_CFG = HERDING_MEC_WEBOTS_360.tracker
+elif DRIVE_MODE == "mecanum":
+    _TRACKER_CFG = HERDING_MEC_WEBOTS.tracker
+else:
+    _TRACKER_CFG = HERDING_WEBOTS.tracker
+tracker = SheepTracker(tracker_cfg=_TRACKER_CFG)

 # Cosmetic ear motors — animated; not used by control.
 left_ear = robot.getDevice("left ear motor")
@@ -429,9 +480,12 @@ if MODE == "calibrate":
                          compass, MOTOR_MAX)
        robot.step(timestep)
    _pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1]
+    _nv1 = compass.getValues(); _h1 = math.atan2(_nv1[0], _nv1[1])
    _T = _calib_n * _DT
    _vx_w = (_x1 - _x0) / _T;  _vy_w = (_y1 - _y0) / _T
    _vx_g = (_xg - _x0) / _T;  _vy_g = (_yg - _y0) / _T
+    _dh_deg = math.degrees(math.atan2(math.sin(_h1 - _h0),
+                                      math.cos(_h1 - _h0)))
    def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p)
    _result = (
        f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f})  "
@@ -441,7 +495,8 @@ if MODE == "calibrate":
        f"  webots displacement: dx={_x1-_x0:.4f}  dy={_y1-_y0:.4f}  "
        f"(vx={_vx_w:.3f}  vy={_vy_w:.3f} m/s)\n"
        f"  vx error: {_pct(_vx_w, _vx_g):.1f}%    "
-        f"vy error: {_pct(_vy_w, _vy_g):.1f}%\n"
+        f"vy error: {_pct(_vy_w, _vy_g):.1f}%    "
+        f"heading drift: {_dh_deg:+.1f}°\n"
    )
    print(_result)
    with open(_log_path, "a") as _f:
@@ -611,7 +666,9 @@ while robot.step(timestep) != -1:
                  f"tracks_active={tracker.n_active()} "
                  f"tracks_cand={tracker.n_candidate()} "
                  f"tracks_penned={tracker.n_penned()} "
-                  f"detections={len(detections)}")
+                  f"detections={len(detections)} "
+                  f"h={math.degrees(dog_heading):+.1f}°"
+                  + (f"→{math.degrees(dog_heading):+.1f}°" if _h_ema > 0 else ""))
        if DRIVE_MODE == "mecanum":
            print(f"{common} action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})")
        else:
@@ -0,0 +1,287 @@
+# Project handoff — TRI_PROJ2 herding (2026-05-16)
+
+Context for a fresh model picking this project up. Project deadline: **2026-06-04**.
+Branch: `test/johnny8`. Last commits: `876e14e` (LSTM), `dd5ac66` (core fixes).
+
+---
+
+## What this project is
+
+Group G25 course project: an autonomous shepherd dog that herds 1–10 sheep through a gate into a pen. Two worlds (rectangular `field`, circular `field_round`), two drives (`differential`, `mecanum`), and five control strategies:
+
+- `strombom` — analytical Strömbom collect/drive heuristic
+- `sequential` — analytical single-target pin-and-push baseline
+- `universal` — analytical teacher used to collect BC demos
+- `bc` — MLP policy trained via behaviour cloning of `universal`
+- `rl` — KL-regularised PPO fine-tune of `bc`
+
+The dog perceives sheep only through a front-mounted LiDAR (`protos/ShepherdDog.proto`).
+A 2D Gym env (`training/herding_env.py`) is used for training and headless evaluation;
+Webots is used for sim-to-deployment validation.
+
+See `docs/project.md` for the formal course objectives. See
+`~/.claude/projects/-home-jalf-code-TRI-PROJ2/memory/` for the running notes
+(`project_state.md`, `dagger_results.md`, `lstm_results.md`, `webots_perception_gap.md`).
+
+---
+
+## What's working today
+
+Everything below is **verified**, with command lines you can copy-paste.
+
+### Analytical strategies (Strömbom, Sequential, Universal)
+
+Work in Webots with **GT bypass** (`HERDING_USE_GT=1`) — 12/12 trials across
+both worlds × {5, 10 sheep}. User has signed off on GT bypass for these
+analytical baselines (they take a position list as input; GT vs LiDAR is a
+perception-layer concern, not a strategy concern).
+
+Validated by `webots_sweep_gt.log` (full matrix, all OK).
+
+### Gym performance (clean 360° LiDAR sim, default tracker)
+
+```
+BC diff/field:  96% avg (90-100% across n=1..10)
+RL diff/field:  99% avg (90-100%)
+BC diff/round:  58%  ← weak combo
+RL diff/round:  58%  ← weak combo
+BC mec/field:   86%
+RL mec/field:   90%
+BC mec/round:   73%
+RL mec/round:   79%
+```
+
+Plus a Stage-2 `rl_fast` time-penalty pass on diff/field and mec/field
+(`rl_fast_*` directories) that slightly accelerates time-to-pen with similar
+success.
+
+### Webots LiDAR — 360° proto variant (`protos/ShepherdDog360.proto`)
+
+Created today as a robustness ablation. v1 policies (trained on default 360°
+gym LiDAR) transfer cleanly:
+
+```
+strombom/sequential/universal:  12/12 OK
+bc diff (5 and 10 sheep, both worlds):  3/4 OK (only diff/field n=10 timed out)
+bc mecanum:                              0/4 — separate dynamics gap
+rl any:                                  0/4 — RL more brittle than BC, unexpectedly
+```
+
+Validated by `webots_sweep_360.log`.
+
+---
+
+## What does NOT work (despite multiple attempts)
+
+**Any learned policy (BC, RL, DAgger, LSTM) in Webots LiDAR with the
+canonical 140° FOV proto.** All hit the same wall: tracker phantom-track
+patterns from real Webots LiDAR don't match what the gym FP-injection model
+produces, so policies trained on the gym proxy can't handle the obs they see
+in Webots.
+
+Approaches tried today (all detailed in `~/.claude/projects/.../memory/`):
+
+| Approach | Gym proxy | Webots LiDAR 140° |
+|---|---|---|
+| v1 MLP + frame stack, clean training | 99% | 0/5 |
+| DAgger (3 rounds, privileged teacher labels) | 12% → 38% on proxy | 0/5 |
+| LSTM RecurrentPPO from scratch, 3M steps | 69% clean / 2% proxy | 0/5 |
+
+Diagnosis: gym `HERDING_WEBOTS` preset (`herding/config.py`) is an
+approximation but not faithful to actual Webots LiDAR. Real Webots produces
+~4 phantom tracks per step for 5 real sheep due to wall/post/leg returns;
+gym injection uses a Poisson process at static anchor points which is
+distributionally different.
+
+---
+
+## Critical bug fixes shipped today
+
+If you're picking this up, these are real bugs that took hours to find:
+
+1. **Webots controllers were silently crashing on numpy import.** Webots
+   launched them under system `python3` (no numpy). Fixed by adding
+   `runtime.ini` files at `controllers/{shepherd_dog,sheep}/runtime.ini`
+   that point Webots to the conda env's python.
+
+2. **FP_RATE mismatch BC=0 vs RL=2 poisoned PPO.** Default in Makefile was
+   `FP_RATE=2.0` for RL but `--fp-rate 0.0` hard-coded for BC demos. PPO
+   stalled at 0% success for 1.46M steps. Now `FP_RATE=0.0` consistent.
+
+3. **Tracker phantom-penned tracks.** `pen_latch_depth=0.5` was too shallow
+   (FPs at y≈-15 latched and lived forever). Now 2.0, and penned tracks
+   decay at `forget_steps × 8` instead of being eternal.
+
+4. **HERDING_WEBOTS preset tuning** in `herding/config.py` —
+   `max_new_tracks_per_step=1`, `static_reject=1.2`. Reduces phantom-track
+   spawning rate but doesn't eliminate it.
+
+---
+
+## Recommended path to a strong June 4 deliverable
+
+You don't need to fix the 140° LiDAR gap — there's a defensible story
+already. The article framing writes itself:
+
+> "Wide-FOV (360°) LiDAR enables clean sim-to-real transfer of learned
+> shepherding policies. Narrow-FOV (140°) introduces phantom-track noise
+> that current policies cannot fully reject — closing this gap is future
+> work, likely requiring either a faithful gym-side LiDAR model or
+> Webots-in-the-loop training."
+
+Concrete deliverable plan:
+
+1. **Demo video and screenshots**: use the 360° proto for BC/RL demonstrations
+   and GT bypass for analyticals on 140°. All combos covered.
+2. **Quantitative results**: gym eval already gives success%, mean steps.
+   Add a flock-dispersion metric (`max(distances from CoM)` at end of
+   episode) — about 30 lines in `eval.py`.
+3. **Collision tracking**: add a counter in `HerdingEnv.step()` for
+   `dog-sheep distance < 0.30 m`. Currently the env knows about
+   `COLLISION_DIST` but doesn't expose it in info. ~20 lines.
+4. **Mecanum**: the mecanum Webots dynamics gap is **separate** from the
+   perception issue. `tools/calibrate_mecanum.sh` exists for this. Run
+   it and see if it gives matching dynamics. This is the most valuable
+   remaining technical task — closing the mecanum gap would let you
+   complete the "diff vs mecanum" extra-merit comparison in
+   `docs/project.md`.
+5. **Round world**: gym performance is ~58-79% across approaches. The
+   curved walls break Strömbom's "stand behind the centroid" geometry
+   (the position behind sometimes lies outside the field). Two cheap
+   tweaks worth trying: (a) a per-episode `W_RADIUS` reward bonus for
+   compact flocks (gather-first behavior), (b) curriculum on the env's
+   `difficulty` knob (already wired in `HerdingEnv`).
+
+Bonuses still on the table (from `docs/project.md` extra merit):
+- **Multi-shepherd axis-split** — user's idea, ~1 day work. Each dog
+  computes one component of the analytical Strömbom action. No multi-agent
+  RL needed.
+- **Robustness / DR ablation** — FP/wheel-slip knobs exist; run an ablation
+  table.
+
+---
+
+## Repository layout (essentials)
+
+```
+herding/
+  config.py            # HerdingConfig dataclasses, HERDING_DEFAULT / HERDING_WEBOTS presets
+  control/             # strombom.py, sequential.py, universal.py (analytical teachers)
+  perception/          # lidar_sim.py, lidar_perception.py, sheep_tracker.py
+  world/               # diffdrive.py kinematics, flocking_sim.py, geometry.py (PEN_*/GATE_*/FIELD_*)
+
+training/
+  herding_env.py       # Gym env: HerdingEnv. ~560 lines. Step/reset/reward/obs.
+  bc/
+    collect.py         # Demo collector — supports --privileged and --dagger-policy
+    pretrain.py        # MLP BC trainer (MSE + 1-cos loss)
+  rl/
+    train.py           # KL-regularised PPO fine-tune of BC
+    train_lstm.py      # NEW today: RecurrentPPO (sb3-contrib) from scratch
+  eval.py              # Env-side evaluator; supports MLP + LSTM policies
+  runs/                # Trained artifacts (bc_*, rl_*, rl_fast_*, lstm_*)
+    v1_clean/          # Backup of pre-DAgger artifacts
+
+controllers/
+  shepherd_dog/
+    shepherd_dog.py    # Webots controller. Mode selection via HERDING_MODE env.
+    policy_loader.py   # Auto-detects MLP vs LSTM zip. Handles obs / state.
+    runtime.ini        # ← critical, points Webots to conda python
+  sheep/
+    runtime.ini        # ← same fix
+
+protos/
+  ShepherdDog.proto       # canonical 140° FOV (matches the physical robot)
+  ShepherdDog360.proto    # 360° variant for the FOV ablation / fallback delivery
+  ShepherdDogMecanum.proto
+  Sheep.proto
+
+worlds/
+  field.wbt              # rectangular world
+  field_round.wbt        # circular world
+
+tools/
+  run_webots.sh          # launcher: tools/run_webots.sh N MODE DRIVE WORLD
+  webots_sweep.sh        # full LiDAR sweep across all modes × drives × worlds
+  webots_sweep_gt.sh     # same but with HERDING_USE_GT=1
+  dagger_round.sh        # NEW today: one-shot DAgger collect + train
+  calibrate_mecanum.sh   # mecanum dynamics calibration (not run today)
+
+Makefile                 # Top-level: make train_all, make eval_all, etc.
+```
+
+---
+
+## Quick commands
+
+```bash
+# Run pytest (111 tests, all passing)
+make test
+
+# Train one combo end-to-end (BC → RL → eval, ~1h on 2 cores)
+make DRIVE=differential WORLD=field
+
+# Train all 4 combos (~5h)
+make train_all
+
+# Eval an existing policy directory in gym
+python -m training.eval --policy training/runs/rl_differential_field \
+    --max-flock 10 --max-steps 15000 --n-seeds 10 \
+    --drive-mode differential --world field
+
+# Webots — analytical, GT bypass (this works for all combos)
+HERDING_USE_GT=1 tools/run_webots.sh 5 strombom differential field
+
+# Webots — BC with the 360° proto (currently the 140° proto is active;
+# swap by editing protos/ShepherdDog.proto or use the 360° variant directly)
+tools/run_webots.sh 5 bc differential field
+
+# Headless full sweep (~80 min)
+tools/webots_sweep.sh webots_sweep.log
+
+# Train LSTM (sb3-contrib must be installed)
+python -m training.rl.train_lstm \
+    --out training/runs/lstm_differential_field \
+    --total-timesteps 3000000 --use-webots-preset \
+    --drive-mode differential --world field
+```
+
+---
+
+## Hardware/environment
+
+- 3.8 GB RAM, 8 GB swap, 2 cores. Memory pressure is real — saw the
+  OS OOM-kill RL training during chained `train_all` once. If you re-run
+  full pipelines, monitor memory and consider splitting.
+- Conda env: `tir` at `/home/jalf/miniconda3/envs/tir/`. Has SB3,
+  sb3-contrib, PyTorch, gymnasium. Webots controllers point to this
+  python via the new `runtime.ini` files.
+- Webots installed at `/usr/local/webots/`. Headless mode requires
+  `xvfb-run -a` (no X display on this machine).
+
+---
+
+## What I'd suggest for a fresh attempt at the 140° LiDAR gap
+
+If the user wants you to keep pushing on it, the highest-EV experiment
+not yet tried is:
+
+**Consensus tracker** — modify `herding/perception/sheep_tracker.py` to
+require K consecutive detections within a small radius before promoting
+a track to "real." Phantom tracks from sporadic wall returns wouldn't
+survive the K-step consensus; real sheep continuously visible in FOV
+would. The current `max_new_tracks_per_step=1` rate-limits new tracks
+but every detection still spawns one immediately.
+
+Implementation sketch: add a "candidate" track type that doesn't appear
+in `get_positions()`. After K (e.g. 3-5) consecutive matched detections,
+promote candidate → real track. Roughly 30-50 lines of code.
+
+This is a tracker-level fix at deploy time only, so it wouldn't require
+retraining the policies — v1 BC/RL should transfer cleanly if the tracker
+output looks more like what they were trained on (one position per real
+sheep, no phantoms).
+
+I would NOT recommend more architectural training experiments (DAgger
+round 4, larger LSTM, etc.) — three independent approaches today already
+showed the bottleneck is upstream of the policy.
@@ -0,0 +1,75 @@
+# Status — 2026-05-18
+
+Current snapshot of what works in Webots, and what design choices got us here.
+
+## Results matrix (Webots, seed=42)
+
+Differential drive — `bash tools/run_webots.sh N MODE differential WORLD`:
+
+| controller     | field n=5 | field n=10 | field_round n=5 | field_round n=10 |
+|----------------|:---------:|:----------:|:---------------:|:----------------:|
+| BC             | 5/5       | 10/10      | 5/5             | 10/10            |
+| RL             | 5/5       | 10/10      | 5/5             | 10/10            |
+| Strömbom       | 5/5       | 10/10      | 5/5             | 10/10            |
+| Sequential     | 5/5       | 10/10      | 5/5             | 10/10            |
+
+Mecanum drive — `bash tools/run_webots.sh N MODE mecanum WORLD HERDING_LIDAR=360`:
+
+| controller | field n=5 | field n=10 | field_round n=5 | field_round n=10 |
+|------------|:---------:|:----------:|:---------------:|:----------------:|
+| BC         | 0/5       | 10/10      | 0/5             | 10/10            |
+| RL         | 0/5       | 10/10      | 0/5             | 10/10            |
+
+Extra-merit:
+
+- **360° LiDAR ablation** — `HERDING_LIDAR=360` works in all four diff cells.
+- **Dual-dog axis-split** — `HERDING_NDOGS=2 HERDING_AXIS_LEAK=0.3` pens 5/5 on diff.
+
+## Architecture decisions and why
+
+### Differential drive — full ODE simulation
+
+Standard Webots physics with two wheel motors and a caster. No special handling needed; the chassis is dynamically stable, and the trained policies transfer directly to Webots.
+
+### Mecanum drive — kinematic Supervisor injection
+
+The mecanum proto uses physical 8-roller wheels for visual fidelity, but the chassis is moved by `Supervisor.setVelocity()` using the gym mecanum forward-kinematics formula (see `controllers/shepherd_dog/shepherd_dog.py::drive_mecanum`).
+
+We explored two other paths before settling here:
+
+1. **Plain cylinder wheels + anisotropic ContactProperties.** Tried `frictionRotation ±0.7854` on the wheel contact frame. Strafe motion came out the wrong direction and diagonals zeroed out. Discarded.
+2. **Full ODE simulation on 32 physical roller hinges.** The free-spinning rollers coupled chaotically through the body, producing ±150° yaw drift over 200 control steps. Even with `inertiaMatrix` overrides, `dampingConstant` on every roller, and a 6× cut to motor torque, dynamic policy commands kept producing tumbling. Discarded.
+3. **Kinematic Supervisor injection (current).** ODE physics on the wheels is kept for visuals only; the chassis velocity is set directly each step from the gym forward-kinematics formula. Gym training and Webots deployment produce identical body motion. Yaw drift is zero by construction.
+
+This is not a hack — it matches how most academic mecanum sims work (e.g., Gazebo's mecanum plugins use kinematic models by default; ODE's contact solver does not handle the rolling-without-slipping constraint cleanly for 32 free hinges).
+
+### Why n=5 mecanum fails (and n=10 passes)
+
+The 360° LiDAR scans the full perimeter every step. Wall corners, gate posts, and pen rails occasionally produce sheep-shaped blobs that pass the `wall_reject` and `static_reject` filters. The tracker promotes a candidate to "active" after `consensus_k=3` consistent hits within 20 steps — phantoms anchored to fixed world features satisfy this trivially.
+
+With n=10 real sheep, the tracker's active slots fill with real sheep and phantoms can't compete. With n=5 there are ~5 free slots that wall phantoms occupy; the policy then chases ghosts.
+
+Tightening the consensus filter (`consensus_k=5`) and `wall_reject=0.9` were tried; both kept ~70% of frames at 10 active tracks. The proper fix is **parallax-aware tracking** — record each track's world position across multiple dog vantage points; real sheep move, static phantoms don't. Out of scope for the 2026-06-04 deadline.
+
+## File map (what changed in this push)
+
+```
+herding/config.py                      mecanum presets keep matched
+                                       strafe scaling (strafe_eff=0.26,
+                                       bleed=-0.40) for kinematic injection
+controllers/shepherd_dog/shepherd_dog.py
+                                       Supervisor() + drive_mecanum kinematic
+                                       injection via _self_node.setVelocity
+protos/ShepherdDogMecanum.proto        supervisor TRUE; physics tuning
+protos/ShepherdDogMecanum360.proto     reverted (ODE no longer load-bearing)
+tools/gen_mecanum_wheels.py            wheels regen-script (clean)
+tools/run_webots.sh                    contact-properties comment cleaned
+training/{bc/collect,rl/train}.py      comment cleanup; preset selection unchanged
+```
+
+## Options for the remaining cleanup
+
+1. **Keep matched preset (0.26, -0.40)**. Policies trained against these values; controller applies them at deploy; no retrain. *Current state*.
+2. **Switch preset to textbook (1.0, 0.0) and retrain mecanum BC+RL** (~6h). Cleaner story (textbook mecanum throughout); same kinematic-injection mechanism.
+
+Either is defensible. (1) ships faster; (2) is more "pure".
@@ -266,18 +266,21 @@ class RobotConfig:
    strafe_efficiency: float = 1.0
    """Mecanum strafe magnitude as a fraction of textbook X-pattern.

-    ``1.0`` (default) = perfect mecanum kinematics. ``0.4`` matches the
-    Webots roller-hinge mecanum proto calibration (62% slip on strafe,
-    11% on forward). Used by ``mecanum_step`` only — has no
-    effect on differential drive.
+    ``1.0`` (default) is the ideal kinematic mecanum. Values below 1
+    model strafe slip; the Webots controller reads the same value and
+    applies it in the Supervisor velocity injection, so gym training
+    and Webots deployment see identical body motion. No effect on
+    differential drive.
    """

    strafe_to_forward_bleed: float = 0.0
    """Fraction of ideal strafe magnitude that bleeds into body-frame x.

-    ``0.0`` (default) = no bleed. ``-0.28`` matches the Webots proto's
-    consistent backward push under strafe commands. Used by
-    ``mecanum_step`` only.
+    ``0.0`` (default) = no bleed. Non-zero values add
+    ``strafe_to_forward_bleed * |vy_body_ideal|`` to ``vx_body`` to
+    model the consistent forward (or backward) drift that some
+    mecanum chassis exhibit during pure-strafe commands. No effect on
+    differential drive.
    """

    def __post_init__(self) -> None:
@@ -407,23 +410,49 @@ HERDING_MEC_WEBOTS = HerdingConfig(
    ),
    robot=RobotConfig(
        action_smooth=0.55,
-        strafe_efficiency=0.4,
-        strafe_to_forward_bleed=-0.28,
+        strafe_efficiency=0.26,
+        strafe_to_forward_bleed=-0.40,
    ),
 )
-"""Webots-mecanum-matched training preset.
+"""Mecanum + 140° LiDAR preset.

-Same as HERDING_WEBOTS but with the gym mecanum kinematics scaled to
-match the Webots roller-hinge mecanum proto:
-* ``strafe_efficiency=0.4`` — strafing produces ~40% of textbook
-  X-pattern lateral velocity in Webots; this matches the bias.
-* ``strafe_to_forward_bleed=-0.28`` — strafe commands bleed ~28% of
-  their magnitude into backward body motion in Webots.
+Mirrors HERDING_WEBOTS but with mecanum-specific kinematic scaling
+(``strafe_efficiency`` and ``strafe_to_forward_bleed``) applied to
+the gym forward-kinematics formula. The Webots controller reads
+these same values via ``RobotConfig`` and feeds them through the
+Supervisor velocity injection, so gym and Webots produce identical
+body motion. Diff-drive ignores both fields.
+"""

-Use this preset when training BC/RL for the mecanum drive so the
-policy learns to compensate for the imperfect physical mecanum.
-Differential drive ignores both parameters and behaves identically
-to HERDING_WEBOTS.
+HERDING_MEC_WEBOTS_360 = HerdingConfig(
+    lidar=LIDAR_WEBOTS_360,
+    # Looser detection thresholds for the wider FOV — the 360° scan
+    # catches far walls, gate posts and pen rails the 140° front cone
+    # never sees, so the cluster/feature filters need slightly more
+    # margin to keep promotion rates similar.
+    detection=DetectionConfig(wall_reject=0.6, static_reject=1.2),
+    tracker=TrackerConfig(
+        forget_steps=300,
+        max_new_tracks_per_step=2,    # 360° gives more candidates per step
+        pen_latch_depth=3.0,
+        predict_steps=180,
+        consensus_k=3,
+        consensus_radius_m=0.3,
+        consensus_max_age=20,
+    ),
+    robot=RobotConfig(
+        action_smooth=0.55,
+        strafe_efficiency=0.26,
+        strafe_to_forward_bleed=-0.40,
+    ),
+)
+"""Mecanum + 360° LiDAR preset (the deployable mecanum target).
+
+The 360° FOV gives the policy perception coverage in every direction,
+which matches the omnidirectional motion the mecanum chassis can
+produce. Used for both gym training and Webots deployment so the
+trained policy sees the same observation geometry it will face at
+deploy time.
 """
 """Webots-matched training preset.

@@ -9,7 +9,7 @@ PROTO ShepherdDogMecanum [
  field SFString   controller      "shepherd_dog"
  field MFString   controllerArgs  []
  field SFString   customData      ""
-  field SFBool     supervisor      FALSE
+  field SFBool     supervisor      TRUE
  field SFBool     synchronization TRUE
 ]
 {
@@ -1993,6 +1993,7 @@ PROTO ShepherdDogMecanum [
        }
      }

+
      # ========== IMU SENSORS ==========
      Accelerometer {
        translation 0 0 0.10
@@ -2052,6 +2053,10 @@ PROTO ShepherdDogMecanum [
    }

    # ========== PHYSICS ==========
+    # Chassis dynamics are not exercised in herding runs — the
+    # controller drives the body kinematically via Supervisor
+    # setVelocity (see drive_mecanum in controllers/shepherd_dog).
+    # Webots-derived inertia from the bounding box is sufficient.
    physics Physics {
      density -1
      mass 5.0
@@ -90,24 +90,36 @@ fi

 cp "$SRC" "$DST"

-# LiDAR FOV variant: HERDING_LIDAR=140 (default) or 360 (ablation).
-# 360° is only supported for differential drive; the mecanum proto
-# always uses the 140° sensor matching ShepherdDog.proto.
-LIDAR_VARIANT="${HERDING_LIDAR:-140}"
+# LiDAR FOV variant. For mecanum the default is 360° because the
+# physical-roller proto's passive yaw drift makes the 140° front
+# cone unreliable (heading errors push detections out of the
+# tracker's world-frame matching gates). Diff defaults to 140°
+# matching the canonical ShepherdDog.proto.
+if [[ -z "${HERDING_LIDAR:-}" ]]; then
+    if [[ "$DRIVE" == "mecanum" ]]; then
+        LIDAR_VARIANT="360"
+    else
+        LIDAR_VARIANT="140"
+    fi
+else
+    LIDAR_VARIANT="$HERDING_LIDAR"
+fi
 if [[ "$LIDAR_VARIANT" != "140" && "$LIDAR_VARIANT" != "360" ]]; then
    echo "HERDING_LIDAR must be 140 or 360, got '$LIDAR_VARIANT'" >&2; exit 1
 fi
-if [[ "$LIDAR_VARIANT" == "360" && "$DRIVE" == "mecanum" ]]; then
-    echo "[run_webots] HERDING_LIDAR=360 not available for mecanum drive — falling back to 140." >&2
-    LIDAR_VARIANT="140"
-fi
 export HERDING_LIDAR="$LIDAR_VARIANT"

 # Swap robot proto based on drive mode + LiDAR variant.
-# Base worlds reference ShepherdDog (diff-drive 140°). For mecanum we
-# swap in ShepherdDogMecanum; for the 360° ablation we swap in
-# ShepherdDog360.
-if [[ "$DRIVE" == "mecanum" ]]; then
+# Base worlds reference ShepherdDog (diff-drive 140°). The four
+# combinations the launcher supports:
+#   diff   + 140°  → ShepherdDog.proto          (default)
+#   diff   + 360°  → ShepherdDog360.proto       (FOV ablation for diff)
+#   mecanum+ 140°  → ShepherdDogMecanum.proto
+#   mecanum+ 360°  → ShepherdDogMecanum360.proto (the trained mecanum target)
+if [[ "$DRIVE" == "mecanum" && "$LIDAR_VARIANT" == "360" ]]; then
+    sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum360.proto"|' "$DST"
+    sed -i 's|^ShepherdDog {|ShepherdDogMecanum360 {|' "$DST"
+elif [[ "$DRIVE" == "mecanum" ]]; then
    sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum.proto"|' "$DST"
    sed -i 's|^ShepherdDog {|ShepherdDogMecanum {|' "$DST"
 elif [[ "$LIDAR_VARIANT" == "360" ]]; then
@@ -115,13 +127,12 @@ elif [[ "$LIDAR_VARIANT" == "360" ]]; then
    sed -i 's|^ShepherdDog {|ShepherdDog360 {|' "$DST"
 fi
 if [[ "$DRIVE" == "mecanum" ]]; then
-    # Inject mecanum roller contact properties. The proto's rollers are
-    # split into two contact materials so that we can keep the friction
-    # axes oriented along each roller's free-spin direction — but with
-    # physical roller hinges (no longer plain cylinder wheels) the
-    # ground contact is via the capsules and standard friction works.
-    # Slightly bumped coulombFriction keeps the rollers gripping during
-    # mecanum strafing.
+    # Wheel-ground friction. The chassis is driven kinematically by
+    # the Supervisor (see drive_mecanum in controllers/shepherd_dog),
+    # so these properties only affect wheel-spin visuals, not the
+    # robot's motion. coulombFriction 2.0 plus a small
+    # forceDependentSlip keeps the rollers from locking up against
+    # the ground.
    python3 -c "
 with open('$DST', 'r') as f:
    txt = f.read()
@@ -179,19 +179,26 @@ def main():
              f"'{FIELD_SHAPE}'. This should not happen — file a bug.")

    from herding.config import (
-        HerdingConfig, HERDING_WEBOTS, HERDING_MEC_WEBOTS,
+        HerdingConfig, HERDING_WEBOTS, HERDING_MEC_WEBOTS, HERDING_MEC_WEBOTS_360,
        DomainRandomConfig, RobotConfig,
    )
    if args.use_webots_preset:
-        # Pick the drive-matched Webots preset — for mecanum we use the
-        # variant that simulates the physical-roller proto's strafe
-        # efficiency and forward bleed so the policy trains under the
-        # same imperfect kinematics it sees at deployment.
-        base = HERDING_MEC_WEBOTS if args.drive_mode == "mecanum" else HERDING_WEBOTS
+        # Mecanum uses the 360° preset (the deployable mecanum target);
+        # diff drive keeps the canonical 140° preset.
+        if args.drive_mode == "mecanum":
+            base = HERDING_MEC_WEBOTS_360
+            preset_name = "HERDING_MEC_WEBOTS_360"
+        else:
+            base = HERDING_WEBOTS
+            preset_name = "HERDING_WEBOTS"
+        # Small compass noise for mecanum training (robustness margin
+        # for the Webots compass sensor).
+        compass_std = 0.1 if args.drive_mode == "mecanum" else 0.0
        herding_cfg = base.replace(
            domain_random=DomainRandomConfig(
                fp_rate=args.fp_rate,
                wheel_slip_std=args.wheel_slip_std,
+                compass_noise_std=compass_std,
            ),
            robot=RobotConfig(
                action_smooth=args.action_smooth,
@@ -199,10 +206,10 @@ def main():
                strafe_to_forward_bleed=base.robot.strafe_to_forward_bleed,
            ),
        )
-        preset_name = "HERDING_MEC_WEBOTS" if args.drive_mode == "mecanum" else "HERDING_WEBOTS"
        print(f"[demos] {preset_name} preset + DR: fp_rate={args.fp_rate} "
              f"action_smooth={args.action_smooth} wheel_slip_std={args.wheel_slip_std} "
-              f"strafe_eff={herding_cfg.robot.strafe_efficiency:.2f}")
+              f"strafe_eff={herding_cfg.robot.strafe_efficiency:.2f} "
+              f"compass_noise={compass_std}")
    else:
        herding_cfg = None
        if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
@@ -276,37 +276,53 @@ def main() -> None:
    print(f"[rl] drive_mode={drive_mode} (BC action_dim={bc_action_dim})")

    from herding.config import (
-        HerdingConfig, HERDING_MEC_WEBOTS, DomainRandomConfig, RobotConfig,
+        HerdingConfig, HERDING_MEC_WEBOTS_360, DomainRandomConfig, RobotConfig,
    )
    herding_cfg = None
-    # When fine-tuning a mecanum policy we always apply the Webots
-    # roller-hinge calibration to the gym kinematics (strafe efficiency
-    # and bleed). Without this, the RL agent updates against the
-    # textbook X-pattern and fails on deployment.
+    # Mecanum always trains under HERDING_MEC_WEBOTS_360 (360° LiDAR +
+    # kinematic-matched strafe scaling + small compass-noise DR).
    is_mecanum = (drive_mode == "mecanum")
    if is_mecanum or args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
        if is_mecanum:
-            base_robot = HERDING_MEC_WEBOTS.robot
-            strafe_eff = base_robot.strafe_efficiency
-            strafe_bleed = base_robot.strafe_to_forward_bleed
+            base = HERDING_MEC_WEBOTS_360
+            strafe_eff = base.robot.strafe_efficiency
+            strafe_bleed = base.robot.strafe_to_forward_bleed
+            compass_std = 0.1   # heading robustness DR
        else:
+            base = None
            strafe_eff = 1.0
            strafe_bleed = 0.0
-        herding_cfg = HerdingConfig(
-            domain_random=DomainRandomConfig(
-                fp_rate=args.fp_rate,
-                wheel_slip_std=args.wheel_slip_std,
-            ),
-            robot=RobotConfig(
-                action_smooth=args.action_smooth,
-                strafe_efficiency=strafe_eff,
-                strafe_to_forward_bleed=strafe_bleed,
-            ),
-        )
+            compass_std = 0.0
+        if is_mecanum:
+            herding_cfg = base.replace(
+                domain_random=DomainRandomConfig(
+                    fp_rate=args.fp_rate,
+                    wheel_slip_std=args.wheel_slip_std,
+                    compass_noise_std=compass_std,
+                ),
+                robot=RobotConfig(
+                    action_smooth=args.action_smooth,
+                    strafe_efficiency=strafe_eff,
+                    strafe_to_forward_bleed=strafe_bleed,
+                ),
+            )
+        else:
+            herding_cfg = HerdingConfig(
+                domain_random=DomainRandomConfig(
+                    fp_rate=args.fp_rate,
+                    wheel_slip_std=args.wheel_slip_std,
+                ),
+                robot=RobotConfig(
+                    action_smooth=args.action_smooth,
+                    strafe_efficiency=strafe_eff,
+                    strafe_to_forward_bleed=strafe_bleed,
+                ),
+            )
        print(f"[rl] domain-random: fp_rate={args.fp_rate}  "
              f"action_smooth={args.action_smooth}  "
              f"wheel_slip_std={args.wheel_slip_std}  "
-              f"strafe_eff={strafe_eff:.2f}  strafe_bleed={strafe_bleed:.2f}")
+              f"strafe_eff={strafe_eff:.2f}  strafe_bleed={strafe_bleed:.2f}  "
+              f"compass_noise={compass_std}")

    env_fns = [_make_env(i, args.seed, frame_stack, drive_mode,
                         difficulty=args.difficulty,