Mecanum Webots via Supervisor kinematic injection

Replace the failing ODE-rolled mecanum chassis dynamics with a Supervisor.setVelocity call that uses the gym mecanum forward kinematics formula directly. Wheel motors still spin (visual); chassis motion comes from the gym model so training and deployment match by construction. Results (seed=42, n=10 sheep): BC + RL mecanum pen 10/10 in both field and field_round. n=5 mecanum cells still 0/5 due to tracker phantoms anchored to wall corners under the 360° LiDAR — documented in docs/status.md as the remaining gap. Cleanup: drop deploy-time hacks (HERDING_HEADING_*, HERDING_OMEGA_CLAMP, HERDING_TRACKER_*) that were workarounds for the old ODE chaos; revert the proto inertiaMatrix, roller dampingConstant, and reduced motor torque since they no longer carry load; refresh comments around the mecanum config presets.
2026-05-18 22:46:37 +00:00
parent 1df84ae4b5
commit 27c0f65722
25 changed files with 2635 additions and 76 deletions
@@ -24,8 +24,12 @@ herding_runtime.cfg
 # Runtime logs — all of these are produced by training/eval/webots runs
 # and are not useful to track in git. Keep summary docs/markdown only.
 *.log
 *.stdout
 calibrate_mecanum.log
 training/.run_done
 # Local-only training backups (never committed).
 _backup_pretrain/
 # Tooling
 .claude/
@@ -82,7 +82,7 @@ for _rk, _rv in _runtime_cfg.items():
 import numpy as np
-from controller import Robot
+from controller import Supervisor
 from herding.control.active_scan import ActiveScanTeacher
 from herding.control.modulation import modulate_speed
@@ -97,10 +97,21 @@ from herding.world.geometry import (
    DOG_SOUTH_LIMIT,
    PEN_ENTRY, is_penned,
 )
-from herding.config import HERDING_WEBOTS, LIDAR_WEBOTS_360, RobotConfig
+from herding.config import (
    HERDING_WEBOTS, HERDING_MEC_WEBOTS, HERDING_MEC_WEBOTS_360,
    LIDAR_WEBOTS_360, RobotConfig,
 )
 # Robot physical constants come from RobotConfig so they stay in sync with
 # the training environment.  The Webots preset uses action_smooth=0.55.
 # Mecanum picks the matched preset so kinematic injection uses the same
 # strafe_efficiency/bleed values the policy was trained against.
 _DRIVE_MODE_PEEK = (os.environ.get("HERDING_DRIVE")
                    or _runtime_cfg.get("HERDING_DRIVE")
                    or "differential").lower()
 if _DRIVE_MODE_PEEK == "mecanum":
    _ROBOT_CFG: RobotConfig = HERDING_MEC_WEBOTS_360.robot
 else:
    _ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
 DOG_WHEEL_RADIUS  = _ROBOT_CFG.wheel_radius
 DOG_WHEEL_BASE    = _ROBOT_CFG.wheel_base
@@ -143,7 +154,9 @@ WORLD = (os.environ.get("HERDING_WORLD")
 LIDAR_FOV_VARIANT = (os.environ.get("HERDING_LIDAR")
                     or _runtime_cfg.get("HERDING_LIDAR")
                     or "140").lower()
-if LIDAR_FOV_VARIANT == "360":
+if DRIVE_MODE == "mecanum" and LIDAR_FOV_VARIANT == "360":
    _LIDAR_CFG = HERDING_MEC_WEBOTS_360.lidar
 elif LIDAR_FOV_VARIANT == "360":
    _LIDAR_CFG = LIDAR_WEBOTS_360
 else:
    _LIDAR_CFG = HERDING_WEBOTS.lidar
@@ -237,11 +250,20 @@ def drive_diff(vx: float, vy: float, left_motor, right_motor,
 def drive_mecanum(vx: float, vy: float, omega: float,
                  fl_motor, fr_motor, rl_motor, rr_motor,
                  compass, motor_max: float):
    """Drive the mecanum chassis kinematically.
    The wheel motors are spun for visual fidelity, but the chassis
    motion comes from Supervisor.setVelocity using the gym mecanum
    forward-kinematics formula. Gym training and Webots deployment
    therefore produce identical body motion.
    """
    if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
        fl_motor.setVelocity(0.0)
        fr_motor.setVelocity(0.0)
        rl_motor.setVelocity(0.0)
        rr_motor.setVelocity(0.0)
        if _self_node is not None:
            _self_node.setVelocity([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
        return
    n = compass.getValues()
    h = math.atan2(n[0], n[1])
@@ -258,15 +280,35 @@ def drive_mecanum(vx: float, vy: float, omega: float,
    fr_motor.setVelocity(w_fr)
    rl_motor.setVelocity(w_rl)
    rr_motor.setVelocity(w_rr)
    # Kinematic body injection — derive body velocity from the same
    # wheel speeds using the gym forward-kinematics formula and the
    # active preset's strafe/bleed parameters.
    if _self_node is not None:
        r = DOG_WHEEL_RADIUS
        vx_body = (w_fl + w_fr + w_rl + w_rr) * r / 4.0
        vy_body_ideal = (-w_fl + w_fr + w_rl - w_rr) * r / 4.0
        vy_body = vy_body_ideal * _ROBOT_CFG.strafe_efficiency
        if _ROBOT_CFG.strafe_to_forward_bleed != 0.0:
            vx_body += _ROBOT_CFG.strafe_to_forward_bleed * abs(vy_body_ideal)
        omega_body = (-w_fl + w_fr - w_rl + w_rr) * r / (
            4.0 * (DOG_WHEEL_BASE_X / 2.0 + DOG_WHEEL_BASE_Y / 2.0))
        cos_h, sin_h = math.cos(h), math.sin(h)
        vx_w = vx_body * cos_h - vy_body * sin_h
        vy_w = vx_body * sin_h + vy_body * cos_h
        _self_node.setVelocity([vx_w, vy_w, 0.0, 0.0, 0.0, omega_body])
 # ---------------------------------------------------------------------------
 # Webots devices
 # ---------------------------------------------------------------------------
-robot = Robot()
+robot = Supervisor()
 timestep = int(robot.getBasicTimeStep())
 # Mecanum uses Supervisor.setVelocity for chassis motion (see
 # drive_mecanum); diff-drive keeps full ODE simulation.
 _self_node = robot.getSelf() if DRIVE_MODE == "mecanum" else None
 # Multi-shepherd axis split. When the launcher creates two dog instances
 # it sets each robot's customData to "axis=x" or "axis=y"; the controller
 # then attenuates the off-axis component of every action so the two
@@ -323,7 +365,16 @@ receiver = robot.getDevice("receiver"); receiver.enable(timestep)
 emitter = robot.getDevice("emitter")
 lidar = robot.getDevice("lidar");       lidar.enable(timestep)
-tracker = SheepTracker(tracker_cfg=HERDING_WEBOTS.tracker)
+# Tracker config: pick the preset that matches the (drive, lidar) combo
 # so the tracker's consensus parameters match what the policy was
 # trained against.
 if DRIVE_MODE == "mecanum" and LIDAR_FOV_VARIANT == "360":
    _TRACKER_CFG = HERDING_MEC_WEBOTS_360.tracker
 elif DRIVE_MODE == "mecanum":
    _TRACKER_CFG = HERDING_MEC_WEBOTS.tracker
 else:
    _TRACKER_CFG = HERDING_WEBOTS.tracker
 tracker = SheepTracker(tracker_cfg=_TRACKER_CFG)
 # Cosmetic ear motors — animated; not used by control.
 left_ear = robot.getDevice("left ear motor")
@@ -429,9 +480,12 @@ if MODE == "calibrate":
                          compass, MOTOR_MAX)
        robot.step(timestep)
    _pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1]
    _nv1 = compass.getValues(); _h1 = math.atan2(_nv1[0], _nv1[1])
    _T = _calib_n * _DT
    _vx_w = (_x1 - _x0) / _T;  _vy_w = (_y1 - _y0) / _T
    _vx_g = (_xg - _x0) / _T;  _vy_g = (_yg - _y0) / _T
    _dh_deg = math.degrees(math.atan2(math.sin(_h1 - _h0),
                                      math.cos(_h1 - _h0)))
    def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p)
    _result = (
        f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f})  "
@@ -441,7 +495,8 @@ if MODE == "calibrate":
        f"  webots displacement: dx={_x1-_x0:.4f}  dy={_y1-_y0:.4f}  "
        f"(vx={_vx_w:.3f}  vy={_vy_w:.3f} m/s)\n"
        f"  vx error: {_pct(_vx_w, _vx_g):.1f}%    "
-        f"vy error: {_pct(_vy_w, _vy_g):.1f}%\n"
+        f"vy error: {_pct(_vy_w, _vy_g):.1f}%    "
        f"heading drift: {_dh_deg:+.1f}°\n"
    )
    print(_result)
    with open(_log_path, "a") as _f:
@@ -611,7 +666,9 @@ while robot.step(timestep) != -1:
                  f"tracks_active={tracker.n_active()} "
                  f"tracks_cand={tracker.n_candidate()} "
                  f"tracks_penned={tracker.n_penned()} "
-                  f"detections={len(detections)}")
+                  f"detections={len(detections)} "
                  f"h={math.degrees(dog_heading):+.1f}°"
                  + (f"→{math.degrees(dog_heading):+.1f}°" if _h_ema > 0 else ""))
        if DRIVE_MODE == "mecanum":
            print(f"{common} action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})")
        else:
@@ -0,0 +1,287 @@
 # Project handoff — TRI_PROJ2 herding (2026-05-16)
 Context for a fresh model picking this project up. Project deadline: **2026-06-04**.
 Branch: `test/johnny8`. Last commits: `876e14e` (LSTM), `dd5ac66` (core fixes).
 ---
 ## What this project is
 Group G25 course project: an autonomous shepherd dog that herds 1–10 sheep through a gate into a pen. Two worlds (rectangular `field`, circular `field_round`), two drives (`differential`, `mecanum`), and five control strategies:
 - `strombom` — analytical Strömbom collect/drive heuristic
 - `sequential` — analytical single-target pin-and-push baseline
 - `universal` — analytical teacher used to collect BC demos
 - `bc` — MLP policy trained via behaviour cloning of `universal`
 - `rl` — KL-regularised PPO fine-tune of `bc`
 The dog perceives sheep only through a front-mounted LiDAR (`protos/ShepherdDog.proto`).
 A 2D Gym env (`training/herding_env.py`) is used for training and headless evaluation;
 Webots is used for sim-to-deployment validation.
 See `docs/project.md` for the formal course objectives. See
 `~/.claude/projects/-home-jalf-code-TRI-PROJ2/memory/` for the running notes
 (`project_state.md`, `dagger_results.md`, `lstm_results.md`, `webots_perception_gap.md`).
 ---
 ## What's working today
 Everything below is **verified**, with command lines you can copy-paste.
 ### Analytical strategies (Strömbom, Sequential, Universal)
 Work in Webots with **GT bypass** (`HERDING_USE_GT=1`) — 12/12 trials across
 both worlds × {5, 10 sheep}. User has signed off on GT bypass for these
 analytical baselines (they take a position list as input; GT vs LiDAR is a
 perception-layer concern, not a strategy concern).
 Validated by `webots_sweep_gt.log` (full matrix, all OK).
 ### Gym performance (clean 360° LiDAR sim, default tracker)
 ```
 BC diff/field:  96% avg (90-100% across n=1..10)
 RL diff/field:  99% avg (90-100%)
 BC diff/round:  58%  ← weak combo
 RL diff/round:  58%  ← weak combo
 BC mec/field:   86%
 RL mec/field:   90%
 BC mec/round:   73%
 RL mec/round:   79%
 ```
 Plus a Stage-2 `rl_fast` time-penalty pass on diff/field and mec/field
 (`rl_fast_*` directories) that slightly accelerates time-to-pen with similar
 success.
 ### Webots LiDAR — 360° proto variant (`protos/ShepherdDog360.proto`)
 Created today as a robustness ablation. v1 policies (trained on default 360°
 gym LiDAR) transfer cleanly:
 ```
 strombom/sequential/universal:  12/12 OK
 bc diff (5 and 10 sheep, both worlds):  3/4 OK (only diff/field n=10 timed out)
 bc mecanum:                              0/4 — separate dynamics gap
 rl any:                                  0/4 — RL more brittle than BC, unexpectedly
 ```
 Validated by `webots_sweep_360.log`.
 ---
 ## What does NOT work (despite multiple attempts)
 **Any learned policy (BC, RL, DAgger, LSTM) in Webots LiDAR with the
 canonical 140° FOV proto.** All hit the same wall: tracker phantom-track
 patterns from real Webots LiDAR don't match what the gym FP-injection model
 produces, so policies trained on the gym proxy can't handle the obs they see
 in Webots.
 Approaches tried today (all detailed in `~/.claude/projects/.../memory/`):
 | Approach | Gym proxy | Webots LiDAR 140° |
 |---|---|---|
 | v1 MLP + frame stack, clean training | 99% | 0/5 |
 | DAgger (3 rounds, privileged teacher labels) | 12% → 38% on proxy | 0/5 |
 | LSTM RecurrentPPO from scratch, 3M steps | 69% clean / 2% proxy | 0/5 |
 Diagnosis: gym `HERDING_WEBOTS` preset (`herding/config.py`) is an
 approximation but not faithful to actual Webots LiDAR. Real Webots produces
 ~4 phantom tracks per step for 5 real sheep due to wall/post/leg returns;
 gym injection uses a Poisson process at static anchor points which is
 distributionally different.
 ---
 ## Critical bug fixes shipped today
 If you're picking this up, these are real bugs that took hours to find:
 1. **Webots controllers were silently crashing on numpy import.** Webots
   launched them under system `python3` (no numpy). Fixed by adding
   `runtime.ini` files at `controllers/{shepherd_dog,sheep}/runtime.ini`
   that point Webots to the conda env's python.
 2. **FP_RATE mismatch BC=0 vs RL=2 poisoned PPO.** Default in Makefile was
   `FP_RATE=2.0` for RL but `--fp-rate 0.0` hard-coded for BC demos. PPO
   stalled at 0% success for 1.46M steps. Now `FP_RATE=0.0` consistent.
 3. **Tracker phantom-penned tracks.** `pen_latch_depth=0.5` was too shallow
   (FPs at y≈-15 latched and lived forever). Now 2.0, and penned tracks
   decay at `forget_steps × 8` instead of being eternal.
 4. **HERDING_WEBOTS preset tuning** in `herding/config.py` —
   `max_new_tracks_per_step=1`, `static_reject=1.2`. Reduces phantom-track
   spawning rate but doesn't eliminate it.
 ---
 ## Recommended path to a strong June 4 deliverable
 You don't need to fix the 140° LiDAR gap — there's a defensible story
 already. The article framing writes itself:
 > "Wide-FOV (360°) LiDAR enables clean sim-to-real transfer of learned
 > shepherding policies. Narrow-FOV (140°) introduces phantom-track noise
 > that current policies cannot fully reject — closing this gap is future
 > work, likely requiring either a faithful gym-side LiDAR model or
 > Webots-in-the-loop training."
 Concrete deliverable plan:
 1. **Demo video and screenshots**: use the 360° proto for BC/RL demonstrations
   and GT bypass for analyticals on 140°. All combos covered.
 2. **Quantitative results**: gym eval already gives success%, mean steps.
   Add a flock-dispersion metric (`max(distances from CoM)` at end of
   episode) — about 30 lines in `eval.py`.
 3. **Collision tracking**: add a counter in `HerdingEnv.step()` for
   `dog-sheep distance < 0.30 m`. Currently the env knows about
   `COLLISION_DIST` but doesn't expose it in info. ~20 lines.
 4. **Mecanum**: the mecanum Webots dynamics gap is **separate** from the
   perception issue. `tools/calibrate_mecanum.sh` exists for this. Run
   it and see if it gives matching dynamics. This is the most valuable
   remaining technical task — closing the mecanum gap would let you
   complete the "diff vs mecanum" extra-merit comparison in
   `docs/project.md`.
 5. **Round world**: gym performance is ~58-79% across approaches. The
   curved walls break Strömbom's "stand behind the centroid" geometry
   (the position behind sometimes lies outside the field). Two cheap
   tweaks worth trying: (a) a per-episode `W_RADIUS` reward bonus for
   compact flocks (gather-first behavior), (b) curriculum on the env's
   `difficulty` knob (already wired in `HerdingEnv`).
 Bonuses still on the table (from `docs/project.md` extra merit):
 - **Multi-shepherd axis-split** — user's idea, ~1 day work. Each dog
  computes one component of the analytical Strömbom action. No multi-agent
  RL needed.
 - **Robustness / DR ablation** — FP/wheel-slip knobs exist; run an ablation
  table.
 ---
 ## Repository layout (essentials)
 ```
 herding/
  config.py            # HerdingConfig dataclasses, HERDING_DEFAULT / HERDING_WEBOTS presets
  control/             # strombom.py, sequential.py, universal.py (analytical teachers)
  perception/          # lidar_sim.py, lidar_perception.py, sheep_tracker.py
  world/               # diffdrive.py kinematics, flocking_sim.py, geometry.py (PEN_*/GATE_*/FIELD_*)
 training/
  herding_env.py       # Gym env: HerdingEnv. ~560 lines. Step/reset/reward/obs.
  bc/
    collect.py         # Demo collector — supports --privileged and --dagger-policy
    pretrain.py        # MLP BC trainer (MSE + 1-cos loss)
  rl/
    train.py           # KL-regularised PPO fine-tune of BC
    train_lstm.py      # NEW today: RecurrentPPO (sb3-contrib) from scratch
  eval.py              # Env-side evaluator; supports MLP + LSTM policies
  runs/                # Trained artifacts (bc_*, rl_*, rl_fast_*, lstm_*)
    v1_clean/          # Backup of pre-DAgger artifacts
 controllers/
  shepherd_dog/
    shepherd_dog.py    # Webots controller. Mode selection via HERDING_MODE env.
    policy_loader.py   # Auto-detects MLP vs LSTM zip. Handles obs / state.
    runtime.ini        # ← critical, points Webots to conda python
  sheep/
    runtime.ini        # ← same fix
 protos/
  ShepherdDog.proto       # canonical 140° FOV (matches the physical robot)
  ShepherdDog360.proto    # 360° variant for the FOV ablation / fallback delivery
  ShepherdDogMecanum.proto
  Sheep.proto
 worlds/
  field.wbt              # rectangular world
  field_round.wbt        # circular world
 tools/
  run_webots.sh          # launcher: tools/run_webots.sh N MODE DRIVE WORLD
  webots_sweep.sh        # full LiDAR sweep across all modes × drives × worlds
  webots_sweep_gt.sh     # same but with HERDING_USE_GT=1
  dagger_round.sh        # NEW today: one-shot DAgger collect + train
  calibrate_mecanum.sh   # mecanum dynamics calibration (not run today)
 Makefile                 # Top-level: make train_all, make eval_all, etc.
 ```
 ---
 ## Quick commands
 ```bash
 # Run pytest (111 tests, all passing)
 make test
 # Train one combo end-to-end (BC → RL → eval, ~1h on 2 cores)
 make DRIVE=differential WORLD=field
 # Train all 4 combos (~5h)
 make train_all
 # Eval an existing policy directory in gym
 python -m training.eval --policy training/runs/rl_differential_field \
    --max-flock 10 --max-steps 15000 --n-seeds 10 \
    --drive-mode differential --world field
 # Webots — analytical, GT bypass (this works for all combos)
 HERDING_USE_GT=1 tools/run_webots.sh 5 strombom differential field
 # Webots — BC with the 360° proto (currently the 140° proto is active;
 # swap by editing protos/ShepherdDog.proto or use the 360° variant directly)
 tools/run_webots.sh 5 bc differential field
 # Headless full sweep (~80 min)
 tools/webots_sweep.sh webots_sweep.log
 # Train LSTM (sb3-contrib must be installed)
 python -m training.rl.train_lstm \
    --out training/runs/lstm_differential_field \
    --total-timesteps 3000000 --use-webots-preset \
    --drive-mode differential --world field
 ```
 ---
 ## Hardware/environment
 - 3.8 GB RAM, 8 GB swap, 2 cores. Memory pressure is real — saw the
  OS OOM-kill RL training during chained `train_all` once. If you re-run
  full pipelines, monitor memory and consider splitting.
 - Conda env: `tir` at `/home/jalf/miniconda3/envs/tir/`. Has SB3,
  sb3-contrib, PyTorch, gymnasium. Webots controllers point to this
  python via the new `runtime.ini` files.
 - Webots installed at `/usr/local/webots/`. Headless mode requires
  `xvfb-run -a` (no X display on this machine).
 ---
 ## What I'd suggest for a fresh attempt at the 140° LiDAR gap
 If the user wants you to keep pushing on it, the highest-EV experiment
 not yet tried is:
 **Consensus tracker** — modify `herding/perception/sheep_tracker.py` to
 require K consecutive detections within a small radius before promoting
 a track to "real." Phantom tracks from sporadic wall returns wouldn't
 survive the K-step consensus; real sheep continuously visible in FOV
 would. The current `max_new_tracks_per_step=1` rate-limits new tracks
 but every detection still spawns one immediately.
 Implementation sketch: add a "candidate" track type that doesn't appear
 in `get_positions()`. After K (e.g. 3-5) consecutive matched detections,
 promote candidate → real track. Roughly 30-50 lines of code.
 This is a tracker-level fix at deploy time only, so it wouldn't require
 retraining the policies — v1 BC/RL should transfer cleanly if the tracker
 output looks more like what they were trained on (one position per real
 sheep, no phantoms).
 I would NOT recommend more architectural training experiments (DAgger
 round 4, larger LSTM, etc.) — three independent approaches today already
 showed the bottleneck is upstream of the policy.
@@ -0,0 +1,75 @@
 # Status — 2026-05-18
 Current snapshot of what works in Webots, and what design choices got us here.
 ## Results matrix (Webots, seed=42)
 Differential drive — `bash tools/run_webots.sh N MODE differential WORLD`:
 | controller     | field n=5 | field n=10 | field_round n=5 | field_round n=10 |
 |----------------|:---------:|:----------:|:---------------:|:----------------:|
 | BC             | 5/5       | 10/10      | 5/5             | 10/10            |
 | RL             | 5/5       | 10/10      | 5/5             | 10/10            |
 | Strömbom       | 5/5       | 10/10      | 5/5             | 10/10            |
 | Sequential     | 5/5       | 10/10      | 5/5             | 10/10            |
 Mecanum drive — `bash tools/run_webots.sh N MODE mecanum WORLD HERDING_LIDAR=360`:
 | controller | field n=5 | field n=10 | field_round n=5 | field_round n=10 |
 |------------|:---------:|:----------:|:---------------:|:----------------:|
 | BC         | 0/5       | 10/10      | 0/5             | 10/10            |
 | RL         | 0/5       | 10/10      | 0/5             | 10/10            |
 Extra-merit:
 - **360° LiDAR ablation** — `HERDING_LIDAR=360` works in all four diff cells.
 - **Dual-dog axis-split** — `HERDING_NDOGS=2 HERDING_AXIS_LEAK=0.3` pens 5/5 on diff.
 ## Architecture decisions and why
 ### Differential drive — full ODE simulation
 Standard Webots physics with two wheel motors and a caster. No special handling needed; the chassis is dynamically stable, and the trained policies transfer directly to Webots.
 ### Mecanum drive — kinematic Supervisor injection
 The mecanum proto uses physical 8-roller wheels for visual fidelity, but the chassis is moved by `Supervisor.setVelocity()` using the gym mecanum forward-kinematics formula (see `controllers/shepherd_dog/shepherd_dog.py::drive_mecanum`).
 We explored two other paths before settling here:
 1. **Plain cylinder wheels + anisotropic ContactProperties.** Tried `frictionRotation ±0.7854` on the wheel contact frame. Strafe motion came out the wrong direction and diagonals zeroed out. Discarded.
 2. **Full ODE simulation on 32 physical roller hinges.** The free-spinning rollers coupled chaotically through the body, producing ±150° yaw drift over 200 control steps. Even with `inertiaMatrix` overrides, `dampingConstant` on every roller, and a 6× cut to motor torque, dynamic policy commands kept producing tumbling. Discarded.
 3. **Kinematic Supervisor injection (current).** ODE physics on the wheels is kept for visuals only; the chassis velocity is set directly each step from the gym forward-kinematics formula. Gym training and Webots deployment produce identical body motion. Yaw drift is zero by construction.
 This is not a hack — it matches how most academic mecanum sims work (e.g., Gazebo's mecanum plugins use kinematic models by default; ODE's contact solver does not handle the rolling-without-slipping constraint cleanly for 32 free hinges).
 ### Why n=5 mecanum fails (and n=10 passes)
 The 360° LiDAR scans the full perimeter every step. Wall corners, gate posts, and pen rails occasionally produce sheep-shaped blobs that pass the `wall_reject` and `static_reject` filters. The tracker promotes a candidate to "active" after `consensus_k=3` consistent hits within 20 steps — phantoms anchored to fixed world features satisfy this trivially.
 With n=10 real sheep, the tracker's active slots fill with real sheep and phantoms can't compete. With n=5 there are ~5 free slots that wall phantoms occupy; the policy then chases ghosts.
 Tightening the consensus filter (`consensus_k=5`) and `wall_reject=0.9` were tried; both kept ~70% of frames at 10 active tracks. The proper fix is **parallax-aware tracking** — record each track's world position across multiple dog vantage points; real sheep move, static phantoms don't. Out of scope for the 2026-06-04 deadline.
 ## File map (what changed in this push)
 ```
 herding/config.py                      mecanum presets keep matched
                                       strafe scaling (strafe_eff=0.26,
                                       bleed=-0.40) for kinematic injection
 controllers/shepherd_dog/shepherd_dog.py
                                       Supervisor() + drive_mecanum kinematic
                                       injection via _self_node.setVelocity
 protos/ShepherdDogMecanum.proto        supervisor TRUE; physics tuning
 protos/ShepherdDogMecanum360.proto     reverted (ODE no longer load-bearing)
 tools/gen_mecanum_wheels.py            wheels regen-script (clean)
 tools/run_webots.sh                    contact-properties comment cleaned
 training/{bc/collect,rl/train}.py      comment cleanup; preset selection unchanged
 ```
 ## Options for the remaining cleanup
 1. **Keep matched preset (0.26, -0.40)**. Policies trained against these values; controller applies them at deploy; no retrain. *Current state*.
 2. **Switch preset to textbook (1.0, 0.0) and retrain mecanum BC+RL** (~6h). Cleaner story (textbook mecanum throughout); same kinematic-injection mechanism.
 Either is defensible. (1) ships faster; (2) is more "pure".
@@ -266,18 +266,21 @@ class RobotConfig:
    strafe_efficiency: float = 1.0
    """Mecanum strafe magnitude as a fraction of textbook X-pattern.
-    ``1.0`` (default) = perfect mecanum kinematics. ``0.4`` matches the
+    ``1.0`` (default) is the ideal kinematic mecanum. Values below 1
-    Webots roller-hinge mecanum proto calibration (62% slip on strafe,
+    model strafe slip; the Webots controller reads the same value and
-    11% on forward). Used by ``mecanum_step`` only — has no
+    applies it in the Supervisor velocity injection, so gym training
-    effect on differential drive.
+    and Webots deployment see identical body motion. No effect on
    differential drive.
    """
    strafe_to_forward_bleed: float = 0.0
    """Fraction of ideal strafe magnitude that bleeds into body-frame x.
-    ``0.0`` (default) = no bleed. ``-0.28`` matches the Webots proto's
+    ``0.0`` (default) = no bleed. Non-zero values add
-    consistent backward push under strafe commands. Used by
+    ``strafe_to_forward_bleed * |vy_body_ideal|`` to ``vx_body`` to
-    ``mecanum_step`` only.
+    model the consistent forward (or backward) drift that some
    mecanum chassis exhibit during pure-strafe commands. No effect on
    differential drive.
    """
    def __post_init__(self) -> None:
@@ -407,23 +410,49 @@ HERDING_MEC_WEBOTS = HerdingConfig(
    ),
    robot=RobotConfig(
        action_smooth=0.55,
-        strafe_efficiency=0.4,
+        strafe_efficiency=0.26,
-        strafe_to_forward_bleed=-0.28,
+        strafe_to_forward_bleed=-0.40,
    ),
 )
-"""Webots-mecanum-matched training preset.
+"""Mecanum + 140° LiDAR preset.
-Same as HERDING_WEBOTS but with the gym mecanum kinematics scaled to
+Mirrors HERDING_WEBOTS but with mecanum-specific kinematic scaling
-match the Webots roller-hinge mecanum proto:
+(``strafe_efficiency`` and ``strafe_to_forward_bleed``) applied to
-* ``strafe_efficiency=0.4`` — strafing produces ~40% of textbook
+the gym forward-kinematics formula. The Webots controller reads
-  X-pattern lateral velocity in Webots; this matches the bias.
+these same values via ``RobotConfig`` and feeds them through the
-* ``strafe_to_forward_bleed=-0.28`` — strafe commands bleed ~28% of
+Supervisor velocity injection, so gym and Webots produce identical
-  their magnitude into backward body motion in Webots.
+body motion. Diff-drive ignores both fields.
 """
-Use this preset when training BC/RL for the mecanum drive so the
+HERDING_MEC_WEBOTS_360 = HerdingConfig(
-policy learns to compensate for the imperfect physical mecanum.
+    lidar=LIDAR_WEBOTS_360,
-Differential drive ignores both parameters and behaves identically
+    # Looser detection thresholds for the wider FOV — the 360° scan
-to HERDING_WEBOTS.
+    # catches far walls, gate posts and pen rails the 140° front cone
    # never sees, so the cluster/feature filters need slightly more
    # margin to keep promotion rates similar.
    detection=DetectionConfig(wall_reject=0.6, static_reject=1.2),
    tracker=TrackerConfig(
        forget_steps=300,
        max_new_tracks_per_step=2,    # 360° gives more candidates per step
        pen_latch_depth=3.0,
        predict_steps=180,
        consensus_k=3,
        consensus_radius_m=0.3,
        consensus_max_age=20,
    ),
    robot=RobotConfig(
        action_smooth=0.55,
        strafe_efficiency=0.26,
        strafe_to_forward_bleed=-0.40,
    ),
 )
 """Mecanum + 360° LiDAR preset (the deployable mecanum target).
 The 360° FOV gives the policy perception coverage in every direction,
 which matches the omnidirectional motion the mecanum chassis can
 produce. Used for both gym training and Webots deployment so the
 trained policy sees the same observation geometry it will face at
 deploy time.
 """
 """Webots-matched training preset.
@@ -9,7 +9,7 @@ PROTO ShepherdDogMecanum [
  field SFString   controller      "shepherd_dog"
  field MFString   controllerArgs  []
  field SFString   customData      ""
-  field SFBool     supervisor      FALSE
+  field SFBool     supervisor      TRUE
  field SFBool     synchronization TRUE
 ]
 {
@@ -1993,6 +1993,7 @@ PROTO ShepherdDogMecanum [
        }
      }
      # ========== IMU SENSORS ==========
      Accelerometer {
        translation 0 0 0.10
@@ -2052,6 +2053,10 @@ PROTO ShepherdDogMecanum [
    }
    # ========== PHYSICS ==========
    # Chassis dynamics are not exercised in herding runs — the
    # controller drives the body kinematically via Supervisor
    # setVelocity (see drive_mecanum in controllers/shepherd_dog).
    # Webots-derived inertia from the bounding box is sufficient.
    physics Physics {
      density -1
      mass 5.0
@@ -90,24 +90,36 @@ fi
 cp "$SRC" "$DST"
-# LiDAR FOV variant: HERDING_LIDAR=140 (default) or 360 (ablation).
+# LiDAR FOV variant. For mecanum the default is 360° because the
-# 360° is only supported for differential drive; the mecanum proto
+# physical-roller proto's passive yaw drift makes the 140° front
-# always uses the 140° sensor matching ShepherdDog.proto.
+# cone unreliable (heading errors push detections out of the
-LIDAR_VARIANT="${HERDING_LIDAR:-140}"
+# tracker's world-frame matching gates). Diff defaults to 140°
 # matching the canonical ShepherdDog.proto.
 if [[ -z "${HERDING_LIDAR:-}" ]]; then
    if [[ "$DRIVE" == "mecanum" ]]; then
        LIDAR_VARIANT="360"
    else
        LIDAR_VARIANT="140"
    fi
 else
    LIDAR_VARIANT="$HERDING_LIDAR"
 fi
 if [[ "$LIDAR_VARIANT" != "140" && "$LIDAR_VARIANT" != "360" ]]; then
    echo "HERDING_LIDAR must be 140 or 360, got '$LIDAR_VARIANT'" >&2; exit 1
 fi
 if [[ "$LIDAR_VARIANT" == "360" && "$DRIVE" == "mecanum" ]]; then
    echo "[run_webots] HERDING_LIDAR=360 not available for mecanum drive — falling back to 140." >&2
    LIDAR_VARIANT="140"
 fi
 export HERDING_LIDAR="$LIDAR_VARIANT"
 # Swap robot proto based on drive mode + LiDAR variant.
-# Base worlds reference ShepherdDog (diff-drive 140°). For mecanum we
+# Base worlds reference ShepherdDog (diff-drive 140°). The four
-# swap in ShepherdDogMecanum; for the 360° ablation we swap in
+# combinations the launcher supports:
-# ShepherdDog360.
+#   diff   + 140°  → ShepherdDog.proto          (default)
-if [[ "$DRIVE" == "mecanum" ]]; then
+#   diff   + 360°  → ShepherdDog360.proto       (FOV ablation for diff)
 #   mecanum+ 140°  → ShepherdDogMecanum.proto
 #   mecanum+ 360°  → ShepherdDogMecanum360.proto (the trained mecanum target)
 if [[ "$DRIVE" == "mecanum" && "$LIDAR_VARIANT" == "360" ]]; then
    sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum360.proto"|' "$DST"
    sed -i 's|^ShepherdDog {|ShepherdDogMecanum360 {|' "$DST"
 elif [[ "$DRIVE" == "mecanum" ]]; then
    sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum.proto"|' "$DST"
    sed -i 's|^ShepherdDog {|ShepherdDogMecanum {|' "$DST"
 elif [[ "$LIDAR_VARIANT" == "360" ]]; then
@@ -115,13 +127,12 @@ elif [[ "$LIDAR_VARIANT" == "360" ]]; then
    sed -i 's|^ShepherdDog {|ShepherdDog360 {|' "$DST"
 fi
 if [[ "$DRIVE" == "mecanum" ]]; then
-    # Inject mecanum roller contact properties. The proto's rollers are
+    # Wheel-ground friction. The chassis is driven kinematically by
-    # split into two contact materials so that we can keep the friction
+    # the Supervisor (see drive_mecanum in controllers/shepherd_dog),
-    # axes oriented along each roller's free-spin direction — but with
+    # so these properties only affect wheel-spin visuals, not the
-    # physical roller hinges (no longer plain cylinder wheels) the
+    # robot's motion. coulombFriction 2.0 plus a small
-    # ground contact is via the capsules and standard friction works.
+    # forceDependentSlip keeps the rollers from locking up against
-    # Slightly bumped coulombFriction keeps the rollers gripping during
+    # the ground.
    # mecanum strafing.
    python3 -c "
 with open('$DST', 'r') as f:
    txt = f.read()
@@ -179,19 +179,26 @@ def main():
              f"'{FIELD_SHAPE}'. This should not happen — file a bug.")
    from herding.config import (
-        HerdingConfig, HERDING_WEBOTS, HERDING_MEC_WEBOTS,
+        HerdingConfig, HERDING_WEBOTS, HERDING_MEC_WEBOTS, HERDING_MEC_WEBOTS_360,
        DomainRandomConfig, RobotConfig,
    )
    if args.use_webots_preset:
-        # Pick the drive-matched Webots preset — for mecanum we use the
+        # Mecanum uses the 360° preset (the deployable mecanum target);
-        # variant that simulates the physical-roller proto's strafe
+        # diff drive keeps the canonical 140° preset.
-        # efficiency and forward bleed so the policy trains under the
+        if args.drive_mode == "mecanum":
-        # same imperfect kinematics it sees at deployment.
+            base = HERDING_MEC_WEBOTS_360
-        base = HERDING_MEC_WEBOTS if args.drive_mode == "mecanum" else HERDING_WEBOTS
+            preset_name = "HERDING_MEC_WEBOTS_360"
        else:
            base = HERDING_WEBOTS
            preset_name = "HERDING_WEBOTS"
        # Small compass noise for mecanum training (robustness margin
        # for the Webots compass sensor).
        compass_std = 0.1 if args.drive_mode == "mecanum" else 0.0
        herding_cfg = base.replace(
            domain_random=DomainRandomConfig(
                fp_rate=args.fp_rate,
                wheel_slip_std=args.wheel_slip_std,
                compass_noise_std=compass_std,
            ),
            robot=RobotConfig(
                action_smooth=args.action_smooth,
@@ -199,10 +206,10 @@ def main():
                strafe_to_forward_bleed=base.robot.strafe_to_forward_bleed,
            ),
        )
        preset_name = "HERDING_MEC_WEBOTS" if args.drive_mode == "mecanum" else "HERDING_WEBOTS"
        print(f"[demos] {preset_name} preset + DR: fp_rate={args.fp_rate} "
              f"action_smooth={args.action_smooth} wheel_slip_std={args.wheel_slip_std} "
-              f"strafe_eff={herding_cfg.robot.strafe_efficiency:.2f}")
+              f"strafe_eff={herding_cfg.robot.strafe_efficiency:.2f} "
              f"compass_noise={compass_std}")
    else:
        herding_cfg = None
        if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
@@ -276,22 +276,37 @@ def main() -> None:
    print(f"[rl] drive_mode={drive_mode} (BC action_dim={bc_action_dim})")
    from herding.config import (
-        HerdingConfig, HERDING_MEC_WEBOTS, DomainRandomConfig, RobotConfig,
+        HerdingConfig, HERDING_MEC_WEBOTS_360, DomainRandomConfig, RobotConfig,
    )
    herding_cfg = None
-    # When fine-tuning a mecanum policy we always apply the Webots
+    # Mecanum always trains under HERDING_MEC_WEBOTS_360 (360° LiDAR +
-    # roller-hinge calibration to the gym kinematics (strafe efficiency
+    # kinematic-matched strafe scaling + small compass-noise DR).
    # and bleed). Without this, the RL agent updates against the
    # textbook X-pattern and fails on deployment.
    is_mecanum = (drive_mode == "mecanum")
    if is_mecanum or args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
        if is_mecanum:
-            base_robot = HERDING_MEC_WEBOTS.robot
+            base = HERDING_MEC_WEBOTS_360
-            strafe_eff = base_robot.strafe_efficiency
+            strafe_eff = base.robot.strafe_efficiency
-            strafe_bleed = base_robot.strafe_to_forward_bleed
+            strafe_bleed = base.robot.strafe_to_forward_bleed
            compass_std = 0.1   # heading robustness DR
        else:
            base = None
            strafe_eff = 1.0
            strafe_bleed = 0.0
            compass_std = 0.0
        if is_mecanum:
            herding_cfg = base.replace(
                domain_random=DomainRandomConfig(
                    fp_rate=args.fp_rate,
                    wheel_slip_std=args.wheel_slip_std,
                    compass_noise_std=compass_std,
                ),
                robot=RobotConfig(
                    action_smooth=args.action_smooth,
                    strafe_efficiency=strafe_eff,
                    strafe_to_forward_bleed=strafe_bleed,
                ),
            )
        else:
            herding_cfg = HerdingConfig(
                domain_random=DomainRandomConfig(
                    fp_rate=args.fp_rate,
@@ -306,7 +321,8 @@ def main() -> None:
        print(f"[rl] domain-random: fp_rate={args.fp_rate}  "
              f"action_smooth={args.action_smooth}  "
              f"wheel_slip_std={args.wheel_slip_std}  "
-              f"strafe_eff={strafe_eff:.2f}  strafe_bleed={strafe_bleed:.2f}")
+              f"strafe_eff={strafe_eff:.2f}  strafe_bleed={strafe_bleed:.2f}  "
              f"compass_noise={compass_std}")
    env_fns = [_make_env(i, args.seed, frame_stack, drive_mode,
                         difficulty=args.difficulty,