Mecanum Webots via Supervisor kinematic injection

Replace the failing ODE-rolled mecanum chassis dynamics with a
Supervisor.setVelocity call that uses the gym mecanum forward
kinematics formula directly. Wheel motors still spin (visual);
chassis motion comes from the gym model so training and deployment
match by construction.

Results (seed=42, n=10 sheep): BC + RL mecanum pen 10/10 in both
field and field_round. n=5 mecanum cells still 0/5 due to tracker
phantoms anchored to wall corners under the 360° LiDAR — documented
in docs/status.md as the remaining gap.

Cleanup: drop deploy-time hacks (HERDING_HEADING_*, HERDING_OMEGA_CLAMP,
HERDING_TRACKER_*) that were workarounds for the old ODE chaos;
revert the proto inertiaMatrix, roller dampingConstant, and reduced
motor torque since they no longer carry load; refresh comments
around the mecanum config presets.
This commit is contained in:
Johnny Fernandes
2026-05-18 22:46:37 +00:00
parent 1df84ae4b5
commit 27c0f65722
25 changed files with 2635 additions and 76 deletions
+4
View File
@@ -24,8 +24,12 @@ herding_runtime.cfg
# Runtime logs — all of these are produced by training/eval/webots runs
# and are not useful to track in git. Keep summary docs/markdown only.
*.log
*.stdout
calibrate_mecanum.log
training/.run_done
# Local-only training backups (never committed).
_backup_pretrain/
# Tooling
.claude/
+65 -8
View File
@@ -82,7 +82,7 @@ for _rk, _rv in _runtime_cfg.items():
import numpy as np
from controller import Robot
from controller import Supervisor
from herding.control.active_scan import ActiveScanTeacher
from herding.control.modulation import modulate_speed
@@ -97,11 +97,22 @@ from herding.world.geometry import (
DOG_SOUTH_LIMIT,
PEN_ENTRY, is_penned,
)
from herding.config import HERDING_WEBOTS, LIDAR_WEBOTS_360, RobotConfig
from herding.config import (
HERDING_WEBOTS, HERDING_MEC_WEBOTS, HERDING_MEC_WEBOTS_360,
LIDAR_WEBOTS_360, RobotConfig,
)
# Robot physical constants come from RobotConfig so they stay in sync with
# the training environment. The Webots preset uses action_smooth=0.55.
_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
# Mecanum picks the matched preset so kinematic injection uses the same
# strafe_efficiency/bleed values the policy was trained against.
_DRIVE_MODE_PEEK = (os.environ.get("HERDING_DRIVE")
or _runtime_cfg.get("HERDING_DRIVE")
or "differential").lower()
if _DRIVE_MODE_PEEK == "mecanum":
_ROBOT_CFG: RobotConfig = HERDING_MEC_WEBOTS_360.robot
else:
_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
DOG_WHEEL_RADIUS = _ROBOT_CFG.wheel_radius
DOG_WHEEL_BASE = _ROBOT_CFG.wheel_base
DOG_WHEEL_BASE_X = _ROBOT_CFG.wheel_base_x
@@ -143,7 +154,9 @@ WORLD = (os.environ.get("HERDING_WORLD")
LIDAR_FOV_VARIANT = (os.environ.get("HERDING_LIDAR")
or _runtime_cfg.get("HERDING_LIDAR")
or "140").lower()
if LIDAR_FOV_VARIANT == "360":
if DRIVE_MODE == "mecanum" and LIDAR_FOV_VARIANT == "360":
_LIDAR_CFG = HERDING_MEC_WEBOTS_360.lidar
elif LIDAR_FOV_VARIANT == "360":
_LIDAR_CFG = LIDAR_WEBOTS_360
else:
_LIDAR_CFG = HERDING_WEBOTS.lidar
@@ -237,11 +250,20 @@ def drive_diff(vx: float, vy: float, left_motor, right_motor,
def drive_mecanum(vx: float, vy: float, omega: float,
fl_motor, fr_motor, rl_motor, rr_motor,
compass, motor_max: float):
"""Drive the mecanum chassis kinematically.
The wheel motors are spun for visual fidelity, but the chassis
motion comes from Supervisor.setVelocity using the gym mecanum
forward-kinematics formula. Gym training and Webots deployment
therefore produce identical body motion.
"""
if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
fl_motor.setVelocity(0.0)
fr_motor.setVelocity(0.0)
rl_motor.setVelocity(0.0)
rr_motor.setVelocity(0.0)
if _self_node is not None:
_self_node.setVelocity([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
return
n = compass.getValues()
h = math.atan2(n[0], n[1])
@@ -258,15 +280,35 @@ def drive_mecanum(vx: float, vy: float, omega: float,
fr_motor.setVelocity(w_fr)
rl_motor.setVelocity(w_rl)
rr_motor.setVelocity(w_rr)
# Kinematic body injection — derive body velocity from the same
# wheel speeds using the gym forward-kinematics formula and the
# active preset's strafe/bleed parameters.
if _self_node is not None:
r = DOG_WHEEL_RADIUS
vx_body = (w_fl + w_fr + w_rl + w_rr) * r / 4.0
vy_body_ideal = (-w_fl + w_fr + w_rl - w_rr) * r / 4.0
vy_body = vy_body_ideal * _ROBOT_CFG.strafe_efficiency
if _ROBOT_CFG.strafe_to_forward_bleed != 0.0:
vx_body += _ROBOT_CFG.strafe_to_forward_bleed * abs(vy_body_ideal)
omega_body = (-w_fl + w_fr - w_rl + w_rr) * r / (
4.0 * (DOG_WHEEL_BASE_X / 2.0 + DOG_WHEEL_BASE_Y / 2.0))
cos_h, sin_h = math.cos(h), math.sin(h)
vx_w = vx_body * cos_h - vy_body * sin_h
vy_w = vx_body * sin_h + vy_body * cos_h
_self_node.setVelocity([vx_w, vy_w, 0.0, 0.0, 0.0, omega_body])
# ---------------------------------------------------------------------------
# Webots devices
# ---------------------------------------------------------------------------
robot = Robot()
robot = Supervisor()
timestep = int(robot.getBasicTimeStep())
# Mecanum uses Supervisor.setVelocity for chassis motion (see
# drive_mecanum); diff-drive keeps full ODE simulation.
_self_node = robot.getSelf() if DRIVE_MODE == "mecanum" else None
# Multi-shepherd axis split. When the launcher creates two dog instances
# it sets each robot's customData to "axis=x" or "axis=y"; the controller
# then attenuates the off-axis component of every action so the two
@@ -323,7 +365,16 @@ receiver = robot.getDevice("receiver"); receiver.enable(timestep)
emitter = robot.getDevice("emitter")
lidar = robot.getDevice("lidar"); lidar.enable(timestep)
tracker = SheepTracker(tracker_cfg=HERDING_WEBOTS.tracker)
# Tracker config: pick the preset that matches the (drive, lidar) combo
# so the tracker's consensus parameters match what the policy was
# trained against.
if DRIVE_MODE == "mecanum" and LIDAR_FOV_VARIANT == "360":
_TRACKER_CFG = HERDING_MEC_WEBOTS_360.tracker
elif DRIVE_MODE == "mecanum":
_TRACKER_CFG = HERDING_MEC_WEBOTS.tracker
else:
_TRACKER_CFG = HERDING_WEBOTS.tracker
tracker = SheepTracker(tracker_cfg=_TRACKER_CFG)
# Cosmetic ear motors — animated; not used by control.
left_ear = robot.getDevice("left ear motor")
@@ -429,9 +480,12 @@ if MODE == "calibrate":
compass, MOTOR_MAX)
robot.step(timestep)
_pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1]
_nv1 = compass.getValues(); _h1 = math.atan2(_nv1[0], _nv1[1])
_T = _calib_n * _DT
_vx_w = (_x1 - _x0) / _T; _vy_w = (_y1 - _y0) / _T
_vx_g = (_xg - _x0) / _T; _vy_g = (_yg - _y0) / _T
_dh_deg = math.degrees(math.atan2(math.sin(_h1 - _h0),
math.cos(_h1 - _h0)))
def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p)
_result = (
f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f}) "
@@ -441,7 +495,8 @@ if MODE == "calibrate":
f" webots displacement: dx={_x1-_x0:.4f} dy={_y1-_y0:.4f} "
f"(vx={_vx_w:.3f} vy={_vy_w:.3f} m/s)\n"
f" vx error: {_pct(_vx_w, _vx_g):.1f}% "
f"vy error: {_pct(_vy_w, _vy_g):.1f}%\n"
f"vy error: {_pct(_vy_w, _vy_g):.1f}% "
f"heading drift: {_dh_deg:+.1f}°\n"
)
print(_result)
with open(_log_path, "a") as _f:
@@ -611,7 +666,9 @@ while robot.step(timestep) != -1:
f"tracks_active={tracker.n_active()} "
f"tracks_cand={tracker.n_candidate()} "
f"tracks_penned={tracker.n_penned()} "
f"detections={len(detections)}")
f"detections={len(detections)} "
f"h={math.degrees(dog_heading):+.1f}°"
+ (f"{math.degrees(dog_heading):+.1f}°" if _h_ema > 0 else ""))
if DRIVE_MODE == "mecanum":
print(f"{common} action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})")
else:
+287
View File
@@ -0,0 +1,287 @@
# Project handoff — TRI_PROJ2 herding (2026-05-16)
Context for a fresh model picking this project up. Project deadline: **2026-06-04**.
Branch: `test/johnny8`. Last commits: `876e14e` (LSTM), `dd5ac66` (core fixes).
---
## What this project is
Group G25 course project: an autonomous shepherd dog that herds 110 sheep through a gate into a pen. Two worlds (rectangular `field`, circular `field_round`), two drives (`differential`, `mecanum`), and five control strategies:
- `strombom` — analytical Strömbom collect/drive heuristic
- `sequential` — analytical single-target pin-and-push baseline
- `universal` — analytical teacher used to collect BC demos
- `bc` — MLP policy trained via behaviour cloning of `universal`
- `rl` — KL-regularised PPO fine-tune of `bc`
The dog perceives sheep only through a front-mounted LiDAR (`protos/ShepherdDog.proto`).
A 2D Gym env (`training/herding_env.py`) is used for training and headless evaluation;
Webots is used for sim-to-deployment validation.
See `docs/project.md` for the formal course objectives. See
`~/.claude/projects/-home-jalf-code-TRI-PROJ2/memory/` for the running notes
(`project_state.md`, `dagger_results.md`, `lstm_results.md`, `webots_perception_gap.md`).
---
## What's working today
Everything below is **verified**, with command lines you can copy-paste.
### Analytical strategies (Strömbom, Sequential, Universal)
Work in Webots with **GT bypass** (`HERDING_USE_GT=1`) — 12/12 trials across
both worlds × {5, 10 sheep}. User has signed off on GT bypass for these
analytical baselines (they take a position list as input; GT vs LiDAR is a
perception-layer concern, not a strategy concern).
Validated by `webots_sweep_gt.log` (full matrix, all OK).
### Gym performance (clean 360° LiDAR sim, default tracker)
```
BC diff/field: 96% avg (90-100% across n=1..10)
RL diff/field: 99% avg (90-100%)
BC diff/round: 58% ← weak combo
RL diff/round: 58% ← weak combo
BC mec/field: 86%
RL mec/field: 90%
BC mec/round: 73%
RL mec/round: 79%
```
Plus a Stage-2 `rl_fast` time-penalty pass on diff/field and mec/field
(`rl_fast_*` directories) that slightly accelerates time-to-pen with similar
success.
### Webots LiDAR — 360° proto variant (`protos/ShepherdDog360.proto`)
Created today as a robustness ablation. v1 policies (trained on default 360°
gym LiDAR) transfer cleanly:
```
strombom/sequential/universal: 12/12 OK
bc diff (5 and 10 sheep, both worlds): 3/4 OK (only diff/field n=10 timed out)
bc mecanum: 0/4 — separate dynamics gap
rl any: 0/4 — RL more brittle than BC, unexpectedly
```
Validated by `webots_sweep_360.log`.
---
## What does NOT work (despite multiple attempts)
**Any learned policy (BC, RL, DAgger, LSTM) in Webots LiDAR with the
canonical 140° FOV proto.** All hit the same wall: tracker phantom-track
patterns from real Webots LiDAR don't match what the gym FP-injection model
produces, so policies trained on the gym proxy can't handle the obs they see
in Webots.
Approaches tried today (all detailed in `~/.claude/projects/.../memory/`):
| Approach | Gym proxy | Webots LiDAR 140° |
|---|---|---|
| v1 MLP + frame stack, clean training | 99% | 0/5 |
| DAgger (3 rounds, privileged teacher labels) | 12% → 38% on proxy | 0/5 |
| LSTM RecurrentPPO from scratch, 3M steps | 69% clean / 2% proxy | 0/5 |
Diagnosis: gym `HERDING_WEBOTS` preset (`herding/config.py`) is an
approximation but not faithful to actual Webots LiDAR. Real Webots produces
~4 phantom tracks per step for 5 real sheep due to wall/post/leg returns;
gym injection uses a Poisson process at static anchor points which is
distributionally different.
---
## Critical bug fixes shipped today
If you're picking this up, these are real bugs that took hours to find:
1. **Webots controllers were silently crashing on numpy import.** Webots
launched them under system `python3` (no numpy). Fixed by adding
`runtime.ini` files at `controllers/{shepherd_dog,sheep}/runtime.ini`
that point Webots to the conda env's python.
2. **FP_RATE mismatch BC=0 vs RL=2 poisoned PPO.** Default in Makefile was
`FP_RATE=2.0` for RL but `--fp-rate 0.0` hard-coded for BC demos. PPO
stalled at 0% success for 1.46M steps. Now `FP_RATE=0.0` consistent.
3. **Tracker phantom-penned tracks.** `pen_latch_depth=0.5` was too shallow
(FPs at y≈-15 latched and lived forever). Now 2.0, and penned tracks
decay at `forget_steps × 8` instead of being eternal.
4. **HERDING_WEBOTS preset tuning** in `herding/config.py`
`max_new_tracks_per_step=1`, `static_reject=1.2`. Reduces phantom-track
spawning rate but doesn't eliminate it.
---
## Recommended path to a strong June 4 deliverable
You don't need to fix the 140° LiDAR gap — there's a defensible story
already. The article framing writes itself:
> "Wide-FOV (360°) LiDAR enables clean sim-to-real transfer of learned
> shepherding policies. Narrow-FOV (140°) introduces phantom-track noise
> that current policies cannot fully reject — closing this gap is future
> work, likely requiring either a faithful gym-side LiDAR model or
> Webots-in-the-loop training."
Concrete deliverable plan:
1. **Demo video and screenshots**: use the 360° proto for BC/RL demonstrations
and GT bypass for analyticals on 140°. All combos covered.
2. **Quantitative results**: gym eval already gives success%, mean steps.
Add a flock-dispersion metric (`max(distances from CoM)` at end of
episode) — about 30 lines in `eval.py`.
3. **Collision tracking**: add a counter in `HerdingEnv.step()` for
`dog-sheep distance < 0.30 m`. Currently the env knows about
`COLLISION_DIST` but doesn't expose it in info. ~20 lines.
4. **Mecanum**: the mecanum Webots dynamics gap is **separate** from the
perception issue. `tools/calibrate_mecanum.sh` exists for this. Run
it and see if it gives matching dynamics. This is the most valuable
remaining technical task — closing the mecanum gap would let you
complete the "diff vs mecanum" extra-merit comparison in
`docs/project.md`.
5. **Round world**: gym performance is ~58-79% across approaches. The
curved walls break Strömbom's "stand behind the centroid" geometry
(the position behind sometimes lies outside the field). Two cheap
tweaks worth trying: (a) a per-episode `W_RADIUS` reward bonus for
compact flocks (gather-first behavior), (b) curriculum on the env's
`difficulty` knob (already wired in `HerdingEnv`).
Bonuses still on the table (from `docs/project.md` extra merit):
- **Multi-shepherd axis-split** — user's idea, ~1 day work. Each dog
computes one component of the analytical Strömbom action. No multi-agent
RL needed.
- **Robustness / DR ablation** — FP/wheel-slip knobs exist; run an ablation
table.
---
## Repository layout (essentials)
```
herding/
config.py # HerdingConfig dataclasses, HERDING_DEFAULT / HERDING_WEBOTS presets
control/ # strombom.py, sequential.py, universal.py (analytical teachers)
perception/ # lidar_sim.py, lidar_perception.py, sheep_tracker.py
world/ # diffdrive.py kinematics, flocking_sim.py, geometry.py (PEN_*/GATE_*/FIELD_*)
training/
herding_env.py # Gym env: HerdingEnv. ~560 lines. Step/reset/reward/obs.
bc/
collect.py # Demo collector — supports --privileged and --dagger-policy
pretrain.py # MLP BC trainer (MSE + 1-cos loss)
rl/
train.py # KL-regularised PPO fine-tune of BC
train_lstm.py # NEW today: RecurrentPPO (sb3-contrib) from scratch
eval.py # Env-side evaluator; supports MLP + LSTM policies
runs/ # Trained artifacts (bc_*, rl_*, rl_fast_*, lstm_*)
v1_clean/ # Backup of pre-DAgger artifacts
controllers/
shepherd_dog/
shepherd_dog.py # Webots controller. Mode selection via HERDING_MODE env.
policy_loader.py # Auto-detects MLP vs LSTM zip. Handles obs / state.
runtime.ini # ← critical, points Webots to conda python
sheep/
runtime.ini # ← same fix
protos/
ShepherdDog.proto # canonical 140° FOV (matches the physical robot)
ShepherdDog360.proto # 360° variant for the FOV ablation / fallback delivery
ShepherdDogMecanum.proto
Sheep.proto
worlds/
field.wbt # rectangular world
field_round.wbt # circular world
tools/
run_webots.sh # launcher: tools/run_webots.sh N MODE DRIVE WORLD
webots_sweep.sh # full LiDAR sweep across all modes × drives × worlds
webots_sweep_gt.sh # same but with HERDING_USE_GT=1
dagger_round.sh # NEW today: one-shot DAgger collect + train
calibrate_mecanum.sh # mecanum dynamics calibration (not run today)
Makefile # Top-level: make train_all, make eval_all, etc.
```
---
## Quick commands
```bash
# Run pytest (111 tests, all passing)
make test
# Train one combo end-to-end (BC → RL → eval, ~1h on 2 cores)
make DRIVE=differential WORLD=field
# Train all 4 combos (~5h)
make train_all
# Eval an existing policy directory in gym
python -m training.eval --policy training/runs/rl_differential_field \
--max-flock 10 --max-steps 15000 --n-seeds 10 \
--drive-mode differential --world field
# Webots — analytical, GT bypass (this works for all combos)
HERDING_USE_GT=1 tools/run_webots.sh 5 strombom differential field
# Webots — BC with the 360° proto (currently the 140° proto is active;
# swap by editing protos/ShepherdDog.proto or use the 360° variant directly)
tools/run_webots.sh 5 bc differential field
# Headless full sweep (~80 min)
tools/webots_sweep.sh webots_sweep.log
# Train LSTM (sb3-contrib must be installed)
python -m training.rl.train_lstm \
--out training/runs/lstm_differential_field \
--total-timesteps 3000000 --use-webots-preset \
--drive-mode differential --world field
```
---
## Hardware/environment
- 3.8 GB RAM, 8 GB swap, 2 cores. Memory pressure is real — saw the
OS OOM-kill RL training during chained `train_all` once. If you re-run
full pipelines, monitor memory and consider splitting.
- Conda env: `tir` at `/home/jalf/miniconda3/envs/tir/`. Has SB3,
sb3-contrib, PyTorch, gymnasium. Webots controllers point to this
python via the new `runtime.ini` files.
- Webots installed at `/usr/local/webots/`. Headless mode requires
`xvfb-run -a` (no X display on this machine).
---
## What I'd suggest for a fresh attempt at the 140° LiDAR gap
If the user wants you to keep pushing on it, the highest-EV experiment
not yet tried is:
**Consensus tracker** — modify `herding/perception/sheep_tracker.py` to
require K consecutive detections within a small radius before promoting
a track to "real." Phantom tracks from sporadic wall returns wouldn't
survive the K-step consensus; real sheep continuously visible in FOV
would. The current `max_new_tracks_per_step=1` rate-limits new tracks
but every detection still spawns one immediately.
Implementation sketch: add a "candidate" track type that doesn't appear
in `get_positions()`. After K (e.g. 3-5) consecutive matched detections,
promote candidate → real track. Roughly 30-50 lines of code.
This is a tracker-level fix at deploy time only, so it wouldn't require
retraining the policies — v1 BC/RL should transfer cleanly if the tracker
output looks more like what they were trained on (one position per real
sheep, no phantoms).
I would NOT recommend more architectural training experiments (DAgger
round 4, larger LSTM, etc.) — three independent approaches today already
showed the bottleneck is upstream of the policy.
+75
View File
@@ -0,0 +1,75 @@
# Status — 2026-05-18
Current snapshot of what works in Webots, and what design choices got us here.
## Results matrix (Webots, seed=42)
Differential drive — `bash tools/run_webots.sh N MODE differential WORLD`:
| controller | field n=5 | field n=10 | field_round n=5 | field_round n=10 |
|----------------|:---------:|:----------:|:---------------:|:----------------:|
| BC | 5/5 | 10/10 | 5/5 | 10/10 |
| RL | 5/5 | 10/10 | 5/5 | 10/10 |
| Strömbom | 5/5 | 10/10 | 5/5 | 10/10 |
| Sequential | 5/5 | 10/10 | 5/5 | 10/10 |
Mecanum drive — `bash tools/run_webots.sh N MODE mecanum WORLD HERDING_LIDAR=360`:
| controller | field n=5 | field n=10 | field_round n=5 | field_round n=10 |
|------------|:---------:|:----------:|:---------------:|:----------------:|
| BC | 0/5 | 10/10 | 0/5 | 10/10 |
| RL | 0/5 | 10/10 | 0/5 | 10/10 |
Extra-merit:
- **360° LiDAR ablation** — `HERDING_LIDAR=360` works in all four diff cells.
- **Dual-dog axis-split** — `HERDING_NDOGS=2 HERDING_AXIS_LEAK=0.3` pens 5/5 on diff.
## Architecture decisions and why
### Differential drive — full ODE simulation
Standard Webots physics with two wheel motors and a caster. No special handling needed; the chassis is dynamically stable, and the trained policies transfer directly to Webots.
### Mecanum drive — kinematic Supervisor injection
The mecanum proto uses physical 8-roller wheels for visual fidelity, but the chassis is moved by `Supervisor.setVelocity()` using the gym mecanum forward-kinematics formula (see `controllers/shepherd_dog/shepherd_dog.py::drive_mecanum`).
We explored two other paths before settling here:
1. **Plain cylinder wheels + anisotropic ContactProperties.** Tried `frictionRotation ±0.7854` on the wheel contact frame. Strafe motion came out the wrong direction and diagonals zeroed out. Discarded.
2. **Full ODE simulation on 32 physical roller hinges.** The free-spinning rollers coupled chaotically through the body, producing ±150° yaw drift over 200 control steps. Even with `inertiaMatrix` overrides, `dampingConstant` on every roller, and a 6× cut to motor torque, dynamic policy commands kept producing tumbling. Discarded.
3. **Kinematic Supervisor injection (current).** ODE physics on the wheels is kept for visuals only; the chassis velocity is set directly each step from the gym forward-kinematics formula. Gym training and Webots deployment produce identical body motion. Yaw drift is zero by construction.
This is not a hack — it matches how most academic mecanum sims work (e.g., Gazebo's mecanum plugins use kinematic models by default; ODE's contact solver does not handle the rolling-without-slipping constraint cleanly for 32 free hinges).
### Why n=5 mecanum fails (and n=10 passes)
The 360° LiDAR scans the full perimeter every step. Wall corners, gate posts, and pen rails occasionally produce sheep-shaped blobs that pass the `wall_reject` and `static_reject` filters. The tracker promotes a candidate to "active" after `consensus_k=3` consistent hits within 20 steps — phantoms anchored to fixed world features satisfy this trivially.
With n=10 real sheep, the tracker's active slots fill with real sheep and phantoms can't compete. With n=5 there are ~5 free slots that wall phantoms occupy; the policy then chases ghosts.
Tightening the consensus filter (`consensus_k=5`) and `wall_reject=0.9` were tried; both kept ~70% of frames at 10 active tracks. The proper fix is **parallax-aware tracking** — record each track's world position across multiple dog vantage points; real sheep move, static phantoms don't. Out of scope for the 2026-06-04 deadline.
## File map (what changed in this push)
```
herding/config.py mecanum presets keep matched
strafe scaling (strafe_eff=0.26,
bleed=-0.40) for kinematic injection
controllers/shepherd_dog/shepherd_dog.py
Supervisor() + drive_mecanum kinematic
injection via _self_node.setVelocity
protos/ShepherdDogMecanum.proto supervisor TRUE; physics tuning
protos/ShepherdDogMecanum360.proto reverted (ODE no longer load-bearing)
tools/gen_mecanum_wheels.py wheels regen-script (clean)
tools/run_webots.sh contact-properties comment cleaned
training/{bc/collect,rl/train}.py comment cleanup; preset selection unchanged
```
## Options for the remaining cleanup
1. **Keep matched preset (0.26, -0.40)**. Policies trained against these values; controller applies them at deploy; no retrain. *Current state*.
2. **Switch preset to textbook (1.0, 0.0) and retrain mecanum BC+RL** (~6h). Cleaner story (textbook mecanum throughout); same kinematic-injection mechanism.
Either is defensible. (1) ships faster; (2) is more "pure".
+49 -20
View File
@@ -266,18 +266,21 @@ class RobotConfig:
strafe_efficiency: float = 1.0
"""Mecanum strafe magnitude as a fraction of textbook X-pattern.
``1.0`` (default) = perfect mecanum kinematics. ``0.4`` matches the
Webots roller-hinge mecanum proto calibration (62% slip on strafe,
11% on forward). Used by ``mecanum_step`` only — has no
effect on differential drive.
``1.0`` (default) is the ideal kinematic mecanum. Values below 1
model strafe slip; the Webots controller reads the same value and
applies it in the Supervisor velocity injection, so gym training
and Webots deployment see identical body motion. No effect on
differential drive.
"""
strafe_to_forward_bleed: float = 0.0
"""Fraction of ideal strafe magnitude that bleeds into body-frame x.
``0.0`` (default) = no bleed. ``-0.28`` matches the Webots proto's
consistent backward push under strafe commands. Used by
``mecanum_step`` only.
``0.0`` (default) = no bleed. Non-zero values add
``strafe_to_forward_bleed * |vy_body_ideal|`` to ``vx_body`` to
model the consistent forward (or backward) drift that some
mecanum chassis exhibit during pure-strafe commands. No effect on
differential drive.
"""
def __post_init__(self) -> None:
@@ -407,23 +410,49 @@ HERDING_MEC_WEBOTS = HerdingConfig(
),
robot=RobotConfig(
action_smooth=0.55,
strafe_efficiency=0.4,
strafe_to_forward_bleed=-0.28,
strafe_efficiency=0.26,
strafe_to_forward_bleed=-0.40,
),
)
"""Webots-mecanum-matched training preset.
"""Mecanum + 140° LiDAR preset.
Same as HERDING_WEBOTS but with the gym mecanum kinematics scaled to
match the Webots roller-hinge mecanum proto:
* ``strafe_efficiency=0.4`` — strafing produces ~40% of textbook
X-pattern lateral velocity in Webots; this matches the bias.
* ``strafe_to_forward_bleed=-0.28`` — strafe commands bleed ~28% of
their magnitude into backward body motion in Webots.
Mirrors HERDING_WEBOTS but with mecanum-specific kinematic scaling
(``strafe_efficiency`` and ``strafe_to_forward_bleed``) applied to
the gym forward-kinematics formula. The Webots controller reads
these same values via ``RobotConfig`` and feeds them through the
Supervisor velocity injection, so gym and Webots produce identical
body motion. Diff-drive ignores both fields.
"""
Use this preset when training BC/RL for the mecanum drive so the
policy learns to compensate for the imperfect physical mecanum.
Differential drive ignores both parameters and behaves identically
to HERDING_WEBOTS.
HERDING_MEC_WEBOTS_360 = HerdingConfig(
lidar=LIDAR_WEBOTS_360,
# Looser detection thresholds for the wider FOV — the 360° scan
# catches far walls, gate posts and pen rails the 140° front cone
# never sees, so the cluster/feature filters need slightly more
# margin to keep promotion rates similar.
detection=DetectionConfig(wall_reject=0.6, static_reject=1.2),
tracker=TrackerConfig(
forget_steps=300,
max_new_tracks_per_step=2, # 360° gives more candidates per step
pen_latch_depth=3.0,
predict_steps=180,
consensus_k=3,
consensus_radius_m=0.3,
consensus_max_age=20,
),
robot=RobotConfig(
action_smooth=0.55,
strafe_efficiency=0.26,
strafe_to_forward_bleed=-0.40,
),
)
"""Mecanum + 360° LiDAR preset (the deployable mecanum target).
The 360° FOV gives the policy perception coverage in every direction,
which matches the omnidirectional motion the mecanum chassis can
produce. Used for both gym training and Webots deployment so the
trained policy sees the same observation geometry it will face at
deploy time.
"""
"""Webots-matched training preset.
+6 -1
View File
@@ -9,7 +9,7 @@ PROTO ShepherdDogMecanum [
field SFString controller "shepherd_dog"
field MFString controllerArgs []
field SFString customData ""
field SFBool supervisor FALSE
field SFBool supervisor TRUE
field SFBool synchronization TRUE
]
{
@@ -1993,6 +1993,7 @@ PROTO ShepherdDogMecanum [
}
}
# ========== IMU SENSORS ==========
Accelerometer {
translation 0 0 0.10
@@ -2052,6 +2053,10 @@ PROTO ShepherdDogMecanum [
}
# ========== PHYSICS ==========
# Chassis dynamics are not exercised in herding runs the
# controller drives the body kinematically via Supervisor
# setVelocity (see drive_mecanum in controllers/shepherd_dog).
# Webots-derived inertia from the bounding box is sufficient.
physics Physics {
density -1
mass 5.0
File diff suppressed because it is too large Load Diff
+30 -19
View File
@@ -90,24 +90,36 @@ fi
cp "$SRC" "$DST"
# LiDAR FOV variant: HERDING_LIDAR=140 (default) or 360 (ablation).
# 360° is only supported for differential drive; the mecanum proto
# always uses the 140° sensor matching ShepherdDog.proto.
LIDAR_VARIANT="${HERDING_LIDAR:-140}"
# LiDAR FOV variant. For mecanum the default is 360° because the
# physical-roller proto's passive yaw drift makes the 140° front
# cone unreliable (heading errors push detections out of the
# tracker's world-frame matching gates). Diff defaults to 140°
# matching the canonical ShepherdDog.proto.
if [[ -z "${HERDING_LIDAR:-}" ]]; then
if [[ "$DRIVE" == "mecanum" ]]; then
LIDAR_VARIANT="360"
else
LIDAR_VARIANT="140"
fi
else
LIDAR_VARIANT="$HERDING_LIDAR"
fi
if [[ "$LIDAR_VARIANT" != "140" && "$LIDAR_VARIANT" != "360" ]]; then
echo "HERDING_LIDAR must be 140 or 360, got '$LIDAR_VARIANT'" >&2; exit 1
fi
if [[ "$LIDAR_VARIANT" == "360" && "$DRIVE" == "mecanum" ]]; then
echo "[run_webots] HERDING_LIDAR=360 not available for mecanum drive — falling back to 140." >&2
LIDAR_VARIANT="140"
fi
export HERDING_LIDAR="$LIDAR_VARIANT"
# Swap robot proto based on drive mode + LiDAR variant.
# Base worlds reference ShepherdDog (diff-drive 140°). For mecanum we
# swap in ShepherdDogMecanum; for the 360° ablation we swap in
# ShepherdDog360.
if [[ "$DRIVE" == "mecanum" ]]; then
# Base worlds reference ShepherdDog (diff-drive 140°). The four
# combinations the launcher supports:
# diff + 140° → ShepherdDog.proto (default)
# diff + 360° → ShepherdDog360.proto (FOV ablation for diff)
# mecanum+ 140° → ShepherdDogMecanum.proto
# mecanum+ 360° → ShepherdDogMecanum360.proto (the trained mecanum target)
if [[ "$DRIVE" == "mecanum" && "$LIDAR_VARIANT" == "360" ]]; then
sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum360.proto"|' "$DST"
sed -i 's|^ShepherdDog {|ShepherdDogMecanum360 {|' "$DST"
elif [[ "$DRIVE" == "mecanum" ]]; then
sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum.proto"|' "$DST"
sed -i 's|^ShepherdDog {|ShepherdDogMecanum {|' "$DST"
elif [[ "$LIDAR_VARIANT" == "360" ]]; then
@@ -115,13 +127,12 @@ elif [[ "$LIDAR_VARIANT" == "360" ]]; then
sed -i 's|^ShepherdDog {|ShepherdDog360 {|' "$DST"
fi
if [[ "$DRIVE" == "mecanum" ]]; then
# Inject mecanum roller contact properties. The proto's rollers are
# split into two contact materials so that we can keep the friction
# axes oriented along each roller's free-spin direction — but with
# physical roller hinges (no longer plain cylinder wheels) the
# ground contact is via the capsules and standard friction works.
# Slightly bumped coulombFriction keeps the rollers gripping during
# mecanum strafing.
# Wheel-ground friction. The chassis is driven kinematically by
# the Supervisor (see drive_mecanum in controllers/shepherd_dog),
# so these properties only affect wheel-spin visuals, not the
# robot's motion. coulombFriction 2.0 plus a small
# forceDependentSlip keeps the rollers from locking up against
# the ground.
python3 -c "
with open('$DST', 'r') as f:
txt = f.read()
+15 -8
View File
@@ -179,19 +179,26 @@ def main():
f"'{FIELD_SHAPE}'. This should not happen — file a bug.")
from herding.config import (
HerdingConfig, HERDING_WEBOTS, HERDING_MEC_WEBOTS,
HerdingConfig, HERDING_WEBOTS, HERDING_MEC_WEBOTS, HERDING_MEC_WEBOTS_360,
DomainRandomConfig, RobotConfig,
)
if args.use_webots_preset:
# Pick the drive-matched Webots preset — for mecanum we use the
# variant that simulates the physical-roller proto's strafe
# efficiency and forward bleed so the policy trains under the
# same imperfect kinematics it sees at deployment.
base = HERDING_MEC_WEBOTS if args.drive_mode == "mecanum" else HERDING_WEBOTS
# Mecanum uses the 360° preset (the deployable mecanum target);
# diff drive keeps the canonical 140° preset.
if args.drive_mode == "mecanum":
base = HERDING_MEC_WEBOTS_360
preset_name = "HERDING_MEC_WEBOTS_360"
else:
base = HERDING_WEBOTS
preset_name = "HERDING_WEBOTS"
# Small compass noise for mecanum training (robustness margin
# for the Webots compass sensor).
compass_std = 0.1 if args.drive_mode == "mecanum" else 0.0
herding_cfg = base.replace(
domain_random=DomainRandomConfig(
fp_rate=args.fp_rate,
wheel_slip_std=args.wheel_slip_std,
compass_noise_std=compass_std,
),
robot=RobotConfig(
action_smooth=args.action_smooth,
@@ -199,10 +206,10 @@ def main():
strafe_to_forward_bleed=base.robot.strafe_to_forward_bleed,
),
)
preset_name = "HERDING_MEC_WEBOTS" if args.drive_mode == "mecanum" else "HERDING_WEBOTS"
print(f"[demos] {preset_name} preset + DR: fp_rate={args.fp_rate} "
f"action_smooth={args.action_smooth} wheel_slip_std={args.wheel_slip_std} "
f"strafe_eff={herding_cfg.robot.strafe_efficiency:.2f}")
f"strafe_eff={herding_cfg.robot.strafe_efficiency:.2f} "
f"compass_noise={compass_std}")
else:
herding_cfg = None
if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+36 -20
View File
@@ -276,37 +276,53 @@ def main() -> None:
print(f"[rl] drive_mode={drive_mode} (BC action_dim={bc_action_dim})")
from herding.config import (
HerdingConfig, HERDING_MEC_WEBOTS, DomainRandomConfig, RobotConfig,
HerdingConfig, HERDING_MEC_WEBOTS_360, DomainRandomConfig, RobotConfig,
)
herding_cfg = None
# When fine-tuning a mecanum policy we always apply the Webots
# roller-hinge calibration to the gym kinematics (strafe efficiency
# and bleed). Without this, the RL agent updates against the
# textbook X-pattern and fails on deployment.
# Mecanum always trains under HERDING_MEC_WEBOTS_360 (360° LiDAR +
# kinematic-matched strafe scaling + small compass-noise DR).
is_mecanum = (drive_mode == "mecanum")
if is_mecanum or args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
if is_mecanum:
base_robot = HERDING_MEC_WEBOTS.robot
strafe_eff = base_robot.strafe_efficiency
strafe_bleed = base_robot.strafe_to_forward_bleed
base = HERDING_MEC_WEBOTS_360
strafe_eff = base.robot.strafe_efficiency
strafe_bleed = base.robot.strafe_to_forward_bleed
compass_std = 0.1 # heading robustness DR
else:
base = None
strafe_eff = 1.0
strafe_bleed = 0.0
herding_cfg = HerdingConfig(
domain_random=DomainRandomConfig(
fp_rate=args.fp_rate,
wheel_slip_std=args.wheel_slip_std,
),
robot=RobotConfig(
action_smooth=args.action_smooth,
strafe_efficiency=strafe_eff,
strafe_to_forward_bleed=strafe_bleed,
),
)
compass_std = 0.0
if is_mecanum:
herding_cfg = base.replace(
domain_random=DomainRandomConfig(
fp_rate=args.fp_rate,
wheel_slip_std=args.wheel_slip_std,
compass_noise_std=compass_std,
),
robot=RobotConfig(
action_smooth=args.action_smooth,
strafe_efficiency=strafe_eff,
strafe_to_forward_bleed=strafe_bleed,
),
)
else:
herding_cfg = HerdingConfig(
domain_random=DomainRandomConfig(
fp_rate=args.fp_rate,
wheel_slip_std=args.wheel_slip_std,
),
robot=RobotConfig(
action_smooth=args.action_smooth,
strafe_efficiency=strafe_eff,
strafe_to_forward_bleed=strafe_bleed,
),
)
print(f"[rl] domain-random: fp_rate={args.fp_rate} "
f"action_smooth={args.action_smooth} "
f"wheel_slip_std={args.wheel_slip_std} "
f"strafe_eff={strafe_eff:.2f} strafe_bleed={strafe_bleed:.2f}")
f"strafe_eff={strafe_eff:.2f} strafe_bleed={strafe_bleed:.2f} "
f"compass_noise={compass_std}")
env_fns = [_make_env(i, args.seed, frame_stack, drive_mode,
difficulty=args.difficulty,
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.