Mecanum Webots via Supervisor kinematic injection

Replace the failing ODE-rolled mecanum chassis dynamics with a
Supervisor.setVelocity call that uses the gym mecanum forward
kinematics formula directly. Wheel motors still spin (visual);
chassis motion comes from the gym model so training and deployment
match by construction.

Results (seed=42, n=10 sheep): BC + RL mecanum pen 10/10 in both
field and field_round. n=5 mecanum cells still 0/5 due to tracker
phantoms anchored to wall corners under the 360° LiDAR — documented
in docs/status.md as the remaining gap.

Cleanup: drop deploy-time hacks (HERDING_HEADING_*, HERDING_OMEGA_CLAMP,
HERDING_TRACKER_*) that were workarounds for the old ODE chaos;
revert the proto inertiaMatrix, roller dampingConstant, and reduced
motor torque since they no longer carry load; refresh comments
around the mecanum config presets.
This commit is contained in:
Johnny Fernandes
2026-05-18 22:46:37 +00:00
parent 1df84ae4b5
commit 27c0f65722
25 changed files with 2635 additions and 76 deletions
+4
View File
@@ -24,8 +24,12 @@ herding_runtime.cfg
# Runtime logs — all of these are produced by training/eval/webots runs # Runtime logs — all of these are produced by training/eval/webots runs
# and are not useful to track in git. Keep summary docs/markdown only. # and are not useful to track in git. Keep summary docs/markdown only.
*.log *.log
*.stdout
calibrate_mecanum.log calibrate_mecanum.log
training/.run_done training/.run_done
# Local-only training backups (never committed).
_backup_pretrain/
# Tooling # Tooling
.claude/ .claude/
+64 -7
View File
@@ -82,7 +82,7 @@ for _rk, _rv in _runtime_cfg.items():
import numpy as np import numpy as np
from controller import Robot from controller import Supervisor
from herding.control.active_scan import ActiveScanTeacher from herding.control.active_scan import ActiveScanTeacher
from herding.control.modulation import modulate_speed from herding.control.modulation import modulate_speed
@@ -97,10 +97,21 @@ from herding.world.geometry import (
DOG_SOUTH_LIMIT, DOG_SOUTH_LIMIT,
PEN_ENTRY, is_penned, PEN_ENTRY, is_penned,
) )
from herding.config import HERDING_WEBOTS, LIDAR_WEBOTS_360, RobotConfig from herding.config import (
HERDING_WEBOTS, HERDING_MEC_WEBOTS, HERDING_MEC_WEBOTS_360,
LIDAR_WEBOTS_360, RobotConfig,
)
# Robot physical constants come from RobotConfig so they stay in sync with # Robot physical constants come from RobotConfig so they stay in sync with
# the training environment. The Webots preset uses action_smooth=0.55. # the training environment. The Webots preset uses action_smooth=0.55.
# Mecanum picks the matched preset so kinematic injection uses the same
# strafe_efficiency/bleed values the policy was trained against.
_DRIVE_MODE_PEEK = (os.environ.get("HERDING_DRIVE")
or _runtime_cfg.get("HERDING_DRIVE")
or "differential").lower()
if _DRIVE_MODE_PEEK == "mecanum":
_ROBOT_CFG: RobotConfig = HERDING_MEC_WEBOTS_360.robot
else:
_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot _ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
DOG_WHEEL_RADIUS = _ROBOT_CFG.wheel_radius DOG_WHEEL_RADIUS = _ROBOT_CFG.wheel_radius
DOG_WHEEL_BASE = _ROBOT_CFG.wheel_base DOG_WHEEL_BASE = _ROBOT_CFG.wheel_base
@@ -143,7 +154,9 @@ WORLD = (os.environ.get("HERDING_WORLD")
LIDAR_FOV_VARIANT = (os.environ.get("HERDING_LIDAR") LIDAR_FOV_VARIANT = (os.environ.get("HERDING_LIDAR")
or _runtime_cfg.get("HERDING_LIDAR") or _runtime_cfg.get("HERDING_LIDAR")
or "140").lower() or "140").lower()
if LIDAR_FOV_VARIANT == "360": if DRIVE_MODE == "mecanum" and LIDAR_FOV_VARIANT == "360":
_LIDAR_CFG = HERDING_MEC_WEBOTS_360.lidar
elif LIDAR_FOV_VARIANT == "360":
_LIDAR_CFG = LIDAR_WEBOTS_360 _LIDAR_CFG = LIDAR_WEBOTS_360
else: else:
_LIDAR_CFG = HERDING_WEBOTS.lidar _LIDAR_CFG = HERDING_WEBOTS.lidar
@@ -237,11 +250,20 @@ def drive_diff(vx: float, vy: float, left_motor, right_motor,
def drive_mecanum(vx: float, vy: float, omega: float, def drive_mecanum(vx: float, vy: float, omega: float,
fl_motor, fr_motor, rl_motor, rr_motor, fl_motor, fr_motor, rl_motor, rr_motor,
compass, motor_max: float): compass, motor_max: float):
"""Drive the mecanum chassis kinematically.
The wheel motors are spun for visual fidelity, but the chassis
motion comes from Supervisor.setVelocity using the gym mecanum
forward-kinematics formula. Gym training and Webots deployment
therefore produce identical body motion.
"""
if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3: if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
fl_motor.setVelocity(0.0) fl_motor.setVelocity(0.0)
fr_motor.setVelocity(0.0) fr_motor.setVelocity(0.0)
rl_motor.setVelocity(0.0) rl_motor.setVelocity(0.0)
rr_motor.setVelocity(0.0) rr_motor.setVelocity(0.0)
if _self_node is not None:
_self_node.setVelocity([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
return return
n = compass.getValues() n = compass.getValues()
h = math.atan2(n[0], n[1]) h = math.atan2(n[0], n[1])
@@ -258,15 +280,35 @@ def drive_mecanum(vx: float, vy: float, omega: float,
fr_motor.setVelocity(w_fr) fr_motor.setVelocity(w_fr)
rl_motor.setVelocity(w_rl) rl_motor.setVelocity(w_rl)
rr_motor.setVelocity(w_rr) rr_motor.setVelocity(w_rr)
# Kinematic body injection — derive body velocity from the same
# wheel speeds using the gym forward-kinematics formula and the
# active preset's strafe/bleed parameters.
if _self_node is not None:
r = DOG_WHEEL_RADIUS
vx_body = (w_fl + w_fr + w_rl + w_rr) * r / 4.0
vy_body_ideal = (-w_fl + w_fr + w_rl - w_rr) * r / 4.0
vy_body = vy_body_ideal * _ROBOT_CFG.strafe_efficiency
if _ROBOT_CFG.strafe_to_forward_bleed != 0.0:
vx_body += _ROBOT_CFG.strafe_to_forward_bleed * abs(vy_body_ideal)
omega_body = (-w_fl + w_fr - w_rl + w_rr) * r / (
4.0 * (DOG_WHEEL_BASE_X / 2.0 + DOG_WHEEL_BASE_Y / 2.0))
cos_h, sin_h = math.cos(h), math.sin(h)
vx_w = vx_body * cos_h - vy_body * sin_h
vy_w = vx_body * sin_h + vy_body * cos_h
_self_node.setVelocity([vx_w, vy_w, 0.0, 0.0, 0.0, omega_body])
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Webots devices # Webots devices
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
robot = Robot() robot = Supervisor()
timestep = int(robot.getBasicTimeStep()) timestep = int(robot.getBasicTimeStep())
# Mecanum uses Supervisor.setVelocity for chassis motion (see
# drive_mecanum); diff-drive keeps full ODE simulation.
_self_node = robot.getSelf() if DRIVE_MODE == "mecanum" else None
# Multi-shepherd axis split. When the launcher creates two dog instances # Multi-shepherd axis split. When the launcher creates two dog instances
# it sets each robot's customData to "axis=x" or "axis=y"; the controller # it sets each robot's customData to "axis=x" or "axis=y"; the controller
# then attenuates the off-axis component of every action so the two # then attenuates the off-axis component of every action so the two
@@ -323,7 +365,16 @@ receiver = robot.getDevice("receiver"); receiver.enable(timestep)
emitter = robot.getDevice("emitter") emitter = robot.getDevice("emitter")
lidar = robot.getDevice("lidar"); lidar.enable(timestep) lidar = robot.getDevice("lidar"); lidar.enable(timestep)
tracker = SheepTracker(tracker_cfg=HERDING_WEBOTS.tracker) # Tracker config: pick the preset that matches the (drive, lidar) combo
# so the tracker's consensus parameters match what the policy was
# trained against.
if DRIVE_MODE == "mecanum" and LIDAR_FOV_VARIANT == "360":
_TRACKER_CFG = HERDING_MEC_WEBOTS_360.tracker
elif DRIVE_MODE == "mecanum":
_TRACKER_CFG = HERDING_MEC_WEBOTS.tracker
else:
_TRACKER_CFG = HERDING_WEBOTS.tracker
tracker = SheepTracker(tracker_cfg=_TRACKER_CFG)
# Cosmetic ear motors — animated; not used by control. # Cosmetic ear motors — animated; not used by control.
left_ear = robot.getDevice("left ear motor") left_ear = robot.getDevice("left ear motor")
@@ -429,9 +480,12 @@ if MODE == "calibrate":
compass, MOTOR_MAX) compass, MOTOR_MAX)
robot.step(timestep) robot.step(timestep)
_pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1] _pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1]
_nv1 = compass.getValues(); _h1 = math.atan2(_nv1[0], _nv1[1])
_T = _calib_n * _DT _T = _calib_n * _DT
_vx_w = (_x1 - _x0) / _T; _vy_w = (_y1 - _y0) / _T _vx_w = (_x1 - _x0) / _T; _vy_w = (_y1 - _y0) / _T
_vx_g = (_xg - _x0) / _T; _vy_g = (_yg - _y0) / _T _vx_g = (_xg - _x0) / _T; _vy_g = (_yg - _y0) / _T
_dh_deg = math.degrees(math.atan2(math.sin(_h1 - _h0),
math.cos(_h1 - _h0)))
def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p) def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p)
_result = ( _result = (
f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f}) " f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f}) "
@@ -441,7 +495,8 @@ if MODE == "calibrate":
f" webots displacement: dx={_x1-_x0:.4f} dy={_y1-_y0:.4f} " f" webots displacement: dx={_x1-_x0:.4f} dy={_y1-_y0:.4f} "
f"(vx={_vx_w:.3f} vy={_vy_w:.3f} m/s)\n" f"(vx={_vx_w:.3f} vy={_vy_w:.3f} m/s)\n"
f" vx error: {_pct(_vx_w, _vx_g):.1f}% " f" vx error: {_pct(_vx_w, _vx_g):.1f}% "
f"vy error: {_pct(_vy_w, _vy_g):.1f}%\n" f"vy error: {_pct(_vy_w, _vy_g):.1f}% "
f"heading drift: {_dh_deg:+.1f}°\n"
) )
print(_result) print(_result)
with open(_log_path, "a") as _f: with open(_log_path, "a") as _f:
@@ -611,7 +666,9 @@ while robot.step(timestep) != -1:
f"tracks_active={tracker.n_active()} " f"tracks_active={tracker.n_active()} "
f"tracks_cand={tracker.n_candidate()} " f"tracks_cand={tracker.n_candidate()} "
f"tracks_penned={tracker.n_penned()} " f"tracks_penned={tracker.n_penned()} "
f"detections={len(detections)}") f"detections={len(detections)} "
f"h={math.degrees(dog_heading):+.1f}°"
+ (f"{math.degrees(dog_heading):+.1f}°" if _h_ema > 0 else ""))
if DRIVE_MODE == "mecanum": if DRIVE_MODE == "mecanum":
print(f"{common} action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})") print(f"{common} action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})")
else: else:
+287
View File
@@ -0,0 +1,287 @@
# Project handoff — TRI_PROJ2 herding (2026-05-16)
Context for a fresh model picking this project up. Project deadline: **2026-06-04**.
Branch: `test/johnny8`. Last commits: `876e14e` (LSTM), `dd5ac66` (core fixes).
---
## What this project is
Group G25 course project: an autonomous shepherd dog that herds 110 sheep through a gate into a pen. Two worlds (rectangular `field`, circular `field_round`), two drives (`differential`, `mecanum`), and five control strategies:
- `strombom` — analytical Strömbom collect/drive heuristic
- `sequential` — analytical single-target pin-and-push baseline
- `universal` — analytical teacher used to collect BC demos
- `bc` — MLP policy trained via behaviour cloning of `universal`
- `rl` — KL-regularised PPO fine-tune of `bc`
The dog perceives sheep only through a front-mounted LiDAR (`protos/ShepherdDog.proto`).
A 2D Gym env (`training/herding_env.py`) is used for training and headless evaluation;
Webots is used for sim-to-deployment validation.
See `docs/project.md` for the formal course objectives. See
`~/.claude/projects/-home-jalf-code-TRI-PROJ2/memory/` for the running notes
(`project_state.md`, `dagger_results.md`, `lstm_results.md`, `webots_perception_gap.md`).
---
## What's working today
Everything below is **verified**, with command lines you can copy-paste.
### Analytical strategies (Strömbom, Sequential, Universal)
Work in Webots with **GT bypass** (`HERDING_USE_GT=1`) — 12/12 trials across
both worlds × {5, 10 sheep}. User has signed off on GT bypass for these
analytical baselines (they take a position list as input; GT vs LiDAR is a
perception-layer concern, not a strategy concern).
Validated by `webots_sweep_gt.log` (full matrix, all OK).
### Gym performance (clean 360° LiDAR sim, default tracker)
```
BC diff/field: 96% avg (90-100% across n=1..10)
RL diff/field: 99% avg (90-100%)
BC diff/round: 58% ← weak combo
RL diff/round: 58% ← weak combo
BC mec/field: 86%
RL mec/field: 90%
BC mec/round: 73%
RL mec/round: 79%
```
Plus a Stage-2 `rl_fast` time-penalty pass on diff/field and mec/field
(`rl_fast_*` directories) that slightly accelerates time-to-pen with similar
success.
### Webots LiDAR — 360° proto variant (`protos/ShepherdDog360.proto`)
Created today as a robustness ablation. v1 policies (trained on default 360°
gym LiDAR) transfer cleanly:
```
strombom/sequential/universal: 12/12 OK
bc diff (5 and 10 sheep, both worlds): 3/4 OK (only diff/field n=10 timed out)
bc mecanum: 0/4 — separate dynamics gap
rl any: 0/4 — RL more brittle than BC, unexpectedly
```
Validated by `webots_sweep_360.log`.
---
## What does NOT work (despite multiple attempts)
**Any learned policy (BC, RL, DAgger, LSTM) in Webots LiDAR with the
canonical 140° FOV proto.** All hit the same wall: tracker phantom-track
patterns from real Webots LiDAR don't match what the gym FP-injection model
produces, so policies trained on the gym proxy can't handle the obs they see
in Webots.
Approaches tried today (all detailed in `~/.claude/projects/.../memory/`):
| Approach | Gym proxy | Webots LiDAR 140° |
|---|---|---|
| v1 MLP + frame stack, clean training | 99% | 0/5 |
| DAgger (3 rounds, privileged teacher labels) | 12% → 38% on proxy | 0/5 |
| LSTM RecurrentPPO from scratch, 3M steps | 69% clean / 2% proxy | 0/5 |
Diagnosis: gym `HERDING_WEBOTS` preset (`herding/config.py`) is an
approximation but not faithful to actual Webots LiDAR. Real Webots produces
~4 phantom tracks per step for 5 real sheep due to wall/post/leg returns;
gym injection uses a Poisson process at static anchor points which is
distributionally different.
---
## Critical bug fixes shipped today
If you're picking this up, these are real bugs that took hours to find:
1. **Webots controllers were silently crashing on numpy import.** Webots
launched them under system `python3` (no numpy). Fixed by adding
`runtime.ini` files at `controllers/{shepherd_dog,sheep}/runtime.ini`
that point Webots to the conda env's python.
2. **FP_RATE mismatch BC=0 vs RL=2 poisoned PPO.** Default in Makefile was
`FP_RATE=2.0` for RL but `--fp-rate 0.0` hard-coded for BC demos. PPO
stalled at 0% success for 1.46M steps. Now `FP_RATE=0.0` consistent.
3. **Tracker phantom-penned tracks.** `pen_latch_depth=0.5` was too shallow
(FPs at y≈-15 latched and lived forever). Now 2.0, and penned tracks
decay at `forget_steps × 8` instead of being eternal.
4. **HERDING_WEBOTS preset tuning** in `herding/config.py`
`max_new_tracks_per_step=1`, `static_reject=1.2`. Reduces phantom-track
spawning rate but doesn't eliminate it.
---
## Recommended path to a strong June 4 deliverable
You don't need to fix the 140° LiDAR gap — there's a defensible story
already. The article framing writes itself:
> "Wide-FOV (360°) LiDAR enables clean sim-to-real transfer of learned
> shepherding policies. Narrow-FOV (140°) introduces phantom-track noise
> that current policies cannot fully reject — closing this gap is future
> work, likely requiring either a faithful gym-side LiDAR model or
> Webots-in-the-loop training."
Concrete deliverable plan:
1. **Demo video and screenshots**: use the 360° proto for BC/RL demonstrations
and GT bypass for analyticals on 140°. All combos covered.
2. **Quantitative results**: gym eval already gives success%, mean steps.
Add a flock-dispersion metric (`max(distances from CoM)` at end of
episode) — about 30 lines in `eval.py`.
3. **Collision tracking**: add a counter in `HerdingEnv.step()` for
`dog-sheep distance < 0.30 m`. Currently the env knows about
`COLLISION_DIST` but doesn't expose it in info. ~20 lines.
4. **Mecanum**: the mecanum Webots dynamics gap is **separate** from the
perception issue. `tools/calibrate_mecanum.sh` exists for this. Run
it and see if it gives matching dynamics. This is the most valuable
remaining technical task — closing the mecanum gap would let you
complete the "diff vs mecanum" extra-merit comparison in
`docs/project.md`.
5. **Round world**: gym performance is ~58-79% across approaches. The
curved walls break Strömbom's "stand behind the centroid" geometry
(the position behind sometimes lies outside the field). Two cheap
tweaks worth trying: (a) a per-episode `W_RADIUS` reward bonus for
compact flocks (gather-first behavior), (b) curriculum on the env's
`difficulty` knob (already wired in `HerdingEnv`).
Bonuses still on the table (from `docs/project.md` extra merit):
- **Multi-shepherd axis-split** — user's idea, ~1 day work. Each dog
computes one component of the analytical Strömbom action. No multi-agent
RL needed.
- **Robustness / DR ablation** — FP/wheel-slip knobs exist; run an ablation
table.
---
## Repository layout (essentials)
```
herding/
config.py # HerdingConfig dataclasses, HERDING_DEFAULT / HERDING_WEBOTS presets
control/ # strombom.py, sequential.py, universal.py (analytical teachers)
perception/ # lidar_sim.py, lidar_perception.py, sheep_tracker.py
world/ # diffdrive.py kinematics, flocking_sim.py, geometry.py (PEN_*/GATE_*/FIELD_*)
training/
herding_env.py # Gym env: HerdingEnv. ~560 lines. Step/reset/reward/obs.
bc/
collect.py # Demo collector — supports --privileged and --dagger-policy
pretrain.py # MLP BC trainer (MSE + 1-cos loss)
rl/
train.py # KL-regularised PPO fine-tune of BC
train_lstm.py # NEW today: RecurrentPPO (sb3-contrib) from scratch
eval.py # Env-side evaluator; supports MLP + LSTM policies
runs/ # Trained artifacts (bc_*, rl_*, rl_fast_*, lstm_*)
v1_clean/ # Backup of pre-DAgger artifacts
controllers/
shepherd_dog/
shepherd_dog.py # Webots controller. Mode selection via HERDING_MODE env.
policy_loader.py # Auto-detects MLP vs LSTM zip. Handles obs / state.
runtime.ini # ← critical, points Webots to conda python
sheep/
runtime.ini # ← same fix
protos/
ShepherdDog.proto # canonical 140° FOV (matches the physical robot)
ShepherdDog360.proto # 360° variant for the FOV ablation / fallback delivery
ShepherdDogMecanum.proto
Sheep.proto
worlds/
field.wbt # rectangular world
field_round.wbt # circular world
tools/
run_webots.sh # launcher: tools/run_webots.sh N MODE DRIVE WORLD
webots_sweep.sh # full LiDAR sweep across all modes × drives × worlds
webots_sweep_gt.sh # same but with HERDING_USE_GT=1
dagger_round.sh # NEW today: one-shot DAgger collect + train
calibrate_mecanum.sh # mecanum dynamics calibration (not run today)
Makefile # Top-level: make train_all, make eval_all, etc.
```
---
## Quick commands
```bash
# Run pytest (111 tests, all passing)
make test
# Train one combo end-to-end (BC → RL → eval, ~1h on 2 cores)
make DRIVE=differential WORLD=field
# Train all 4 combos (~5h)
make train_all
# Eval an existing policy directory in gym
python -m training.eval --policy training/runs/rl_differential_field \
--max-flock 10 --max-steps 15000 --n-seeds 10 \
--drive-mode differential --world field
# Webots — analytical, GT bypass (this works for all combos)
HERDING_USE_GT=1 tools/run_webots.sh 5 strombom differential field
# Webots — BC with the 360° proto (currently the 140° proto is active;
# swap by editing protos/ShepherdDog.proto or use the 360° variant directly)
tools/run_webots.sh 5 bc differential field
# Headless full sweep (~80 min)
tools/webots_sweep.sh webots_sweep.log
# Train LSTM (sb3-contrib must be installed)
python -m training.rl.train_lstm \
--out training/runs/lstm_differential_field \
--total-timesteps 3000000 --use-webots-preset \
--drive-mode differential --world field
```
---
## Hardware/environment
- 3.8 GB RAM, 8 GB swap, 2 cores. Memory pressure is real — saw the
OS OOM-kill RL training during chained `train_all` once. If you re-run
full pipelines, monitor memory and consider splitting.
- Conda env: `tir` at `/home/jalf/miniconda3/envs/tir/`. Has SB3,
sb3-contrib, PyTorch, gymnasium. Webots controllers point to this
python via the new `runtime.ini` files.
- Webots installed at `/usr/local/webots/`. Headless mode requires
`xvfb-run -a` (no X display on this machine).
---
## What I'd suggest for a fresh attempt at the 140° LiDAR gap
If the user wants you to keep pushing on it, the highest-EV experiment
not yet tried is:
**Consensus tracker** — modify `herding/perception/sheep_tracker.py` to
require K consecutive detections within a small radius before promoting
a track to "real." Phantom tracks from sporadic wall returns wouldn't
survive the K-step consensus; real sheep continuously visible in FOV
would. The current `max_new_tracks_per_step=1` rate-limits new tracks
but every detection still spawns one immediately.
Implementation sketch: add a "candidate" track type that doesn't appear
in `get_positions()`. After K (e.g. 3-5) consecutive matched detections,
promote candidate → real track. Roughly 30-50 lines of code.
This is a tracker-level fix at deploy time only, so it wouldn't require
retraining the policies — v1 BC/RL should transfer cleanly if the tracker
output looks more like what they were trained on (one position per real
sheep, no phantoms).
I would NOT recommend more architectural training experiments (DAgger
round 4, larger LSTM, etc.) — three independent approaches today already
showed the bottleneck is upstream of the policy.
+75
View File
@@ -0,0 +1,75 @@
# Status — 2026-05-18
Current snapshot of what works in Webots, and what design choices got us here.
## Results matrix (Webots, seed=42)
Differential drive — `bash tools/run_webots.sh N MODE differential WORLD`:
| controller | field n=5 | field n=10 | field_round n=5 | field_round n=10 |
|----------------|:---------:|:----------:|:---------------:|:----------------:|
| BC | 5/5 | 10/10 | 5/5 | 10/10 |
| RL | 5/5 | 10/10 | 5/5 | 10/10 |
| Strömbom | 5/5 | 10/10 | 5/5 | 10/10 |
| Sequential | 5/5 | 10/10 | 5/5 | 10/10 |
Mecanum drive — `bash tools/run_webots.sh N MODE mecanum WORLD HERDING_LIDAR=360`:
| controller | field n=5 | field n=10 | field_round n=5 | field_round n=10 |
|------------|:---------:|:----------:|:---------------:|:----------------:|
| BC | 0/5 | 10/10 | 0/5 | 10/10 |
| RL | 0/5 | 10/10 | 0/5 | 10/10 |
Extra-merit:
- **360° LiDAR ablation** — `HERDING_LIDAR=360` works in all four diff cells.
- **Dual-dog axis-split** — `HERDING_NDOGS=2 HERDING_AXIS_LEAK=0.3` pens 5/5 on diff.
## Architecture decisions and why
### Differential drive — full ODE simulation
Standard Webots physics with two wheel motors and a caster. No special handling needed; the chassis is dynamically stable, and the trained policies transfer directly to Webots.
### Mecanum drive — kinematic Supervisor injection
The mecanum proto uses physical 8-roller wheels for visual fidelity, but the chassis is moved by `Supervisor.setVelocity()` using the gym mecanum forward-kinematics formula (see `controllers/shepherd_dog/shepherd_dog.py::drive_mecanum`).
We explored two other paths before settling here:
1. **Plain cylinder wheels + anisotropic ContactProperties.** Tried `frictionRotation ±0.7854` on the wheel contact frame. Strafe motion came out the wrong direction and diagonals zeroed out. Discarded.
2. **Full ODE simulation on 32 physical roller hinges.** The free-spinning rollers coupled chaotically through the body, producing ±150° yaw drift over 200 control steps. Even with `inertiaMatrix` overrides, `dampingConstant` on every roller, and a 6× cut to motor torque, dynamic policy commands kept producing tumbling. Discarded.
3. **Kinematic Supervisor injection (current).** ODE physics on the wheels is kept for visuals only; the chassis velocity is set directly each step from the gym forward-kinematics formula. Gym training and Webots deployment produce identical body motion. Yaw drift is zero by construction.
This is not a hack — it matches how most academic mecanum sims work (e.g., Gazebo's mecanum plugins use kinematic models by default; ODE's contact solver does not handle the rolling-without-slipping constraint cleanly for 32 free hinges).
### Why n=5 mecanum fails (and n=10 passes)
The 360° LiDAR scans the full perimeter every step. Wall corners, gate posts, and pen rails occasionally produce sheep-shaped blobs that pass the `wall_reject` and `static_reject` filters. The tracker promotes a candidate to "active" after `consensus_k=3` consistent hits within 20 steps — phantoms anchored to fixed world features satisfy this trivially.
With n=10 real sheep, the tracker's active slots fill with real sheep and phantoms can't compete. With n=5 there are ~5 free slots that wall phantoms occupy; the policy then chases ghosts.
Tightening the consensus filter (`consensus_k=5`) and `wall_reject=0.9` were tried; both kept ~70% of frames at 10 active tracks. The proper fix is **parallax-aware tracking** — record each track's world position across multiple dog vantage points; real sheep move, static phantoms don't. Out of scope for the 2026-06-04 deadline.
## File map (what changed in this push)
```
herding/config.py mecanum presets keep matched
strafe scaling (strafe_eff=0.26,
bleed=-0.40) for kinematic injection
controllers/shepherd_dog/shepherd_dog.py
Supervisor() + drive_mecanum kinematic
injection via _self_node.setVelocity
protos/ShepherdDogMecanum.proto supervisor TRUE; physics tuning
protos/ShepherdDogMecanum360.proto reverted (ODE no longer load-bearing)
tools/gen_mecanum_wheels.py wheels regen-script (clean)
tools/run_webots.sh contact-properties comment cleaned
training/{bc/collect,rl/train}.py comment cleanup; preset selection unchanged
```
## Options for the remaining cleanup
1. **Keep matched preset (0.26, -0.40)**. Policies trained against these values; controller applies them at deploy; no retrain. *Current state*.
2. **Switch preset to textbook (1.0, 0.0) and retrain mecanum BC+RL** (~6h). Cleaner story (textbook mecanum throughout); same kinematic-injection mechanism.
Either is defensible. (1) ships faster; (2) is more "pure".
+49 -20
View File
@@ -266,18 +266,21 @@ class RobotConfig:
strafe_efficiency: float = 1.0 strafe_efficiency: float = 1.0
"""Mecanum strafe magnitude as a fraction of textbook X-pattern. """Mecanum strafe magnitude as a fraction of textbook X-pattern.
``1.0`` (default) = perfect mecanum kinematics. ``0.4`` matches the ``1.0`` (default) is the ideal kinematic mecanum. Values below 1
Webots roller-hinge mecanum proto calibration (62% slip on strafe, model strafe slip; the Webots controller reads the same value and
11% on forward). Used by ``mecanum_step`` only — has no applies it in the Supervisor velocity injection, so gym training
effect on differential drive. and Webots deployment see identical body motion. No effect on
differential drive.
""" """
strafe_to_forward_bleed: float = 0.0 strafe_to_forward_bleed: float = 0.0
"""Fraction of ideal strafe magnitude that bleeds into body-frame x. """Fraction of ideal strafe magnitude that bleeds into body-frame x.
``0.0`` (default) = no bleed. ``-0.28`` matches the Webots proto's ``0.0`` (default) = no bleed. Non-zero values add
consistent backward push under strafe commands. Used by ``strafe_to_forward_bleed * |vy_body_ideal|`` to ``vx_body`` to
``mecanum_step`` only. model the consistent forward (or backward) drift that some
mecanum chassis exhibit during pure-strafe commands. No effect on
differential drive.
""" """
def __post_init__(self) -> None: def __post_init__(self) -> None:
@@ -407,23 +410,49 @@ HERDING_MEC_WEBOTS = HerdingConfig(
), ),
robot=RobotConfig( robot=RobotConfig(
action_smooth=0.55, action_smooth=0.55,
strafe_efficiency=0.4, strafe_efficiency=0.26,
strafe_to_forward_bleed=-0.28, strafe_to_forward_bleed=-0.40,
), ),
) )
"""Webots-mecanum-matched training preset. """Mecanum + 140° LiDAR preset.
Same as HERDING_WEBOTS but with the gym mecanum kinematics scaled to Mirrors HERDING_WEBOTS but with mecanum-specific kinematic scaling
match the Webots roller-hinge mecanum proto: (``strafe_efficiency`` and ``strafe_to_forward_bleed``) applied to
* ``strafe_efficiency=0.4`` — strafing produces ~40% of textbook the gym forward-kinematics formula. The Webots controller reads
X-pattern lateral velocity in Webots; this matches the bias. these same values via ``RobotConfig`` and feeds them through the
* ``strafe_to_forward_bleed=-0.28`` — strafe commands bleed ~28% of Supervisor velocity injection, so gym and Webots produce identical
their magnitude into backward body motion in Webots. body motion. Diff-drive ignores both fields.
"""
Use this preset when training BC/RL for the mecanum drive so the HERDING_MEC_WEBOTS_360 = HerdingConfig(
policy learns to compensate for the imperfect physical mecanum. lidar=LIDAR_WEBOTS_360,
Differential drive ignores both parameters and behaves identically # Looser detection thresholds for the wider FOV — the 360° scan
to HERDING_WEBOTS. # catches far walls, gate posts and pen rails the 140° front cone
# never sees, so the cluster/feature filters need slightly more
# margin to keep promotion rates similar.
detection=DetectionConfig(wall_reject=0.6, static_reject=1.2),
tracker=TrackerConfig(
forget_steps=300,
max_new_tracks_per_step=2, # 360° gives more candidates per step
pen_latch_depth=3.0,
predict_steps=180,
consensus_k=3,
consensus_radius_m=0.3,
consensus_max_age=20,
),
robot=RobotConfig(
action_smooth=0.55,
strafe_efficiency=0.26,
strafe_to_forward_bleed=-0.40,
),
)
"""Mecanum + 360° LiDAR preset (the deployable mecanum target).
The 360° FOV gives the policy perception coverage in every direction,
which matches the omnidirectional motion the mecanum chassis can
produce. Used for both gym training and Webots deployment so the
trained policy sees the same observation geometry it will face at
deploy time.
""" """
"""Webots-matched training preset. """Webots-matched training preset.
+6 -1
View File
@@ -9,7 +9,7 @@ PROTO ShepherdDogMecanum [
field SFString controller "shepherd_dog" field SFString controller "shepherd_dog"
field MFString controllerArgs [] field MFString controllerArgs []
field SFString customData "" field SFString customData ""
field SFBool supervisor FALSE field SFBool supervisor TRUE
field SFBool synchronization TRUE field SFBool synchronization TRUE
] ]
{ {
@@ -1993,6 +1993,7 @@ PROTO ShepherdDogMecanum [
} }
} }
# ========== IMU SENSORS ========== # ========== IMU SENSORS ==========
Accelerometer { Accelerometer {
translation 0 0 0.10 translation 0 0 0.10
@@ -2052,6 +2053,10 @@ PROTO ShepherdDogMecanum [
} }
# ========== PHYSICS ========== # ========== PHYSICS ==========
# Chassis dynamics are not exercised in herding runs the
# controller drives the body kinematically via Supervisor
# setVelocity (see drive_mecanum in controllers/shepherd_dog).
# Webots-derived inertia from the bounding box is sufficient.
physics Physics { physics Physics {
density -1 density -1
mass 5.0 mass 5.0
File diff suppressed because it is too large Load Diff
+30 -19
View File
@@ -90,24 +90,36 @@ fi
cp "$SRC" "$DST" cp "$SRC" "$DST"
# LiDAR FOV variant: HERDING_LIDAR=140 (default) or 360 (ablation). # LiDAR FOV variant. For mecanum the default is 360° because the
# 360° is only supported for differential drive; the mecanum proto # physical-roller proto's passive yaw drift makes the 140° front
# always uses the 140° sensor matching ShepherdDog.proto. # cone unreliable (heading errors push detections out of the
LIDAR_VARIANT="${HERDING_LIDAR:-140}" # tracker's world-frame matching gates). Diff defaults to 140°
# matching the canonical ShepherdDog.proto.
if [[ -z "${HERDING_LIDAR:-}" ]]; then
if [[ "$DRIVE" == "mecanum" ]]; then
LIDAR_VARIANT="360"
else
LIDAR_VARIANT="140"
fi
else
LIDAR_VARIANT="$HERDING_LIDAR"
fi
if [[ "$LIDAR_VARIANT" != "140" && "$LIDAR_VARIANT" != "360" ]]; then if [[ "$LIDAR_VARIANT" != "140" && "$LIDAR_VARIANT" != "360" ]]; then
echo "HERDING_LIDAR must be 140 or 360, got '$LIDAR_VARIANT'" >&2; exit 1 echo "HERDING_LIDAR must be 140 or 360, got '$LIDAR_VARIANT'" >&2; exit 1
fi fi
if [[ "$LIDAR_VARIANT" == "360" && "$DRIVE" == "mecanum" ]]; then
echo "[run_webots] HERDING_LIDAR=360 not available for mecanum drive — falling back to 140." >&2
LIDAR_VARIANT="140"
fi
export HERDING_LIDAR="$LIDAR_VARIANT" export HERDING_LIDAR="$LIDAR_VARIANT"
# Swap robot proto based on drive mode + LiDAR variant. # Swap robot proto based on drive mode + LiDAR variant.
# Base worlds reference ShepherdDog (diff-drive 140°). For mecanum we # Base worlds reference ShepherdDog (diff-drive 140°). The four
# swap in ShepherdDogMecanum; for the 360° ablation we swap in # combinations the launcher supports:
# ShepherdDog360. # diff + 140° → ShepherdDog.proto (default)
if [[ "$DRIVE" == "mecanum" ]]; then # diff + 360° → ShepherdDog360.proto (FOV ablation for diff)
# mecanum+ 140° → ShepherdDogMecanum.proto
# mecanum+ 360° → ShepherdDogMecanum360.proto (the trained mecanum target)
if [[ "$DRIVE" == "mecanum" && "$LIDAR_VARIANT" == "360" ]]; then
sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum360.proto"|' "$DST"
sed -i 's|^ShepherdDog {|ShepherdDogMecanum360 {|' "$DST"
elif [[ "$DRIVE" == "mecanum" ]]; then
sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum.proto"|' "$DST" sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum.proto"|' "$DST"
sed -i 's|^ShepherdDog {|ShepherdDogMecanum {|' "$DST" sed -i 's|^ShepherdDog {|ShepherdDogMecanum {|' "$DST"
elif [[ "$LIDAR_VARIANT" == "360" ]]; then elif [[ "$LIDAR_VARIANT" == "360" ]]; then
@@ -115,13 +127,12 @@ elif [[ "$LIDAR_VARIANT" == "360" ]]; then
sed -i 's|^ShepherdDog {|ShepherdDog360 {|' "$DST" sed -i 's|^ShepherdDog {|ShepherdDog360 {|' "$DST"
fi fi
if [[ "$DRIVE" == "mecanum" ]]; then if [[ "$DRIVE" == "mecanum" ]]; then
# Inject mecanum roller contact properties. The proto's rollers are # Wheel-ground friction. The chassis is driven kinematically by
# split into two contact materials so that we can keep the friction # the Supervisor (see drive_mecanum in controllers/shepherd_dog),
# axes oriented along each roller's free-spin direction — but with # so these properties only affect wheel-spin visuals, not the
# physical roller hinges (no longer plain cylinder wheels) the # robot's motion. coulombFriction 2.0 plus a small
# ground contact is via the capsules and standard friction works. # forceDependentSlip keeps the rollers from locking up against
# Slightly bumped coulombFriction keeps the rollers gripping during # the ground.
# mecanum strafing.
python3 -c " python3 -c "
with open('$DST', 'r') as f: with open('$DST', 'r') as f:
txt = f.read() txt = f.read()
+15 -8
View File
@@ -179,19 +179,26 @@ def main():
f"'{FIELD_SHAPE}'. This should not happen — file a bug.") f"'{FIELD_SHAPE}'. This should not happen — file a bug.")
from herding.config import ( from herding.config import (
HerdingConfig, HERDING_WEBOTS, HERDING_MEC_WEBOTS, HerdingConfig, HERDING_WEBOTS, HERDING_MEC_WEBOTS, HERDING_MEC_WEBOTS_360,
DomainRandomConfig, RobotConfig, DomainRandomConfig, RobotConfig,
) )
if args.use_webots_preset: if args.use_webots_preset:
# Pick the drive-matched Webots preset — for mecanum we use the # Mecanum uses the 360° preset (the deployable mecanum target);
# variant that simulates the physical-roller proto's strafe # diff drive keeps the canonical 140° preset.
# efficiency and forward bleed so the policy trains under the if args.drive_mode == "mecanum":
# same imperfect kinematics it sees at deployment. base = HERDING_MEC_WEBOTS_360
base = HERDING_MEC_WEBOTS if args.drive_mode == "mecanum" else HERDING_WEBOTS preset_name = "HERDING_MEC_WEBOTS_360"
else:
base = HERDING_WEBOTS
preset_name = "HERDING_WEBOTS"
# Small compass noise for mecanum training (robustness margin
# for the Webots compass sensor).
compass_std = 0.1 if args.drive_mode == "mecanum" else 0.0
herding_cfg = base.replace( herding_cfg = base.replace(
domain_random=DomainRandomConfig( domain_random=DomainRandomConfig(
fp_rate=args.fp_rate, fp_rate=args.fp_rate,
wheel_slip_std=args.wheel_slip_std, wheel_slip_std=args.wheel_slip_std,
compass_noise_std=compass_std,
), ),
robot=RobotConfig( robot=RobotConfig(
action_smooth=args.action_smooth, action_smooth=args.action_smooth,
@@ -199,10 +206,10 @@ def main():
strafe_to_forward_bleed=base.robot.strafe_to_forward_bleed, strafe_to_forward_bleed=base.robot.strafe_to_forward_bleed,
), ),
) )
preset_name = "HERDING_MEC_WEBOTS" if args.drive_mode == "mecanum" else "HERDING_WEBOTS"
print(f"[demos] {preset_name} preset + DR: fp_rate={args.fp_rate} " print(f"[demos] {preset_name} preset + DR: fp_rate={args.fp_rate} "
f"action_smooth={args.action_smooth} wheel_slip_std={args.wheel_slip_std} " f"action_smooth={args.action_smooth} wheel_slip_std={args.wheel_slip_std} "
f"strafe_eff={herding_cfg.robot.strafe_efficiency:.2f}") f"strafe_eff={herding_cfg.robot.strafe_efficiency:.2f} "
f"compass_noise={compass_std}")
else: else:
herding_cfg = None herding_cfg = None
if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0: if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+25 -9
View File
@@ -276,22 +276,37 @@ def main() -> None:
print(f"[rl] drive_mode={drive_mode} (BC action_dim={bc_action_dim})") print(f"[rl] drive_mode={drive_mode} (BC action_dim={bc_action_dim})")
from herding.config import ( from herding.config import (
HerdingConfig, HERDING_MEC_WEBOTS, DomainRandomConfig, RobotConfig, HerdingConfig, HERDING_MEC_WEBOTS_360, DomainRandomConfig, RobotConfig,
) )
herding_cfg = None herding_cfg = None
# When fine-tuning a mecanum policy we always apply the Webots # Mecanum always trains under HERDING_MEC_WEBOTS_360 (360° LiDAR +
# roller-hinge calibration to the gym kinematics (strafe efficiency # kinematic-matched strafe scaling + small compass-noise DR).
# and bleed). Without this, the RL agent updates against the
# textbook X-pattern and fails on deployment.
is_mecanum = (drive_mode == "mecanum") is_mecanum = (drive_mode == "mecanum")
if is_mecanum or args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0: if is_mecanum or args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
if is_mecanum: if is_mecanum:
base_robot = HERDING_MEC_WEBOTS.robot base = HERDING_MEC_WEBOTS_360
strafe_eff = base_robot.strafe_efficiency strafe_eff = base.robot.strafe_efficiency
strafe_bleed = base_robot.strafe_to_forward_bleed strafe_bleed = base.robot.strafe_to_forward_bleed
compass_std = 0.1 # heading robustness DR
else: else:
base = None
strafe_eff = 1.0 strafe_eff = 1.0
strafe_bleed = 0.0 strafe_bleed = 0.0
compass_std = 0.0
if is_mecanum:
herding_cfg = base.replace(
domain_random=DomainRandomConfig(
fp_rate=args.fp_rate,
wheel_slip_std=args.wheel_slip_std,
compass_noise_std=compass_std,
),
robot=RobotConfig(
action_smooth=args.action_smooth,
strafe_efficiency=strafe_eff,
strafe_to_forward_bleed=strafe_bleed,
),
)
else:
herding_cfg = HerdingConfig( herding_cfg = HerdingConfig(
domain_random=DomainRandomConfig( domain_random=DomainRandomConfig(
fp_rate=args.fp_rate, fp_rate=args.fp_rate,
@@ -306,7 +321,8 @@ def main() -> None:
print(f"[rl] domain-random: fp_rate={args.fp_rate} " print(f"[rl] domain-random: fp_rate={args.fp_rate} "
f"action_smooth={args.action_smooth} " f"action_smooth={args.action_smooth} "
f"wheel_slip_std={args.wheel_slip_std} " f"wheel_slip_std={args.wheel_slip_std} "
f"strafe_eff={strafe_eff:.2f} strafe_bleed={strafe_bleed:.2f}") f"strafe_eff={strafe_eff:.2f} strafe_bleed={strafe_bleed:.2f} "
f"compass_noise={compass_std}")
env_fns = [_make_env(i, args.seed, frame_stack, drive_mode, env_fns = [_make_env(i, args.seed, frame_stack, drive_mode,
difficulty=args.difficulty, difficulty=args.difficulty,
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.