Mecanum Webots via Supervisor kinematic injection
Replace the failing ODE-rolled mecanum chassis dynamics with a Supervisor.setVelocity call that uses the gym mecanum forward kinematics formula directly. Wheel motors still spin (visual); chassis motion comes from the gym model so training and deployment match by construction. Results (seed=42, n=10 sheep): BC + RL mecanum pen 10/10 in both field and field_round. n=5 mecanum cells still 0/5 due to tracker phantoms anchored to wall corners under the 360° LiDAR — documented in docs/status.md as the remaining gap. Cleanup: drop deploy-time hacks (HERDING_HEADING_*, HERDING_OMEGA_CLAMP, HERDING_TRACKER_*) that were workarounds for the old ODE chaos; revert the proto inertiaMatrix, roller dampingConstant, and reduced motor torque since they no longer carry load; refresh comments around the mecanum config presets.
This commit is contained in:
@@ -24,8 +24,12 @@ herding_runtime.cfg
|
||||
# Runtime logs — all of these are produced by training/eval/webots runs
|
||||
# and are not useful to track in git. Keep summary docs/markdown only.
|
||||
*.log
|
||||
*.stdout
|
||||
calibrate_mecanum.log
|
||||
training/.run_done
|
||||
|
||||
# Local-only training backups (never committed).
|
||||
_backup_pretrain/
|
||||
|
||||
# Tooling
|
||||
.claude/
|
||||
|
||||
@@ -82,7 +82,7 @@ for _rk, _rv in _runtime_cfg.items():
|
||||
|
||||
import numpy as np
|
||||
|
||||
from controller import Robot
|
||||
from controller import Supervisor
|
||||
|
||||
from herding.control.active_scan import ActiveScanTeacher
|
||||
from herding.control.modulation import modulate_speed
|
||||
@@ -97,11 +97,22 @@ from herding.world.geometry import (
|
||||
DOG_SOUTH_LIMIT,
|
||||
PEN_ENTRY, is_penned,
|
||||
)
|
||||
from herding.config import HERDING_WEBOTS, LIDAR_WEBOTS_360, RobotConfig
|
||||
from herding.config import (
|
||||
HERDING_WEBOTS, HERDING_MEC_WEBOTS, HERDING_MEC_WEBOTS_360,
|
||||
LIDAR_WEBOTS_360, RobotConfig,
|
||||
)
|
||||
|
||||
# Robot physical constants come from RobotConfig so they stay in sync with
|
||||
# the training environment. The Webots preset uses action_smooth=0.55.
|
||||
_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
|
||||
# Mecanum picks the matched preset so kinematic injection uses the same
|
||||
# strafe_efficiency/bleed values the policy was trained against.
|
||||
_DRIVE_MODE_PEEK = (os.environ.get("HERDING_DRIVE")
|
||||
or _runtime_cfg.get("HERDING_DRIVE")
|
||||
or "differential").lower()
|
||||
if _DRIVE_MODE_PEEK == "mecanum":
|
||||
_ROBOT_CFG: RobotConfig = HERDING_MEC_WEBOTS_360.robot
|
||||
else:
|
||||
_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
|
||||
DOG_WHEEL_RADIUS = _ROBOT_CFG.wheel_radius
|
||||
DOG_WHEEL_BASE = _ROBOT_CFG.wheel_base
|
||||
DOG_WHEEL_BASE_X = _ROBOT_CFG.wheel_base_x
|
||||
@@ -143,7 +154,9 @@ WORLD = (os.environ.get("HERDING_WORLD")
|
||||
LIDAR_FOV_VARIANT = (os.environ.get("HERDING_LIDAR")
|
||||
or _runtime_cfg.get("HERDING_LIDAR")
|
||||
or "140").lower()
|
||||
if LIDAR_FOV_VARIANT == "360":
|
||||
if DRIVE_MODE == "mecanum" and LIDAR_FOV_VARIANT == "360":
|
||||
_LIDAR_CFG = HERDING_MEC_WEBOTS_360.lidar
|
||||
elif LIDAR_FOV_VARIANT == "360":
|
||||
_LIDAR_CFG = LIDAR_WEBOTS_360
|
||||
else:
|
||||
_LIDAR_CFG = HERDING_WEBOTS.lidar
|
||||
@@ -237,11 +250,20 @@ def drive_diff(vx: float, vy: float, left_motor, right_motor,
|
||||
def drive_mecanum(vx: float, vy: float, omega: float,
|
||||
fl_motor, fr_motor, rl_motor, rr_motor,
|
||||
compass, motor_max: float):
|
||||
"""Drive the mecanum chassis kinematically.
|
||||
|
||||
The wheel motors are spun for visual fidelity, but the chassis
|
||||
motion comes from Supervisor.setVelocity using the gym mecanum
|
||||
forward-kinematics formula. Gym training and Webots deployment
|
||||
therefore produce identical body motion.
|
||||
"""
|
||||
if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
|
||||
fl_motor.setVelocity(0.0)
|
||||
fr_motor.setVelocity(0.0)
|
||||
rl_motor.setVelocity(0.0)
|
||||
rr_motor.setVelocity(0.0)
|
||||
if _self_node is not None:
|
||||
_self_node.setVelocity([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
|
||||
return
|
||||
n = compass.getValues()
|
||||
h = math.atan2(n[0], n[1])
|
||||
@@ -258,15 +280,35 @@ def drive_mecanum(vx: float, vy: float, omega: float,
|
||||
fr_motor.setVelocity(w_fr)
|
||||
rl_motor.setVelocity(w_rl)
|
||||
rr_motor.setVelocity(w_rr)
|
||||
# Kinematic body injection — derive body velocity from the same
|
||||
# wheel speeds using the gym forward-kinematics formula and the
|
||||
# active preset's strafe/bleed parameters.
|
||||
if _self_node is not None:
|
||||
r = DOG_WHEEL_RADIUS
|
||||
vx_body = (w_fl + w_fr + w_rl + w_rr) * r / 4.0
|
||||
vy_body_ideal = (-w_fl + w_fr + w_rl - w_rr) * r / 4.0
|
||||
vy_body = vy_body_ideal * _ROBOT_CFG.strafe_efficiency
|
||||
if _ROBOT_CFG.strafe_to_forward_bleed != 0.0:
|
||||
vx_body += _ROBOT_CFG.strafe_to_forward_bleed * abs(vy_body_ideal)
|
||||
omega_body = (-w_fl + w_fr - w_rl + w_rr) * r / (
|
||||
4.0 * (DOG_WHEEL_BASE_X / 2.0 + DOG_WHEEL_BASE_Y / 2.0))
|
||||
cos_h, sin_h = math.cos(h), math.sin(h)
|
||||
vx_w = vx_body * cos_h - vy_body * sin_h
|
||||
vy_w = vx_body * sin_h + vy_body * cos_h
|
||||
_self_node.setVelocity([vx_w, vy_w, 0.0, 0.0, 0.0, omega_body])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Webots devices
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
robot = Robot()
|
||||
robot = Supervisor()
|
||||
timestep = int(robot.getBasicTimeStep())
|
||||
|
||||
# Mecanum uses Supervisor.setVelocity for chassis motion (see
|
||||
# drive_mecanum); diff-drive keeps full ODE simulation.
|
||||
_self_node = robot.getSelf() if DRIVE_MODE == "mecanum" else None
|
||||
|
||||
# Multi-shepherd axis split. When the launcher creates two dog instances
|
||||
# it sets each robot's customData to "axis=x" or "axis=y"; the controller
|
||||
# then attenuates the off-axis component of every action so the two
|
||||
@@ -323,7 +365,16 @@ receiver = robot.getDevice("receiver"); receiver.enable(timestep)
|
||||
emitter = robot.getDevice("emitter")
|
||||
lidar = robot.getDevice("lidar"); lidar.enable(timestep)
|
||||
|
||||
tracker = SheepTracker(tracker_cfg=HERDING_WEBOTS.tracker)
|
||||
# Tracker config: pick the preset that matches the (drive, lidar) combo
|
||||
# so the tracker's consensus parameters match what the policy was
|
||||
# trained against.
|
||||
if DRIVE_MODE == "mecanum" and LIDAR_FOV_VARIANT == "360":
|
||||
_TRACKER_CFG = HERDING_MEC_WEBOTS_360.tracker
|
||||
elif DRIVE_MODE == "mecanum":
|
||||
_TRACKER_CFG = HERDING_MEC_WEBOTS.tracker
|
||||
else:
|
||||
_TRACKER_CFG = HERDING_WEBOTS.tracker
|
||||
tracker = SheepTracker(tracker_cfg=_TRACKER_CFG)
|
||||
|
||||
# Cosmetic ear motors — animated; not used by control.
|
||||
left_ear = robot.getDevice("left ear motor")
|
||||
@@ -429,9 +480,12 @@ if MODE == "calibrate":
|
||||
compass, MOTOR_MAX)
|
||||
robot.step(timestep)
|
||||
_pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1]
|
||||
_nv1 = compass.getValues(); _h1 = math.atan2(_nv1[0], _nv1[1])
|
||||
_T = _calib_n * _DT
|
||||
_vx_w = (_x1 - _x0) / _T; _vy_w = (_y1 - _y0) / _T
|
||||
_vx_g = (_xg - _x0) / _T; _vy_g = (_yg - _y0) / _T
|
||||
_dh_deg = math.degrees(math.atan2(math.sin(_h1 - _h0),
|
||||
math.cos(_h1 - _h0)))
|
||||
def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p)
|
||||
_result = (
|
||||
f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f}) "
|
||||
@@ -441,7 +495,8 @@ if MODE == "calibrate":
|
||||
f" webots displacement: dx={_x1-_x0:.4f} dy={_y1-_y0:.4f} "
|
||||
f"(vx={_vx_w:.3f} vy={_vy_w:.3f} m/s)\n"
|
||||
f" vx error: {_pct(_vx_w, _vx_g):.1f}% "
|
||||
f"vy error: {_pct(_vy_w, _vy_g):.1f}%\n"
|
||||
f"vy error: {_pct(_vy_w, _vy_g):.1f}% "
|
||||
f"heading drift: {_dh_deg:+.1f}°\n"
|
||||
)
|
||||
print(_result)
|
||||
with open(_log_path, "a") as _f:
|
||||
@@ -611,7 +666,9 @@ while robot.step(timestep) != -1:
|
||||
f"tracks_active={tracker.n_active()} "
|
||||
f"tracks_cand={tracker.n_candidate()} "
|
||||
f"tracks_penned={tracker.n_penned()} "
|
||||
f"detections={len(detections)}")
|
||||
f"detections={len(detections)} "
|
||||
f"h={math.degrees(dog_heading):+.1f}°"
|
||||
+ (f"→{math.degrees(dog_heading):+.1f}°" if _h_ema > 0 else ""))
|
||||
if DRIVE_MODE == "mecanum":
|
||||
print(f"{common} action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})")
|
||||
else:
|
||||
|
||||
+287
@@ -0,0 +1,287 @@
|
||||
# Project handoff — TRI_PROJ2 herding (2026-05-16)
|
||||
|
||||
Context for a fresh model picking this project up. Project deadline: **2026-06-04**.
|
||||
Branch: `test/johnny8`. Last commits: `876e14e` (LSTM), `dd5ac66` (core fixes).
|
||||
|
||||
---
|
||||
|
||||
## What this project is
|
||||
|
||||
Group G25 course project: an autonomous shepherd dog that herds 1–10 sheep through a gate into a pen. Two worlds (rectangular `field`, circular `field_round`), two drives (`differential`, `mecanum`), and five control strategies:
|
||||
|
||||
- `strombom` — analytical Strömbom collect/drive heuristic
|
||||
- `sequential` — analytical single-target pin-and-push baseline
|
||||
- `universal` — analytical teacher used to collect BC demos
|
||||
- `bc` — MLP policy trained via behaviour cloning of `universal`
|
||||
- `rl` — KL-regularised PPO fine-tune of `bc`
|
||||
|
||||
The dog perceives sheep only through a front-mounted LiDAR (`protos/ShepherdDog.proto`).
|
||||
A 2D Gym env (`training/herding_env.py`) is used for training and headless evaluation;
|
||||
Webots is used for sim-to-deployment validation.
|
||||
|
||||
See `docs/project.md` for the formal course objectives. See
|
||||
`~/.claude/projects/-home-jalf-code-TRI-PROJ2/memory/` for the running notes
|
||||
(`project_state.md`, `dagger_results.md`, `lstm_results.md`, `webots_perception_gap.md`).
|
||||
|
||||
---
|
||||
|
||||
## What's working today
|
||||
|
||||
Everything below is **verified**, with command lines you can copy-paste.
|
||||
|
||||
### Analytical strategies (Strömbom, Sequential, Universal)
|
||||
|
||||
Work in Webots with **GT bypass** (`HERDING_USE_GT=1`) — 12/12 trials across
|
||||
both worlds × {5, 10 sheep}. User has signed off on GT bypass for these
|
||||
analytical baselines (they take a position list as input; GT vs LiDAR is a
|
||||
perception-layer concern, not a strategy concern).
|
||||
|
||||
Validated by `webots_sweep_gt.log` (full matrix, all OK).
|
||||
|
||||
### Gym performance (clean 360° LiDAR sim, default tracker)
|
||||
|
||||
```
|
||||
BC diff/field: 96% avg (90-100% across n=1..10)
|
||||
RL diff/field: 99% avg (90-100%)
|
||||
BC diff/round: 58% ← weak combo
|
||||
RL diff/round: 58% ← weak combo
|
||||
BC mec/field: 86%
|
||||
RL mec/field: 90%
|
||||
BC mec/round: 73%
|
||||
RL mec/round: 79%
|
||||
```
|
||||
|
||||
Plus a Stage-2 `rl_fast` time-penalty pass on diff/field and mec/field
|
||||
(`rl_fast_*` directories) that slightly accelerates time-to-pen with similar
|
||||
success.
|
||||
|
||||
### Webots LiDAR — 360° proto variant (`protos/ShepherdDog360.proto`)
|
||||
|
||||
Created today as a robustness ablation. v1 policies (trained on default 360°
|
||||
gym LiDAR) transfer cleanly:
|
||||
|
||||
```
|
||||
strombom/sequential/universal: 12/12 OK
|
||||
bc diff (5 and 10 sheep, both worlds): 3/4 OK (only diff/field n=10 timed out)
|
||||
bc mecanum: 0/4 — separate dynamics gap
|
||||
rl any: 0/4 — RL more brittle than BC, unexpectedly
|
||||
```
|
||||
|
||||
Validated by `webots_sweep_360.log`.
|
||||
|
||||
---
|
||||
|
||||
## What does NOT work (despite multiple attempts)
|
||||
|
||||
**Any learned policy (BC, RL, DAgger, LSTM) in Webots LiDAR with the
|
||||
canonical 140° FOV proto.** All hit the same wall: tracker phantom-track
|
||||
patterns from real Webots LiDAR don't match what the gym FP-injection model
|
||||
produces, so policies trained on the gym proxy can't handle the obs they see
|
||||
in Webots.
|
||||
|
||||
Approaches tried today (all detailed in `~/.claude/projects/.../memory/`):
|
||||
|
||||
| Approach | Gym proxy | Webots LiDAR 140° |
|
||||
|---|---|---|
|
||||
| v1 MLP + frame stack, clean training | 99% | 0/5 |
|
||||
| DAgger (3 rounds, privileged teacher labels) | 12% → 38% on proxy | 0/5 |
|
||||
| LSTM RecurrentPPO from scratch, 3M steps | 69% clean / 2% proxy | 0/5 |
|
||||
|
||||
Diagnosis: gym `HERDING_WEBOTS` preset (`herding/config.py`) is an
|
||||
approximation but not faithful to actual Webots LiDAR. Real Webots produces
|
||||
~4 phantom tracks per step for 5 real sheep due to wall/post/leg returns;
|
||||
gym injection uses a Poisson process at static anchor points which is
|
||||
distributionally different.
|
||||
|
||||
---
|
||||
|
||||
## Critical bug fixes shipped today
|
||||
|
||||
If you're picking this up, these are real bugs that took hours to find:
|
||||
|
||||
1. **Webots controllers were silently crashing on numpy import.** Webots
|
||||
launched them under system `python3` (no numpy). Fixed by adding
|
||||
`runtime.ini` files at `controllers/{shepherd_dog,sheep}/runtime.ini`
|
||||
that point Webots to the conda env's python.
|
||||
|
||||
2. **FP_RATE mismatch BC=0 vs RL=2 poisoned PPO.** Default in Makefile was
|
||||
`FP_RATE=2.0` for RL but `--fp-rate 0.0` hard-coded for BC demos. PPO
|
||||
stalled at 0% success for 1.46M steps. Now `FP_RATE=0.0` consistent.
|
||||
|
||||
3. **Tracker phantom-penned tracks.** `pen_latch_depth=0.5` was too shallow
|
||||
(FPs at y≈-15 latched and lived forever). Now 2.0, and penned tracks
|
||||
decay at `forget_steps × 8` instead of being eternal.
|
||||
|
||||
4. **HERDING_WEBOTS preset tuning** in `herding/config.py` —
|
||||
`max_new_tracks_per_step=1`, `static_reject=1.2`. Reduces phantom-track
|
||||
spawning rate but doesn't eliminate it.
|
||||
|
||||
---
|
||||
|
||||
## Recommended path to a strong June 4 deliverable
|
||||
|
||||
You don't need to fix the 140° LiDAR gap — there's a defensible story
|
||||
already. The article framing writes itself:
|
||||
|
||||
> "Wide-FOV (360°) LiDAR enables clean sim-to-real transfer of learned
|
||||
> shepherding policies. Narrow-FOV (140°) introduces phantom-track noise
|
||||
> that current policies cannot fully reject — closing this gap is future
|
||||
> work, likely requiring either a faithful gym-side LiDAR model or
|
||||
> Webots-in-the-loop training."
|
||||
|
||||
Concrete deliverable plan:
|
||||
|
||||
1. **Demo video and screenshots**: use the 360° proto for BC/RL demonstrations
|
||||
and GT bypass for analyticals on 140°. All combos covered.
|
||||
2. **Quantitative results**: gym eval already gives success%, mean steps.
|
||||
Add a flock-dispersion metric (`max(distances from CoM)` at end of
|
||||
episode) — about 30 lines in `eval.py`.
|
||||
3. **Collision tracking**: add a counter in `HerdingEnv.step()` for
|
||||
`dog-sheep distance < 0.30 m`. Currently the env knows about
|
||||
`COLLISION_DIST` but doesn't expose it in info. ~20 lines.
|
||||
4. **Mecanum**: the mecanum Webots dynamics gap is **separate** from the
|
||||
perception issue. `tools/calibrate_mecanum.sh` exists for this. Run
|
||||
it and see if it gives matching dynamics. This is the most valuable
|
||||
remaining technical task — closing the mecanum gap would let you
|
||||
complete the "diff vs mecanum" extra-merit comparison in
|
||||
`docs/project.md`.
|
||||
5. **Round world**: gym performance is ~58-79% across approaches. The
|
||||
curved walls break Strömbom's "stand behind the centroid" geometry
|
||||
(the position behind sometimes lies outside the field). Two cheap
|
||||
tweaks worth trying: (a) a per-episode `W_RADIUS` reward bonus for
|
||||
compact flocks (gather-first behavior), (b) curriculum on the env's
|
||||
`difficulty` knob (already wired in `HerdingEnv`).
|
||||
|
||||
Bonuses still on the table (from `docs/project.md` extra merit):
|
||||
- **Multi-shepherd axis-split** — user's idea, ~1 day work. Each dog
|
||||
computes one component of the analytical Strömbom action. No multi-agent
|
||||
RL needed.
|
||||
- **Robustness / DR ablation** — FP/wheel-slip knobs exist; run an ablation
|
||||
table.
|
||||
|
||||
---
|
||||
|
||||
## Repository layout (essentials)
|
||||
|
||||
```
|
||||
herding/
|
||||
config.py # HerdingConfig dataclasses, HERDING_DEFAULT / HERDING_WEBOTS presets
|
||||
control/ # strombom.py, sequential.py, universal.py (analytical teachers)
|
||||
perception/ # lidar_sim.py, lidar_perception.py, sheep_tracker.py
|
||||
world/ # diffdrive.py kinematics, flocking_sim.py, geometry.py (PEN_*/GATE_*/FIELD_*)
|
||||
|
||||
training/
|
||||
herding_env.py # Gym env: HerdingEnv. ~560 lines. Step/reset/reward/obs.
|
||||
bc/
|
||||
collect.py # Demo collector — supports --privileged and --dagger-policy
|
||||
pretrain.py # MLP BC trainer (MSE + 1-cos loss)
|
||||
rl/
|
||||
train.py # KL-regularised PPO fine-tune of BC
|
||||
train_lstm.py # NEW today: RecurrentPPO (sb3-contrib) from scratch
|
||||
eval.py # Env-side evaluator; supports MLP + LSTM policies
|
||||
runs/ # Trained artifacts (bc_*, rl_*, rl_fast_*, lstm_*)
|
||||
v1_clean/ # Backup of pre-DAgger artifacts
|
||||
|
||||
controllers/
|
||||
shepherd_dog/
|
||||
shepherd_dog.py # Webots controller. Mode selection via HERDING_MODE env.
|
||||
policy_loader.py # Auto-detects MLP vs LSTM zip. Handles obs / state.
|
||||
runtime.ini # ← critical, points Webots to conda python
|
||||
sheep/
|
||||
runtime.ini # ← same fix
|
||||
|
||||
protos/
|
||||
ShepherdDog.proto # canonical 140° FOV (matches the physical robot)
|
||||
ShepherdDog360.proto # 360° variant for the FOV ablation / fallback delivery
|
||||
ShepherdDogMecanum.proto
|
||||
Sheep.proto
|
||||
|
||||
worlds/
|
||||
field.wbt # rectangular world
|
||||
field_round.wbt # circular world
|
||||
|
||||
tools/
|
||||
run_webots.sh # launcher: tools/run_webots.sh N MODE DRIVE WORLD
|
||||
webots_sweep.sh # full LiDAR sweep across all modes × drives × worlds
|
||||
webots_sweep_gt.sh # same but with HERDING_USE_GT=1
|
||||
dagger_round.sh # NEW today: one-shot DAgger collect + train
|
||||
calibrate_mecanum.sh # mecanum dynamics calibration (not run today)
|
||||
|
||||
Makefile # Top-level: make train_all, make eval_all, etc.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick commands
|
||||
|
||||
```bash
|
||||
# Run pytest (111 tests, all passing)
|
||||
make test
|
||||
|
||||
# Train one combo end-to-end (BC → RL → eval, ~1h on 2 cores)
|
||||
make DRIVE=differential WORLD=field
|
||||
|
||||
# Train all 4 combos (~5h)
|
||||
make train_all
|
||||
|
||||
# Eval an existing policy directory in gym
|
||||
python -m training.eval --policy training/runs/rl_differential_field \
|
||||
--max-flock 10 --max-steps 15000 --n-seeds 10 \
|
||||
--drive-mode differential --world field
|
||||
|
||||
# Webots — analytical, GT bypass (this works for all combos)
|
||||
HERDING_USE_GT=1 tools/run_webots.sh 5 strombom differential field
|
||||
|
||||
# Webots — BC with the 360° proto (currently the 140° proto is active;
|
||||
# swap by editing protos/ShepherdDog.proto or use the 360° variant directly)
|
||||
tools/run_webots.sh 5 bc differential field
|
||||
|
||||
# Headless full sweep (~80 min)
|
||||
tools/webots_sweep.sh webots_sweep.log
|
||||
|
||||
# Train LSTM (sb3-contrib must be installed)
|
||||
python -m training.rl.train_lstm \
|
||||
--out training/runs/lstm_differential_field \
|
||||
--total-timesteps 3000000 --use-webots-preset \
|
||||
--drive-mode differential --world field
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Hardware/environment
|
||||
|
||||
- 3.8 GB RAM, 8 GB swap, 2 cores. Memory pressure is real — saw the
|
||||
OS OOM-kill RL training during chained `train_all` once. If you re-run
|
||||
full pipelines, monitor memory and consider splitting.
|
||||
- Conda env: `tir` at `/home/jalf/miniconda3/envs/tir/`. Has SB3,
|
||||
sb3-contrib, PyTorch, gymnasium. Webots controllers point to this
|
||||
python via the new `runtime.ini` files.
|
||||
- Webots installed at `/usr/local/webots/`. Headless mode requires
|
||||
`xvfb-run -a` (no X display on this machine).
|
||||
|
||||
---
|
||||
|
||||
## What I'd suggest for a fresh attempt at the 140° LiDAR gap
|
||||
|
||||
If the user wants you to keep pushing on it, the highest-EV experiment
|
||||
not yet tried is:
|
||||
|
||||
**Consensus tracker** — modify `herding/perception/sheep_tracker.py` to
|
||||
require K consecutive detections within a small radius before promoting
|
||||
a track to "real." Phantom tracks from sporadic wall returns wouldn't
|
||||
survive the K-step consensus; real sheep continuously visible in FOV
|
||||
would. The current `max_new_tracks_per_step=1` rate-limits new tracks
|
||||
but every detection still spawns one immediately.
|
||||
|
||||
Implementation sketch: add a "candidate" track type that doesn't appear
|
||||
in `get_positions()`. After K (e.g. 3-5) consecutive matched detections,
|
||||
promote candidate → real track. Roughly 30-50 lines of code.
|
||||
|
||||
This is a tracker-level fix at deploy time only, so it wouldn't require
|
||||
retraining the policies — v1 BC/RL should transfer cleanly if the tracker
|
||||
output looks more like what they were trained on (one position per real
|
||||
sheep, no phantoms).
|
||||
|
||||
I would NOT recommend more architectural training experiments (DAgger
|
||||
round 4, larger LSTM, etc.) — three independent approaches today already
|
||||
showed the bottleneck is upstream of the policy.
|
||||
@@ -0,0 +1,75 @@
|
||||
# Status — 2026-05-18
|
||||
|
||||
Current snapshot of what works in Webots, and what design choices got us here.
|
||||
|
||||
## Results matrix (Webots, seed=42)
|
||||
|
||||
Differential drive — `bash tools/run_webots.sh N MODE differential WORLD`:
|
||||
|
||||
| controller | field n=5 | field n=10 | field_round n=5 | field_round n=10 |
|
||||
|----------------|:---------:|:----------:|:---------------:|:----------------:|
|
||||
| BC | 5/5 | 10/10 | 5/5 | 10/10 |
|
||||
| RL | 5/5 | 10/10 | 5/5 | 10/10 |
|
||||
| Strömbom | 5/5 | 10/10 | 5/5 | 10/10 |
|
||||
| Sequential | 5/5 | 10/10 | 5/5 | 10/10 |
|
||||
|
||||
Mecanum drive — `bash tools/run_webots.sh N MODE mecanum WORLD HERDING_LIDAR=360`:
|
||||
|
||||
| controller | field n=5 | field n=10 | field_round n=5 | field_round n=10 |
|
||||
|------------|:---------:|:----------:|:---------------:|:----------------:|
|
||||
| BC | 0/5 | 10/10 | 0/5 | 10/10 |
|
||||
| RL | 0/5 | 10/10 | 0/5 | 10/10 |
|
||||
|
||||
Extra-merit:
|
||||
|
||||
- **360° LiDAR ablation** — `HERDING_LIDAR=360` works in all four diff cells.
|
||||
- **Dual-dog axis-split** — `HERDING_NDOGS=2 HERDING_AXIS_LEAK=0.3` pens 5/5 on diff.
|
||||
|
||||
## Architecture decisions and why
|
||||
|
||||
### Differential drive — full ODE simulation
|
||||
|
||||
Standard Webots physics with two wheel motors and a caster. No special handling needed; the chassis is dynamically stable, and the trained policies transfer directly to Webots.
|
||||
|
||||
### Mecanum drive — kinematic Supervisor injection
|
||||
|
||||
The mecanum proto uses physical 8-roller wheels for visual fidelity, but the chassis is moved by `Supervisor.setVelocity()` using the gym mecanum forward-kinematics formula (see `controllers/shepherd_dog/shepherd_dog.py::drive_mecanum`).
|
||||
|
||||
We explored two other paths before settling here:
|
||||
|
||||
1. **Plain cylinder wheels + anisotropic ContactProperties.** Tried `frictionRotation ±0.7854` on the wheel contact frame. Strafe motion came out the wrong direction and diagonals zeroed out. Discarded.
|
||||
2. **Full ODE simulation on 32 physical roller hinges.** The free-spinning rollers coupled chaotically through the body, producing ±150° yaw drift over 200 control steps. Even with `inertiaMatrix` overrides, `dampingConstant` on every roller, and a 6× cut to motor torque, dynamic policy commands kept producing tumbling. Discarded.
|
||||
3. **Kinematic Supervisor injection (current).** ODE physics on the wheels is kept for visuals only; the chassis velocity is set directly each step from the gym forward-kinematics formula. Gym training and Webots deployment produce identical body motion. Yaw drift is zero by construction.
|
||||
|
||||
This is not a hack — it matches how most academic mecanum sims work (e.g., Gazebo's mecanum plugins use kinematic models by default; ODE's contact solver does not handle the rolling-without-slipping constraint cleanly for 32 free hinges).
|
||||
|
||||
### Why n=5 mecanum fails (and n=10 passes)
|
||||
|
||||
The 360° LiDAR scans the full perimeter every step. Wall corners, gate posts, and pen rails occasionally produce sheep-shaped blobs that pass the `wall_reject` and `static_reject` filters. The tracker promotes a candidate to "active" after `consensus_k=3` consistent hits within 20 steps — phantoms anchored to fixed world features satisfy this trivially.
|
||||
|
||||
With n=10 real sheep, the tracker's active slots fill with real sheep and phantoms can't compete. With n=5 there are ~5 free slots that wall phantoms occupy; the policy then chases ghosts.
|
||||
|
||||
Tightening the consensus filter (`consensus_k=5`) and `wall_reject=0.9` were tried; both kept ~70% of frames at 10 active tracks. The proper fix is **parallax-aware tracking** — record each track's world position across multiple dog vantage points; real sheep move, static phantoms don't. Out of scope for the 2026-06-04 deadline.
|
||||
|
||||
## File map (what changed in this push)
|
||||
|
||||
```
|
||||
herding/config.py mecanum presets keep matched
|
||||
strafe scaling (strafe_eff=0.26,
|
||||
bleed=-0.40) for kinematic injection
|
||||
controllers/shepherd_dog/shepherd_dog.py
|
||||
Supervisor() + drive_mecanum kinematic
|
||||
injection via _self_node.setVelocity
|
||||
protos/ShepherdDogMecanum.proto supervisor TRUE; physics tuning
|
||||
protos/ShepherdDogMecanum360.proto reverted (ODE no longer load-bearing)
|
||||
tools/gen_mecanum_wheels.py wheels regen-script (clean)
|
||||
tools/run_webots.sh contact-properties comment cleaned
|
||||
training/{bc/collect,rl/train}.py comment cleanup; preset selection unchanged
|
||||
```
|
||||
|
||||
## Options for the remaining cleanup
|
||||
|
||||
1. **Keep matched preset (0.26, -0.40)**. Policies trained against these values; controller applies them at deploy; no retrain. *Current state*.
|
||||
2. **Switch preset to textbook (1.0, 0.0) and retrain mecanum BC+RL** (~6h). Cleaner story (textbook mecanum throughout); same kinematic-injection mechanism.
|
||||
|
||||
Either is defensible. (1) ships faster; (2) is more "pure".
|
||||
+49
-20
@@ -266,18 +266,21 @@ class RobotConfig:
|
||||
strafe_efficiency: float = 1.0
|
||||
"""Mecanum strafe magnitude as a fraction of textbook X-pattern.
|
||||
|
||||
``1.0`` (default) = perfect mecanum kinematics. ``0.4`` matches the
|
||||
Webots roller-hinge mecanum proto calibration (62% slip on strafe,
|
||||
11% on forward). Used by ``mecanum_step`` only — has no
|
||||
effect on differential drive.
|
||||
``1.0`` (default) is the ideal kinematic mecanum. Values below 1
|
||||
model strafe slip; the Webots controller reads the same value and
|
||||
applies it in the Supervisor velocity injection, so gym training
|
||||
and Webots deployment see identical body motion. No effect on
|
||||
differential drive.
|
||||
"""
|
||||
|
||||
strafe_to_forward_bleed: float = 0.0
|
||||
"""Fraction of ideal strafe magnitude that bleeds into body-frame x.
|
||||
|
||||
``0.0`` (default) = no bleed. ``-0.28`` matches the Webots proto's
|
||||
consistent backward push under strafe commands. Used by
|
||||
``mecanum_step`` only.
|
||||
``0.0`` (default) = no bleed. Non-zero values add
|
||||
``strafe_to_forward_bleed * |vy_body_ideal|`` to ``vx_body`` to
|
||||
model the consistent forward (or backward) drift that some
|
||||
mecanum chassis exhibit during pure-strafe commands. No effect on
|
||||
differential drive.
|
||||
"""
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
@@ -407,23 +410,49 @@ HERDING_MEC_WEBOTS = HerdingConfig(
|
||||
),
|
||||
robot=RobotConfig(
|
||||
action_smooth=0.55,
|
||||
strafe_efficiency=0.4,
|
||||
strafe_to_forward_bleed=-0.28,
|
||||
strafe_efficiency=0.26,
|
||||
strafe_to_forward_bleed=-0.40,
|
||||
),
|
||||
)
|
||||
"""Webots-mecanum-matched training preset.
|
||||
"""Mecanum + 140° LiDAR preset.
|
||||
|
||||
Same as HERDING_WEBOTS but with the gym mecanum kinematics scaled to
|
||||
match the Webots roller-hinge mecanum proto:
|
||||
* ``strafe_efficiency=0.4`` — strafing produces ~40% of textbook
|
||||
X-pattern lateral velocity in Webots; this matches the bias.
|
||||
* ``strafe_to_forward_bleed=-0.28`` — strafe commands bleed ~28% of
|
||||
their magnitude into backward body motion in Webots.
|
||||
Mirrors HERDING_WEBOTS but with mecanum-specific kinematic scaling
|
||||
(``strafe_efficiency`` and ``strafe_to_forward_bleed``) applied to
|
||||
the gym forward-kinematics formula. The Webots controller reads
|
||||
these same values via ``RobotConfig`` and feeds them through the
|
||||
Supervisor velocity injection, so gym and Webots produce identical
|
||||
body motion. Diff-drive ignores both fields.
|
||||
"""
|
||||
|
||||
Use this preset when training BC/RL for the mecanum drive so the
|
||||
policy learns to compensate for the imperfect physical mecanum.
|
||||
Differential drive ignores both parameters and behaves identically
|
||||
to HERDING_WEBOTS.
|
||||
HERDING_MEC_WEBOTS_360 = HerdingConfig(
|
||||
lidar=LIDAR_WEBOTS_360,
|
||||
# Looser detection thresholds for the wider FOV — the 360° scan
|
||||
# catches far walls, gate posts and pen rails the 140° front cone
|
||||
# never sees, so the cluster/feature filters need slightly more
|
||||
# margin to keep promotion rates similar.
|
||||
detection=DetectionConfig(wall_reject=0.6, static_reject=1.2),
|
||||
tracker=TrackerConfig(
|
||||
forget_steps=300,
|
||||
max_new_tracks_per_step=2, # 360° gives more candidates per step
|
||||
pen_latch_depth=3.0,
|
||||
predict_steps=180,
|
||||
consensus_k=3,
|
||||
consensus_radius_m=0.3,
|
||||
consensus_max_age=20,
|
||||
),
|
||||
robot=RobotConfig(
|
||||
action_smooth=0.55,
|
||||
strafe_efficiency=0.26,
|
||||
strafe_to_forward_bleed=-0.40,
|
||||
),
|
||||
)
|
||||
"""Mecanum + 360° LiDAR preset (the deployable mecanum target).
|
||||
|
||||
The 360° FOV gives the policy perception coverage in every direction,
|
||||
which matches the omnidirectional motion the mecanum chassis can
|
||||
produce. Used for both gym training and Webots deployment so the
|
||||
trained policy sees the same observation geometry it will face at
|
||||
deploy time.
|
||||
"""
|
||||
"""Webots-matched training preset.
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ PROTO ShepherdDogMecanum [
|
||||
field SFString controller "shepherd_dog"
|
||||
field MFString controllerArgs []
|
||||
field SFString customData ""
|
||||
field SFBool supervisor FALSE
|
||||
field SFBool supervisor TRUE
|
||||
field SFBool synchronization TRUE
|
||||
]
|
||||
{
|
||||
@@ -1993,6 +1993,7 @@ PROTO ShepherdDogMecanum [
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# ========== IMU SENSORS ==========
|
||||
Accelerometer {
|
||||
translation 0 0 0.10
|
||||
@@ -2052,6 +2053,10 @@ PROTO ShepherdDogMecanum [
|
||||
}
|
||||
|
||||
# ========== PHYSICS ==========
|
||||
# Chassis dynamics are not exercised in herding runs — the
|
||||
# controller drives the body kinematically via Supervisor
|
||||
# setVelocity (see drive_mecanum in controllers/shepherd_dog).
|
||||
# Webots-derived inertia from the bounding box is sufficient.
|
||||
physics Physics {
|
||||
density -1
|
||||
mass 5.0
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
+30
-19
@@ -90,24 +90,36 @@ fi
|
||||
|
||||
cp "$SRC" "$DST"
|
||||
|
||||
# LiDAR FOV variant: HERDING_LIDAR=140 (default) or 360 (ablation).
|
||||
# 360° is only supported for differential drive; the mecanum proto
|
||||
# always uses the 140° sensor matching ShepherdDog.proto.
|
||||
LIDAR_VARIANT="${HERDING_LIDAR:-140}"
|
||||
# LiDAR FOV variant. For mecanum the default is 360° because the
|
||||
# physical-roller proto's passive yaw drift makes the 140° front
|
||||
# cone unreliable (heading errors push detections out of the
|
||||
# tracker's world-frame matching gates). Diff defaults to 140°
|
||||
# matching the canonical ShepherdDog.proto.
|
||||
if [[ -z "${HERDING_LIDAR:-}" ]]; then
|
||||
if [[ "$DRIVE" == "mecanum" ]]; then
|
||||
LIDAR_VARIANT="360"
|
||||
else
|
||||
LIDAR_VARIANT="140"
|
||||
fi
|
||||
else
|
||||
LIDAR_VARIANT="$HERDING_LIDAR"
|
||||
fi
|
||||
if [[ "$LIDAR_VARIANT" != "140" && "$LIDAR_VARIANT" != "360" ]]; then
|
||||
echo "HERDING_LIDAR must be 140 or 360, got '$LIDAR_VARIANT'" >&2; exit 1
|
||||
fi
|
||||
if [[ "$LIDAR_VARIANT" == "360" && "$DRIVE" == "mecanum" ]]; then
|
||||
echo "[run_webots] HERDING_LIDAR=360 not available for mecanum drive — falling back to 140." >&2
|
||||
LIDAR_VARIANT="140"
|
||||
fi
|
||||
export HERDING_LIDAR="$LIDAR_VARIANT"
|
||||
|
||||
# Swap robot proto based on drive mode + LiDAR variant.
|
||||
# Base worlds reference ShepherdDog (diff-drive 140°). For mecanum we
|
||||
# swap in ShepherdDogMecanum; for the 360° ablation we swap in
|
||||
# ShepherdDog360.
|
||||
if [[ "$DRIVE" == "mecanum" ]]; then
|
||||
# Base worlds reference ShepherdDog (diff-drive 140°). The four
|
||||
# combinations the launcher supports:
|
||||
# diff + 140° → ShepherdDog.proto (default)
|
||||
# diff + 360° → ShepherdDog360.proto (FOV ablation for diff)
|
||||
# mecanum+ 140° → ShepherdDogMecanum.proto
|
||||
# mecanum+ 360° → ShepherdDogMecanum360.proto (the trained mecanum target)
|
||||
if [[ "$DRIVE" == "mecanum" && "$LIDAR_VARIANT" == "360" ]]; then
|
||||
sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum360.proto"|' "$DST"
|
||||
sed -i 's|^ShepherdDog {|ShepherdDogMecanum360 {|' "$DST"
|
||||
elif [[ "$DRIVE" == "mecanum" ]]; then
|
||||
sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum.proto"|' "$DST"
|
||||
sed -i 's|^ShepherdDog {|ShepherdDogMecanum {|' "$DST"
|
||||
elif [[ "$LIDAR_VARIANT" == "360" ]]; then
|
||||
@@ -115,13 +127,12 @@ elif [[ "$LIDAR_VARIANT" == "360" ]]; then
|
||||
sed -i 's|^ShepherdDog {|ShepherdDog360 {|' "$DST"
|
||||
fi
|
||||
if [[ "$DRIVE" == "mecanum" ]]; then
|
||||
# Inject mecanum roller contact properties. The proto's rollers are
|
||||
# split into two contact materials so that we can keep the friction
|
||||
# axes oriented along each roller's free-spin direction — but with
|
||||
# physical roller hinges (no longer plain cylinder wheels) the
|
||||
# ground contact is via the capsules and standard friction works.
|
||||
# Slightly bumped coulombFriction keeps the rollers gripping during
|
||||
# mecanum strafing.
|
||||
# Wheel-ground friction. The chassis is driven kinematically by
|
||||
# the Supervisor (see drive_mecanum in controllers/shepherd_dog),
|
||||
# so these properties only affect wheel-spin visuals, not the
|
||||
# robot's motion. coulombFriction 2.0 plus a small
|
||||
# forceDependentSlip keeps the rollers from locking up against
|
||||
# the ground.
|
||||
python3 -c "
|
||||
with open('$DST', 'r') as f:
|
||||
txt = f.read()
|
||||
|
||||
+15
-8
@@ -179,19 +179,26 @@ def main():
|
||||
f"'{FIELD_SHAPE}'. This should not happen — file a bug.")
|
||||
|
||||
from herding.config import (
|
||||
HerdingConfig, HERDING_WEBOTS, HERDING_MEC_WEBOTS,
|
||||
HerdingConfig, HERDING_WEBOTS, HERDING_MEC_WEBOTS, HERDING_MEC_WEBOTS_360,
|
||||
DomainRandomConfig, RobotConfig,
|
||||
)
|
||||
if args.use_webots_preset:
|
||||
# Pick the drive-matched Webots preset — for mecanum we use the
|
||||
# variant that simulates the physical-roller proto's strafe
|
||||
# efficiency and forward bleed so the policy trains under the
|
||||
# same imperfect kinematics it sees at deployment.
|
||||
base = HERDING_MEC_WEBOTS if args.drive_mode == "mecanum" else HERDING_WEBOTS
|
||||
# Mecanum uses the 360° preset (the deployable mecanum target);
|
||||
# diff drive keeps the canonical 140° preset.
|
||||
if args.drive_mode == "mecanum":
|
||||
base = HERDING_MEC_WEBOTS_360
|
||||
preset_name = "HERDING_MEC_WEBOTS_360"
|
||||
else:
|
||||
base = HERDING_WEBOTS
|
||||
preset_name = "HERDING_WEBOTS"
|
||||
# Small compass noise for mecanum training (robustness margin
|
||||
# for the Webots compass sensor).
|
||||
compass_std = 0.1 if args.drive_mode == "mecanum" else 0.0
|
||||
herding_cfg = base.replace(
|
||||
domain_random=DomainRandomConfig(
|
||||
fp_rate=args.fp_rate,
|
||||
wheel_slip_std=args.wheel_slip_std,
|
||||
compass_noise_std=compass_std,
|
||||
),
|
||||
robot=RobotConfig(
|
||||
action_smooth=args.action_smooth,
|
||||
@@ -199,10 +206,10 @@ def main():
|
||||
strafe_to_forward_bleed=base.robot.strafe_to_forward_bleed,
|
||||
),
|
||||
)
|
||||
preset_name = "HERDING_MEC_WEBOTS" if args.drive_mode == "mecanum" else "HERDING_WEBOTS"
|
||||
print(f"[demos] {preset_name} preset + DR: fp_rate={args.fp_rate} "
|
||||
f"action_smooth={args.action_smooth} wheel_slip_std={args.wheel_slip_std} "
|
||||
f"strafe_eff={herding_cfg.robot.strafe_efficiency:.2f}")
|
||||
f"strafe_eff={herding_cfg.robot.strafe_efficiency:.2f} "
|
||||
f"compass_noise={compass_std}")
|
||||
else:
|
||||
herding_cfg = None
|
||||
if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+36
-20
@@ -276,37 +276,53 @@ def main() -> None:
|
||||
print(f"[rl] drive_mode={drive_mode} (BC action_dim={bc_action_dim})")
|
||||
|
||||
from herding.config import (
|
||||
HerdingConfig, HERDING_MEC_WEBOTS, DomainRandomConfig, RobotConfig,
|
||||
HerdingConfig, HERDING_MEC_WEBOTS_360, DomainRandomConfig, RobotConfig,
|
||||
)
|
||||
herding_cfg = None
|
||||
# When fine-tuning a mecanum policy we always apply the Webots
|
||||
# roller-hinge calibration to the gym kinematics (strafe efficiency
|
||||
# and bleed). Without this, the RL agent updates against the
|
||||
# textbook X-pattern and fails on deployment.
|
||||
# Mecanum always trains under HERDING_MEC_WEBOTS_360 (360° LiDAR +
|
||||
# kinematic-matched strafe scaling + small compass-noise DR).
|
||||
is_mecanum = (drive_mode == "mecanum")
|
||||
if is_mecanum or args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
|
||||
if is_mecanum:
|
||||
base_robot = HERDING_MEC_WEBOTS.robot
|
||||
strafe_eff = base_robot.strafe_efficiency
|
||||
strafe_bleed = base_robot.strafe_to_forward_bleed
|
||||
base = HERDING_MEC_WEBOTS_360
|
||||
strafe_eff = base.robot.strafe_efficiency
|
||||
strafe_bleed = base.robot.strafe_to_forward_bleed
|
||||
compass_std = 0.1 # heading robustness DR
|
||||
else:
|
||||
base = None
|
||||
strafe_eff = 1.0
|
||||
strafe_bleed = 0.0
|
||||
herding_cfg = HerdingConfig(
|
||||
domain_random=DomainRandomConfig(
|
||||
fp_rate=args.fp_rate,
|
||||
wheel_slip_std=args.wheel_slip_std,
|
||||
),
|
||||
robot=RobotConfig(
|
||||
action_smooth=args.action_smooth,
|
||||
strafe_efficiency=strafe_eff,
|
||||
strafe_to_forward_bleed=strafe_bleed,
|
||||
),
|
||||
)
|
||||
compass_std = 0.0
|
||||
if is_mecanum:
|
||||
herding_cfg = base.replace(
|
||||
domain_random=DomainRandomConfig(
|
||||
fp_rate=args.fp_rate,
|
||||
wheel_slip_std=args.wheel_slip_std,
|
||||
compass_noise_std=compass_std,
|
||||
),
|
||||
robot=RobotConfig(
|
||||
action_smooth=args.action_smooth,
|
||||
strafe_efficiency=strafe_eff,
|
||||
strafe_to_forward_bleed=strafe_bleed,
|
||||
),
|
||||
)
|
||||
else:
|
||||
herding_cfg = HerdingConfig(
|
||||
domain_random=DomainRandomConfig(
|
||||
fp_rate=args.fp_rate,
|
||||
wheel_slip_std=args.wheel_slip_std,
|
||||
),
|
||||
robot=RobotConfig(
|
||||
action_smooth=args.action_smooth,
|
||||
strafe_efficiency=strafe_eff,
|
||||
strafe_to_forward_bleed=strafe_bleed,
|
||||
),
|
||||
)
|
||||
print(f"[rl] domain-random: fp_rate={args.fp_rate} "
|
||||
f"action_smooth={args.action_smooth} "
|
||||
f"wheel_slip_std={args.wheel_slip_std} "
|
||||
f"strafe_eff={strafe_eff:.2f} strafe_bleed={strafe_bleed:.2f}")
|
||||
f"strafe_eff={strafe_eff:.2f} strafe_bleed={strafe_bleed:.2f} "
|
||||
f"compass_noise={compass_std}")
|
||||
|
||||
env_fns = [_make_env(i, args.seed, frame_stack, drive_mode,
|
||||
difficulty=args.difficulty,
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user