Webots sim-to-real fixes, DAgger pipeline, 360° proto variant

Today's session worked across the full Webots delivery stack — found and
fixed a cluster of bugs blocking the BC/RL transfer, then explored
training-side mitigations for the residual perception gap.

Bug fixes:
- Makefile FP_RATE default 2.0 → 0.0: BC demos used fp_rate=0 but RL
  fine-tune defaulted to fp_rate=2, poisoning the BC obs distribution
  and stalling PPO at 0% success across 1.46M+ steps.
- controllers/{shepherd_dog,sheep}/runtime.ini: Webots was launching
  controllers under system python3 (no numpy) and they were crashing
  silently. Pinned to the conda tir env.
- herding/config.py HERDING_WEBOTS preset: pen_latch_depth 0.5 → 2.0,
  max_new_tracks_per_step 3 → 1, static_reject 0.8 → 1.2. Stops phantom
  FPs near the gate from latching as permanently-penned tracks.
- herding/perception/sheep_tracker.py: penned tracks now decay at
  forget_steps × 8 instead of living forever. Adds get_positions
  min_freshness filter for deploy-time use.

Training/eval matches deployment:
- training/bc/collect.py: --dagger-policy flag for DAgger rollouts
  (policy drives, teacher labels) + --use-webots-preset for matched
  140° tracker + DR config.
- controllers/shepherd_dog/shepherd_dog.py: scan-fallback (0, 0.6) when
  BC/RL sees empty sheep_positions — recovers from FOV gaps.

Tooling:
- tools/dagger_round.sh: one-shot DAgger round (collect + concat + bc).
- tools/webots_sweep_gt.sh: full sweep with HERDING_USE_GT=1 for the
  perception-gap diagnosis matrix.
- protos/ShepherdDog360.proto: 360° FOV variant for the FOV-ablation
  comparison. Canonical proto stays at 140° per project spec.

Artifacts: v1 BC/RL policies for all 4 (drive × world) combos trained
in clean gym (success: diff/field 90-100%, diff/round 58%, mec/field
60-100%, mec/round 50-100%). DAgger r1/r2 BCs for diff/field show
12%→38% progression on gym HERDING_WEBOTS proxy but did not close
to actual Webots LiDAR (0/5 throughout). Next: LSTM policy or
learned tracker per the project-state memory.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Johnny Fernandes
2026-05-16 17:21:02 +00:00
parent c61df91950
commit dd5ac669e5
34 changed files with 2336 additions and 188 deletions
+2
View File
@@ -0,0 +1,2 @@
[python]
COMMAND = /home/jalf/miniconda3/envs/tir/bin/python3
+2
View File
@@ -0,0 +1,2 @@
[python]
COMMAND = /home/jalf/miniconda3/envs/tir/bin/python3
+116 -12
View File
@@ -87,11 +87,20 @@ from herding.perception.lidar_perception import detections_from_scan
from herding.perception.sheep_tracker import SheepTracker
from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels
from herding.world.geometry import (
DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
DOG_SOUTH_LIMIT, DOG_WHEEL_BASE, DOG_WHEEL_BASE_X,
DOG_WHEEL_BASE_Y, DOG_WHEEL_RADIUS,
DOG_SOUTH_LIMIT,
PEN_ENTRY, is_penned_position,
)
from herding.config import HERDING_WEBOTS, RobotConfig
# Robot physical constants come from RobotConfig so they stay in sync with
# the training environment. The Webots preset uses action_smooth=0.55.
_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
DOG_WHEEL_RADIUS = _ROBOT_CFG.wheel_radius
DOG_WHEEL_BASE = _ROBOT_CFG.wheel_base
DOG_WHEEL_BASE_X = _ROBOT_CFG.wheel_base_x
DOG_WHEEL_BASE_Y = _ROBOT_CFG.wheel_base_y
DOG_MAX_WHEEL_OMEGA = _ROBOT_CFG.max_wheel_omega
DOG_MAX_LINEAR = _ROBOT_CFG.max_linear
# ---------------------------------------------------------------------------
@@ -102,6 +111,13 @@ MODE = (os.environ.get("HERDING_MODE")
or _runtime_cfg.get("HERDING_MODE")
or "bc").lower()
# Diagnostic: bypass LiDAR tracker and use GT emitter positions directly.
# Set HERDING_USE_GT=1 to isolate perception sim-to-real gap from policy quality.
USE_GT_PERCEPTION = bool(int(
os.environ.get("HERDING_USE_GT")
or _runtime_cfg.get("HERDING_USE_GT", "0")
))
def _resolve_policy_dir(mode: str) -> str:
"""Where to look for the trained policy for the given mode.
@@ -141,7 +157,7 @@ def _resolve_policy_dir(mode: str) -> str:
return env_dir or primary
_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal")
_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal", "calibrate")
if MODE not in _VALID_MODES:
print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
MODE = "strombom"
@@ -173,7 +189,7 @@ print(f"[dog] drive mode={DRIVE_MODE}")
# Control parameters
# ---------------------------------------------------------------------------
ACTION_SMOOTH = 0.55 # EMA on (vx, vy) — kills frame-to-frame jitter
ACTION_SMOOTH = _ROBOT_CFG.action_smooth # EMA on (vx, vy) — kills frame-to-frame jitter
RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done")
@@ -264,7 +280,7 @@ receiver = robot.getDevice("receiver"); receiver.enable(timestep)
emitter = robot.getDevice("emitter")
lidar = robot.getDevice("lidar"); lidar.enable(timestep)
tracker = SheepTracker()
tracker = SheepTracker(tracker_cfg=HERDING_WEBOTS.tracker)
# Cosmetic ear motors — animated; not used by control.
left_ear = robot.getDevice("left ear motor")
@@ -300,6 +316,76 @@ _run_done = False
prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0)
step_count = 0
# ---------------------------------------------------------------------------
# Calibration mode — apply fixed action, measure GPS displacement, compare
# against gym kinematics prediction, write results to calibrate_mecanum.log.
# ---------------------------------------------------------------------------
if MODE == "calibrate":
import json as _json
_calib_vx = float(os.environ.get("CALIB_VX", "0.5"))
_calib_vy = float(os.environ.get("CALIB_VY", "0.0"))
_calib_om = float(os.environ.get("CALIB_OM", "0.0"))
_calib_n = int(os.environ.get("CALIB_N_STEPS", "150"))
_log_path = os.path.join(_PROJECT_ROOT, "calibrate_mecanum.log")
# Settle for 5 steps so GPS stabilises.
for _ in range(5):
robot.step(timestep)
_pos0 = gps.getValues(); _x0, _y0 = _pos0[0], _pos0[1]
_n_calib = compass.getValues(); _h0 = math.atan2(_n_calib[0], _n_calib[1])
# Gym-predicted displacement using shared kinematics.
from herding.world.diffdrive import velocity_to_mecanum_wheels, mecanum_kinematics_step
from herding.world.geometry import WEBOTS_DT as _DT
_xg, _yg, _hg = _x0, _y0, _h0
for _ in range(_calib_n):
_wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
_calib_vx, _calib_vy, _calib_om, _hg,
max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
wheel_base=DOG_WHEEL_BASE,
)
_xg, _yg, _hg = mecanum_kinematics_step(
_xg, _yg, _hg, _wfl, _wfr, _wrl, _wrr,
DOG_WHEEL_RADIUS, DOG_WHEEL_BASE_X / 2, DOG_WHEEL_BASE_Y / 2, _DT,
)
# Run actual Webots steps.
for _ in range(_calib_n):
_nv = compass.getValues(); _h = math.atan2(_nv[0], _nv[1])
_wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
_calib_vx, _calib_vy, _calib_om, _h,
max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
wheel_base=DOG_WHEEL_BASE,
)
if DRIVE_MODE == "mecanum":
drive_mecanum(_calib_vx, _calib_vy, _calib_om,
fl_motor, fr_motor, rl_motor, rr_motor,
compass, MOTOR_MAX)
robot.step(timestep)
_pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1]
_T = _calib_n * _DT
_vx_w = (_x1 - _x0) / _T; _vy_w = (_y1 - _y0) / _T
_vx_g = (_xg - _x0) / _T; _vy_g = (_yg - _y0) / _T
def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p)
_result = (
f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f}) "
f"steps={_calib_n}\n"
f" gym displacement: dx={_xg-_x0:.4f} dy={_yg-_y0:.4f} "
f"(vx={_vx_g:.3f} vy={_vy_g:.3f} m/s)\n"
f" webots displacement: dx={_x1-_x0:.4f} dy={_y1-_y0:.4f} "
f"(vx={_vx_w:.3f} vy={_vy_w:.3f} m/s)\n"
f" vx error: {_pct(_vx_w, _vx_g):.1f}% "
f"vy error: {_pct(_vy_w, _vy_g):.1f}%\n"
)
print(_result)
with open(_log_path, "a") as _f:
_f.write(_result + "\n")
# Write the run-done sentinel so run_webots.sh closes Webots cleanly.
with open(RUN_DONE_FILE, "w") as _f:
_f.write("calibrate\n")
import sys as _sys; _sys.exit(0)
while robot.step(timestep) != -1:
step_count += 1
@@ -321,8 +407,18 @@ while robot.step(timestep) != -1:
# ---- LiDAR perception → tracker → active sheep positions ----
ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32)
detections = detections_from_scan(ranges, dog_xy[0], dog_xy[1], dog_heading)
sheep_positions = tracker.update(detections)
detections = detections_from_scan(
ranges, dog_xy[0], dog_xy[1], dog_heading,
detection_cfg=HERDING_WEBOTS.detection,
lidar_cfg=HERDING_WEBOTS.lidar,
)
if USE_GT_PERCEPTION and _gt_sheep:
# Bypass tracker: feed GT emitter positions directly to policy/teacher.
sheep_positions = {k: v for k, v in _gt_sheep.items()
if not is_penned_position(v[0], v[1])}
tracker.update(detections) # still advance tracker for diagnostics
else:
sheep_positions = tracker.update(detections)
sheep_xy_list = list(sheep_positions.values())
sheep_penned_list = [False] * len(sheep_xy_list)
@@ -331,10 +427,18 @@ while robot.step(timestep) != -1:
# ---- Action selection ----
omega = 0.0
if MODE in ("bc", "rl") and policy_handle is not None:
action = policy_handle.predict(single_obs)
vx, vy = float(action[0]), float(action[1])
if DRIVE_MODE == "mecanum" and len(action) >= 3:
omega = float(action[2])
if not sheep_positions:
# BC/RL never saw "empty obs during operation" in training (empty
# obs only happened at episode end), so the policy outputs ~zero
# and the dog gets stuck. Fall back to a fixed scan rotation
# until tracker recovers some sheep.
vx, vy = 0.0, 0.6
omega = 0.5 if DRIVE_MODE == "mecanum" else 0.0
else:
action = policy_handle.predict(single_obs)
vx, vy = float(action[0]), float(action[1])
if DRIVE_MODE == "mecanum" and len(action) >= 3:
omega = float(action[2])
else:
result = analytic_teacher(
dog_xy, dog_heading, sheep_positions, PEN_ENTRY,