Webots sim-to-real fixes, DAgger pipeline, 360° proto variant

Today's session worked across the full Webots delivery stack — found and fixed a cluster of bugs blocking the BC/RL transfer, then explored training-side mitigations for the residual perception gap. Bug fixes: - Makefile FP_RATE default 2.0 → 0.0: BC demos used fp_rate=0 but RL fine-tune defaulted to fp_rate=2, poisoning the BC obs distribution and stalling PPO at 0% success across 1.46M+ steps. - controllers/{shepherd_dog,sheep}/runtime.ini: Webots was launching controllers under system python3 (no numpy) and they were crashing silently. Pinned to the conda tir env. - herding/config.py HERDING_WEBOTS preset: pen_latch_depth 0.5 → 2.0, max_new_tracks_per_step 3 → 1, static_reject 0.8 → 1.2. Stops phantom FPs near the gate from latching as permanently-penned tracks. - herding/perception/sheep_tracker.py: penned tracks now decay at forget_steps × 8 instead of living forever. Adds get_positions min_freshness filter for deploy-time use. Training/eval matches deployment: - training/bc/collect.py: --dagger-policy flag for DAgger rollouts (policy drives, teacher labels) + --use-webots-preset for matched 140° tracker + DR config. - controllers/shepherd_dog/shepherd_dog.py: scan-fallback (0, 0.6) when BC/RL sees empty sheep_positions — recovers from FOV gaps. Tooling: - tools/dagger_round.sh: one-shot DAgger round (collect + concat + bc). - tools/webots_sweep_gt.sh: full sweep with HERDING_USE_GT=1 for the perception-gap diagnosis matrix. - protos/ShepherdDog360.proto: 360° FOV variant for the FOV-ablation comparison. Canonical proto stays at 140° per project spec. Artifacts: v1 BC/RL policies for all 4 (drive × world) combos trained in clean gym (success: diff/field 90-100%, diff/round 58%, mec/field 60-100%, mec/round 50-100%). DAgger r1/r2 BCs for diff/field show 12%→38% progression on gym HERDING_WEBOTS proxy but did not close to actual Webots LiDAR (0/5 throughout). Next: LSTM policy or learned tracker per the project-state memory. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-16 17:21:02 +00:00
parent c61df91950
commit dd5ac669e5
34 changed files with 2336 additions and 188 deletions
@@ -0,0 +1,2 @@
+[python]
+COMMAND = /home/jalf/miniconda3/envs/tir/bin/python3
@@ -87,11 +87,20 @@ from herding.perception.lidar_perception import detections_from_scan
 from herding.perception.sheep_tracker import SheepTracker
 from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels
 from herding.world.geometry import (
-    DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
-    DOG_SOUTH_LIMIT, DOG_WHEEL_BASE, DOG_WHEEL_BASE_X,
-    DOG_WHEEL_BASE_Y, DOG_WHEEL_RADIUS,
+    DOG_SOUTH_LIMIT,
    PEN_ENTRY, is_penned_position,
 )
+from herding.config import HERDING_WEBOTS, RobotConfig
+
+# Robot physical constants come from RobotConfig so they stay in sync with
+# the training environment.  The Webots preset uses action_smooth=0.55.
+_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
+DOG_WHEEL_RADIUS  = _ROBOT_CFG.wheel_radius
+DOG_WHEEL_BASE    = _ROBOT_CFG.wheel_base
+DOG_WHEEL_BASE_X  = _ROBOT_CFG.wheel_base_x
+DOG_WHEEL_BASE_Y  = _ROBOT_CFG.wheel_base_y
+DOG_MAX_WHEEL_OMEGA = _ROBOT_CFG.max_wheel_omega
+DOG_MAX_LINEAR    = _ROBOT_CFG.max_linear


 # ---------------------------------------------------------------------------
@@ -102,6 +111,13 @@ MODE = (os.environ.get("HERDING_MODE")
        or _runtime_cfg.get("HERDING_MODE")
        or "bc").lower()

+# Diagnostic: bypass LiDAR tracker and use GT emitter positions directly.
+# Set HERDING_USE_GT=1 to isolate perception sim-to-real gap from policy quality.
+USE_GT_PERCEPTION = bool(int(
+    os.environ.get("HERDING_USE_GT")
+    or _runtime_cfg.get("HERDING_USE_GT", "0")
+))
+

 def _resolve_policy_dir(mode: str) -> str:
    """Where to look for the trained policy for the given mode.
@@ -141,7 +157,7 @@ def _resolve_policy_dir(mode: str) -> str:
    return env_dir or primary


-_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal")
+_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal", "calibrate")
 if MODE not in _VALID_MODES:
    print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
    MODE = "strombom"
@@ -173,7 +189,7 @@ print(f"[dog] drive mode={DRIVE_MODE}")
 # Control parameters
 # ---------------------------------------------------------------------------

-ACTION_SMOOTH = 0.55           # EMA on (vx, vy) — kills frame-to-frame jitter
+ACTION_SMOOTH = _ROBOT_CFG.action_smooth  # EMA on (vx, vy) — kills frame-to-frame jitter
 RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done")


@@ -264,7 +280,7 @@ receiver = robot.getDevice("receiver"); receiver.enable(timestep)
 emitter = robot.getDevice("emitter")
 lidar = robot.getDevice("lidar");       lidar.enable(timestep)

-tracker = SheepTracker()
+tracker = SheepTracker(tracker_cfg=HERDING_WEBOTS.tracker)

 # Cosmetic ear motors — animated; not used by control.
 left_ear = robot.getDevice("left ear motor")
@@ -300,6 +316,76 @@ _run_done = False
 prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0)
 step_count = 0

+# ---------------------------------------------------------------------------
+# Calibration mode — apply fixed action, measure GPS displacement, compare
+# against gym kinematics prediction, write results to calibrate_mecanum.log.
+# ---------------------------------------------------------------------------
+if MODE == "calibrate":
+    import json as _json
+    _calib_vx  = float(os.environ.get("CALIB_VX", "0.5"))
+    _calib_vy  = float(os.environ.get("CALIB_VY", "0.0"))
+    _calib_om  = float(os.environ.get("CALIB_OM", "0.0"))
+    _calib_n   = int(os.environ.get("CALIB_N_STEPS", "150"))
+    _log_path  = os.path.join(_PROJECT_ROOT, "calibrate_mecanum.log")
+    # Settle for 5 steps so GPS stabilises.
+    for _ in range(5):
+        robot.step(timestep)
+    _pos0 = gps.getValues(); _x0, _y0 = _pos0[0], _pos0[1]
+    _n_calib = compass.getValues(); _h0 = math.atan2(_n_calib[0], _n_calib[1])
+    # Gym-predicted displacement using shared kinematics.
+    from herding.world.diffdrive import velocity_to_mecanum_wheels, mecanum_kinematics_step
+    from herding.world.geometry import WEBOTS_DT as _DT
+    _xg, _yg, _hg = _x0, _y0, _h0
+    for _ in range(_calib_n):
+        _wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
+            _calib_vx, _calib_vy, _calib_om, _hg,
+            max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
+            lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
+            max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
+            wheel_base=DOG_WHEEL_BASE,
+        )
+        _xg, _yg, _hg = mecanum_kinematics_step(
+            _xg, _yg, _hg, _wfl, _wfr, _wrl, _wrr,
+            DOG_WHEEL_RADIUS, DOG_WHEEL_BASE_X / 2, DOG_WHEEL_BASE_Y / 2, _DT,
+        )
+    # Run actual Webots steps.
+    for _ in range(_calib_n):
+        _nv = compass.getValues(); _h = math.atan2(_nv[0], _nv[1])
+        _wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
+            _calib_vx, _calib_vy, _calib_om, _h,
+            max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
+            lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
+            max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
+            wheel_base=DOG_WHEEL_BASE,
+        )
+        if DRIVE_MODE == "mecanum":
+            drive_mecanum(_calib_vx, _calib_vy, _calib_om,
+                          fl_motor, fr_motor, rl_motor, rr_motor,
+                          compass, MOTOR_MAX)
+        robot.step(timestep)
+    _pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1]
+    _T = _calib_n * _DT
+    _vx_w = (_x1 - _x0) / _T;  _vy_w = (_y1 - _y0) / _T
+    _vx_g = (_xg - _x0) / _T;  _vy_g = (_yg - _y0) / _T
+    def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p)
+    _result = (
+        f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f})  "
+        f"steps={_calib_n}\n"
+        f"  gym    displacement: dx={_xg-_x0:.4f}  dy={_yg-_y0:.4f}  "
+        f"(vx={_vx_g:.3f}  vy={_vy_g:.3f} m/s)\n"
+        f"  webots displacement: dx={_x1-_x0:.4f}  dy={_y1-_y0:.4f}  "
+        f"(vx={_vx_w:.3f}  vy={_vy_w:.3f} m/s)\n"
+        f"  vx error: {_pct(_vx_w, _vx_g):.1f}%    "
+        f"vy error: {_pct(_vy_w, _vy_g):.1f}%\n"
+    )
+    print(_result)
+    with open(_log_path, "a") as _f:
+        _f.write(_result + "\n")
+    # Write the run-done sentinel so run_webots.sh closes Webots cleanly.
+    with open(RUN_DONE_FILE, "w") as _f:
+        _f.write("calibrate\n")
+    import sys as _sys; _sys.exit(0)
+
 while robot.step(timestep) != -1:
    step_count += 1

@@ -321,8 +407,18 @@ while robot.step(timestep) != -1:

    # ---- LiDAR perception → tracker → active sheep positions ----
    ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32)
-    detections = detections_from_scan(ranges, dog_xy[0], dog_xy[1], dog_heading)
-    sheep_positions = tracker.update(detections)
+    detections = detections_from_scan(
+        ranges, dog_xy[0], dog_xy[1], dog_heading,
+        detection_cfg=HERDING_WEBOTS.detection,
+        lidar_cfg=HERDING_WEBOTS.lidar,
+    )
+    if USE_GT_PERCEPTION and _gt_sheep:
+        # Bypass tracker: feed GT emitter positions directly to policy/teacher.
+        sheep_positions = {k: v for k, v in _gt_sheep.items()
+                          if not is_penned_position(v[0], v[1])}
+        tracker.update(detections)  # still advance tracker for diagnostics
+    else:
+        sheep_positions = tracker.update(detections)

    sheep_xy_list = list(sheep_positions.values())
    sheep_penned_list = [False] * len(sheep_xy_list)
@@ -331,10 +427,18 @@ while robot.step(timestep) != -1:
    # ---- Action selection ----
    omega = 0.0
    if MODE in ("bc", "rl") and policy_handle is not None:
-        action = policy_handle.predict(single_obs)
-        vx, vy = float(action[0]), float(action[1])
-        if DRIVE_MODE == "mecanum" and len(action) >= 3:
-            omega = float(action[2])
+        if not sheep_positions:
+            # BC/RL never saw "empty obs during operation" in training (empty
+            # obs only happened at episode end), so the policy outputs ~zero
+            # and the dog gets stuck. Fall back to a fixed scan rotation
+            # until tracker recovers some sheep.
+            vx, vy = 0.0, 0.6
+            omega = 0.5 if DRIVE_MODE == "mecanum" else 0.0
+        else:
+            action = policy_handle.predict(single_obs)
+            vx, vy = float(action[0]), float(action[1])
+            if DRIVE_MODE == "mecanum" and len(action) >= 3:
+                omega = float(action[2])
    else:
        result = analytic_teacher(
            dog_xy, dog_heading, sheep_positions, PEN_ENTRY,