Webots sim-to-real fixes, DAgger pipeline, 360° proto variant
Today's session worked across the full Webots delivery stack — found and
fixed a cluster of bugs blocking the BC/RL transfer, then explored
training-side mitigations for the residual perception gap.
Bug fixes:
- Makefile FP_RATE default 2.0 → 0.0: BC demos used fp_rate=0 but RL
fine-tune defaulted to fp_rate=2, poisoning the BC obs distribution
and stalling PPO at 0% success across 1.46M+ steps.
- controllers/{shepherd_dog,sheep}/runtime.ini: Webots was launching
controllers under system python3 (no numpy) and they were crashing
silently. Pinned to the conda tir env.
- herding/config.py HERDING_WEBOTS preset: pen_latch_depth 0.5 → 2.0,
max_new_tracks_per_step 3 → 1, static_reject 0.8 → 1.2. Stops phantom
FPs near the gate from latching as permanently-penned tracks.
- herding/perception/sheep_tracker.py: penned tracks now decay at
forget_steps × 8 instead of living forever. Adds get_positions
min_freshness filter for deploy-time use.
Training/eval matches deployment:
- training/bc/collect.py: --dagger-policy flag for DAgger rollouts
(policy drives, teacher labels) + --use-webots-preset for matched
140° tracker + DR config.
- controllers/shepherd_dog/shepherd_dog.py: scan-fallback (0, 0.6) when
BC/RL sees empty sheep_positions — recovers from FOV gaps.
Tooling:
- tools/dagger_round.sh: one-shot DAgger round (collect + concat + bc).
- tools/webots_sweep_gt.sh: full sweep with HERDING_USE_GT=1 for the
perception-gap diagnosis matrix.
- protos/ShepherdDog360.proto: 360° FOV variant for the FOV-ablation
comparison. Canonical proto stays at 140° per project spec.
Artifacts: v1 BC/RL policies for all 4 (drive × world) combos trained
in clean gym (success: diff/field 90-100%, diff/round 58%, mec/field
60-100%, mec/round 50-100%). DAgger r1/r2 BCs for diff/field show
12%→38% progression on gym HERDING_WEBOTS proxy but did not close
to actual Webots LiDAR (0/5 throughout). Next: LSTM policy or
learned tracker per the project-state memory.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
[python]
|
||||
COMMAND = /home/jalf/miniconda3/envs/tir/bin/python3
|
||||
@@ -87,11 +87,20 @@ from herding.perception.lidar_perception import detections_from_scan
|
||||
from herding.perception.sheep_tracker import SheepTracker
|
||||
from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels
|
||||
from herding.world.geometry import (
|
||||
DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
|
||||
DOG_SOUTH_LIMIT, DOG_WHEEL_BASE, DOG_WHEEL_BASE_X,
|
||||
DOG_WHEEL_BASE_Y, DOG_WHEEL_RADIUS,
|
||||
DOG_SOUTH_LIMIT,
|
||||
PEN_ENTRY, is_penned_position,
|
||||
)
|
||||
from herding.config import HERDING_WEBOTS, RobotConfig
|
||||
|
||||
# Robot physical constants come from RobotConfig so they stay in sync with
|
||||
# the training environment. The Webots preset uses action_smooth=0.55.
|
||||
_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
|
||||
DOG_WHEEL_RADIUS = _ROBOT_CFG.wheel_radius
|
||||
DOG_WHEEL_BASE = _ROBOT_CFG.wheel_base
|
||||
DOG_WHEEL_BASE_X = _ROBOT_CFG.wheel_base_x
|
||||
DOG_WHEEL_BASE_Y = _ROBOT_CFG.wheel_base_y
|
||||
DOG_MAX_WHEEL_OMEGA = _ROBOT_CFG.max_wheel_omega
|
||||
DOG_MAX_LINEAR = _ROBOT_CFG.max_linear
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -102,6 +111,13 @@ MODE = (os.environ.get("HERDING_MODE")
|
||||
or _runtime_cfg.get("HERDING_MODE")
|
||||
or "bc").lower()
|
||||
|
||||
# Diagnostic: bypass LiDAR tracker and use GT emitter positions directly.
|
||||
# Set HERDING_USE_GT=1 to isolate perception sim-to-real gap from policy quality.
|
||||
USE_GT_PERCEPTION = bool(int(
|
||||
os.environ.get("HERDING_USE_GT")
|
||||
or _runtime_cfg.get("HERDING_USE_GT", "0")
|
||||
))
|
||||
|
||||
|
||||
def _resolve_policy_dir(mode: str) -> str:
|
||||
"""Where to look for the trained policy for the given mode.
|
||||
@@ -141,7 +157,7 @@ def _resolve_policy_dir(mode: str) -> str:
|
||||
return env_dir or primary
|
||||
|
||||
|
||||
_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal")
|
||||
_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal", "calibrate")
|
||||
if MODE not in _VALID_MODES:
|
||||
print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
|
||||
MODE = "strombom"
|
||||
@@ -173,7 +189,7 @@ print(f"[dog] drive mode={DRIVE_MODE}")
|
||||
# Control parameters
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
ACTION_SMOOTH = 0.55 # EMA on (vx, vy) — kills frame-to-frame jitter
|
||||
ACTION_SMOOTH = _ROBOT_CFG.action_smooth # EMA on (vx, vy) — kills frame-to-frame jitter
|
||||
RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done")
|
||||
|
||||
|
||||
@@ -264,7 +280,7 @@ receiver = robot.getDevice("receiver"); receiver.enable(timestep)
|
||||
emitter = robot.getDevice("emitter")
|
||||
lidar = robot.getDevice("lidar"); lidar.enable(timestep)
|
||||
|
||||
tracker = SheepTracker()
|
||||
tracker = SheepTracker(tracker_cfg=HERDING_WEBOTS.tracker)
|
||||
|
||||
# Cosmetic ear motors — animated; not used by control.
|
||||
left_ear = robot.getDevice("left ear motor")
|
||||
@@ -300,6 +316,76 @@ _run_done = False
|
||||
prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0)
|
||||
step_count = 0
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Calibration mode — apply fixed action, measure GPS displacement, compare
|
||||
# against gym kinematics prediction, write results to calibrate_mecanum.log.
|
||||
# ---------------------------------------------------------------------------
|
||||
if MODE == "calibrate":
|
||||
import json as _json
|
||||
_calib_vx = float(os.environ.get("CALIB_VX", "0.5"))
|
||||
_calib_vy = float(os.environ.get("CALIB_VY", "0.0"))
|
||||
_calib_om = float(os.environ.get("CALIB_OM", "0.0"))
|
||||
_calib_n = int(os.environ.get("CALIB_N_STEPS", "150"))
|
||||
_log_path = os.path.join(_PROJECT_ROOT, "calibrate_mecanum.log")
|
||||
# Settle for 5 steps so GPS stabilises.
|
||||
for _ in range(5):
|
||||
robot.step(timestep)
|
||||
_pos0 = gps.getValues(); _x0, _y0 = _pos0[0], _pos0[1]
|
||||
_n_calib = compass.getValues(); _h0 = math.atan2(_n_calib[0], _n_calib[1])
|
||||
# Gym-predicted displacement using shared kinematics.
|
||||
from herding.world.diffdrive import velocity_to_mecanum_wheels, mecanum_kinematics_step
|
||||
from herding.world.geometry import WEBOTS_DT as _DT
|
||||
_xg, _yg, _hg = _x0, _y0, _h0
|
||||
for _ in range(_calib_n):
|
||||
_wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
|
||||
_calib_vx, _calib_vy, _calib_om, _hg,
|
||||
max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
|
||||
lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
|
||||
max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
|
||||
wheel_base=DOG_WHEEL_BASE,
|
||||
)
|
||||
_xg, _yg, _hg = mecanum_kinematics_step(
|
||||
_xg, _yg, _hg, _wfl, _wfr, _wrl, _wrr,
|
||||
DOG_WHEEL_RADIUS, DOG_WHEEL_BASE_X / 2, DOG_WHEEL_BASE_Y / 2, _DT,
|
||||
)
|
||||
# Run actual Webots steps.
|
||||
for _ in range(_calib_n):
|
||||
_nv = compass.getValues(); _h = math.atan2(_nv[0], _nv[1])
|
||||
_wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
|
||||
_calib_vx, _calib_vy, _calib_om, _h,
|
||||
max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
|
||||
lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
|
||||
max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
|
||||
wheel_base=DOG_WHEEL_BASE,
|
||||
)
|
||||
if DRIVE_MODE == "mecanum":
|
||||
drive_mecanum(_calib_vx, _calib_vy, _calib_om,
|
||||
fl_motor, fr_motor, rl_motor, rr_motor,
|
||||
compass, MOTOR_MAX)
|
||||
robot.step(timestep)
|
||||
_pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1]
|
||||
_T = _calib_n * _DT
|
||||
_vx_w = (_x1 - _x0) / _T; _vy_w = (_y1 - _y0) / _T
|
||||
_vx_g = (_xg - _x0) / _T; _vy_g = (_yg - _y0) / _T
|
||||
def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p)
|
||||
_result = (
|
||||
f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f}) "
|
||||
f"steps={_calib_n}\n"
|
||||
f" gym displacement: dx={_xg-_x0:.4f} dy={_yg-_y0:.4f} "
|
||||
f"(vx={_vx_g:.3f} vy={_vy_g:.3f} m/s)\n"
|
||||
f" webots displacement: dx={_x1-_x0:.4f} dy={_y1-_y0:.4f} "
|
||||
f"(vx={_vx_w:.3f} vy={_vy_w:.3f} m/s)\n"
|
||||
f" vx error: {_pct(_vx_w, _vx_g):.1f}% "
|
||||
f"vy error: {_pct(_vy_w, _vy_g):.1f}%\n"
|
||||
)
|
||||
print(_result)
|
||||
with open(_log_path, "a") as _f:
|
||||
_f.write(_result + "\n")
|
||||
# Write the run-done sentinel so run_webots.sh closes Webots cleanly.
|
||||
with open(RUN_DONE_FILE, "w") as _f:
|
||||
_f.write("calibrate\n")
|
||||
import sys as _sys; _sys.exit(0)
|
||||
|
||||
while robot.step(timestep) != -1:
|
||||
step_count += 1
|
||||
|
||||
@@ -321,8 +407,18 @@ while robot.step(timestep) != -1:
|
||||
|
||||
# ---- LiDAR perception → tracker → active sheep positions ----
|
||||
ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32)
|
||||
detections = detections_from_scan(ranges, dog_xy[0], dog_xy[1], dog_heading)
|
||||
sheep_positions = tracker.update(detections)
|
||||
detections = detections_from_scan(
|
||||
ranges, dog_xy[0], dog_xy[1], dog_heading,
|
||||
detection_cfg=HERDING_WEBOTS.detection,
|
||||
lidar_cfg=HERDING_WEBOTS.lidar,
|
||||
)
|
||||
if USE_GT_PERCEPTION and _gt_sheep:
|
||||
# Bypass tracker: feed GT emitter positions directly to policy/teacher.
|
||||
sheep_positions = {k: v for k, v in _gt_sheep.items()
|
||||
if not is_penned_position(v[0], v[1])}
|
||||
tracker.update(detections) # still advance tracker for diagnostics
|
||||
else:
|
||||
sheep_positions = tracker.update(detections)
|
||||
|
||||
sheep_xy_list = list(sheep_positions.values())
|
||||
sheep_penned_list = [False] * len(sheep_xy_list)
|
||||
@@ -331,10 +427,18 @@ while robot.step(timestep) != -1:
|
||||
# ---- Action selection ----
|
||||
omega = 0.0
|
||||
if MODE in ("bc", "rl") and policy_handle is not None:
|
||||
action = policy_handle.predict(single_obs)
|
||||
vx, vy = float(action[0]), float(action[1])
|
||||
if DRIVE_MODE == "mecanum" and len(action) >= 3:
|
||||
omega = float(action[2])
|
||||
if not sheep_positions:
|
||||
# BC/RL never saw "empty obs during operation" in training (empty
|
||||
# obs only happened at episode end), so the policy outputs ~zero
|
||||
# and the dog gets stuck. Fall back to a fixed scan rotation
|
||||
# until tracker recovers some sheep.
|
||||
vx, vy = 0.0, 0.6
|
||||
omega = 0.5 if DRIVE_MODE == "mecanum" else 0.0
|
||||
else:
|
||||
action = policy_handle.predict(single_obs)
|
||||
vx, vy = float(action[0]), float(action[1])
|
||||
if DRIVE_MODE == "mecanum" and len(action) >= 3:
|
||||
omega = float(action[2])
|
||||
else:
|
||||
result = analytic_teacher(
|
||||
dog_xy, dog_heading, sheep_positions, PEN_ENTRY,
|
||||
|
||||
Reference in New Issue
Block a user