Webots sim-to-real fixes, DAgger pipeline, 360° proto variant

Today's session worked across the full Webots delivery stack — found and
fixed a cluster of bugs blocking the BC/RL transfer, then explored
training-side mitigations for the residual perception gap.

Bug fixes:
- Makefile FP_RATE default 2.0 → 0.0: BC demos used fp_rate=0 but RL
  fine-tune defaulted to fp_rate=2, poisoning the BC obs distribution
  and stalling PPO at 0% success across 1.46M+ steps.
- controllers/{shepherd_dog,sheep}/runtime.ini: Webots was launching
  controllers under system python3 (no numpy) and they were crashing
  silently. Pinned to the conda tir env.
- herding/config.py HERDING_WEBOTS preset: pen_latch_depth 0.5 → 2.0,
  max_new_tracks_per_step 3 → 1, static_reject 0.8 → 1.2. Stops phantom
  FPs near the gate from latching as permanently-penned tracks.
- herding/perception/sheep_tracker.py: penned tracks now decay at
  forget_steps × 8 instead of living forever. Adds get_positions
  min_freshness filter for deploy-time use.

Training/eval matches deployment:
- training/bc/collect.py: --dagger-policy flag for DAgger rollouts
  (policy drives, teacher labels) + --use-webots-preset for matched
  140° tracker + DR config.
- controllers/shepherd_dog/shepherd_dog.py: scan-fallback (0, 0.6) when
  BC/RL sees empty sheep_positions — recovers from FOV gaps.

Tooling:
- tools/dagger_round.sh: one-shot DAgger round (collect + concat + bc).
- tools/webots_sweep_gt.sh: full sweep with HERDING_USE_GT=1 for the
  perception-gap diagnosis matrix.
- protos/ShepherdDog360.proto: 360° FOV variant for the FOV-ablation
  comparison. Canonical proto stays at 140° per project spec.

Artifacts: v1 BC/RL policies for all 4 (drive × world) combos trained
in clean gym (success: diff/field 90-100%, diff/round 58%, mec/field
60-100%, mec/round 50-100%). DAgger r1/r2 BCs for diff/field show
12%→38% progression on the gym HERDING_WEBOTS proxy but did not close
the gap to actual Webots LiDAR (0/5 throughout). Next: LSTM policy or
learned tracker per the project-state memory.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Johnny Fernandes
2026-05-16 17:21:02 +00:00
parent c61df91950
commit dd5ac669e5
34 changed files with 2336 additions and 188 deletions
+77 -1
View File
@@ -47,6 +47,7 @@ from herding.perception.lidar_sim import simulate_scan
from herding.perception.obs import OBS_DIM, build_obs
from herding.perception.sheep_tracker import SheepTracker
from herding.control.strombom import compute_action as strombom_action
from herding.config import HerdingConfig
class HerdingEnv(gym.Env):
@@ -87,13 +88,24 @@ class HerdingEnv(gym.Env):
use_lidar: bool = True,
frame_stack: int = 1,
drive_mode: str = "differential",
herding_cfg: Optional[HerdingConfig] = None,
):
super().__init__()
# Store the config; fall back to defaults when None.
self._herding_cfg = herding_cfg
# Apply robot config overrides — these shadow the class attributes
# so that per-instance configuration is possible without touching
# the class-level defaults used by unconfigured instances.
if herding_cfg is not None:
self.ACTION_SMOOTH = herding_cfg.robot.action_smooth
# ``use_lidar=True`` (default): obs and imitation-reward teacher
# see only LiDAR-perceived positions via a tracker, matching the
# Webots controller. ``False`` exposes ground truth for ablation.
self._use_lidar = bool(use_lidar)
self._tracker = SheepTracker() if self._use_lidar else None
tracker_cfg = herding_cfg.tracker if herding_cfg is not None else None
self._tracker = SheepTracker(tracker_cfg=tracker_cfg) if self._use_lidar else None
self._np_rng_lidar: Optional[np.random.Generator] = None
# Frame stacking: the policy receives the last K obs concatenated,
@@ -261,6 +273,14 @@ class HerdingEnv(gym.Env):
vx, vy = float(self.smoothed_action[0]), float(self.smoothed_action[1])
omega = float(self.smoothed_action[2]) if self._action_dim >= 3 else 0.0
# Domain randomisation: wheel-slip std lookup and compass (heading) noise.
dr = (self._herding_cfg.domain_random
if self._herding_cfg is not None else None)
slip_std = dr.wheel_slip_std if dr is not None else 0.0
if dr is not None and dr.compass_noise_std > 0.0 and self._np_rng_lidar is not None:
self.dog_heading = float(self.dog_heading + self._np_rng_lidar.normal(
0.0, dr.compass_noise_std))
# Safety supervisor — dog stays north of the gate.
if self.dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
vx, vy = 0.0, 1.0
@@ -282,6 +302,8 @@ class HerdingEnv(gym.Env):
DOG_WHEEL_RADIUS,
DOG_WHEEL_BASE_X / 2.0, DOG_WHEEL_BASE_Y / 2.0,
WEBOTS_DT,
slip_std=slip_std,
rng=self._np_rng_lidar,
)
else:
wL, wR = velocity_to_wheels(
@@ -294,6 +316,8 @@ class HerdingEnv(gym.Env):
self.dog_x, self.dog_y, self.dog_heading = kinematics_step(
self.dog_x, self.dog_y, self.dog_heading,
wL, wR, DOG_WHEEL_RADIUS, DOG_WHEEL_BASE, WEBOTS_DT,
slip_std=slip_std,
rng=self._np_rng_lidar,
)
self.dog_x, self.dog_y = clip_to_field(self.dog_x, self.dog_y, margin=0.3)
# Extra constraint: dog stays north of the gate area.
@@ -460,16 +484,68 @@ class HerdingEnv(gym.Env):
for i in range(self.n_sheep)]
def _update_tracker(self) -> None:
    """Run one simulated LiDAR scan and feed the detections to the tracker.

    The scan is simulated from the dog's current pose against all sheep
    positions, converted to detections, optionally augmented with
    domain-randomised false positives, and passed to
    ``self._tracker.update``.  When no ``HerdingConfig`` was supplied the
    LiDAR and detection configs are passed as ``None`` and no false
    positives are injected.
    """
    cfg = self._herding_cfg
    if cfg is None:
        lidar_cfg = None
        detection_cfg = None
    else:
        lidar_cfg = cfg.lidar
        detection_cfg = cfg.detection

    scan = simulate_scan(
        self.dog_x, self.dog_y, self.dog_heading,
        self._all_sheep_xy(),
        rng=self._np_rng_lidar,
        lidar_cfg=lidar_cfg,
    )
    detections = detections_from_scan(
        scan, self.dog_x, self.dog_y, self.dog_heading,
        detection_cfg=detection_cfg,
        lidar_cfg=lidar_cfg,
    )

    # Domain randomisation: add false-positive detections near static
    # features, imitating the spurious clusters that Webots' physical
    # LiDAR produces from real 3D geometry (walls, posts, fence rails).
    # Skipped when there is no config, the rate is zero, or the LiDAR RNG
    # has not been created yet.
    dr = None if cfg is None else cfg.domain_random
    inject_fps = (
        dr is not None
        and dr.fp_rate > 0.0
        and self._np_rng_lidar is not None
    )
    if inject_fps:
        # Materialise the real detections first, then append the fakes.
        detections = [
            *detections,
            *self._sample_false_positives(dr.fp_rate, dr.fp_std_pos),
        ]

    self._tracker.update(detections)
# Static feature anchor points, as (x, y) pairs, used for false-positive
# (FP) injection: each spurious detection is one of these anchors plus
# Gaussian noise.
# The rectangular list covers gate posts, field corners and mid-wall
# returns; the round list covers the gate centre, gate posts, a north-wall
# point and two circular-wall quadrant points.
_FP_ANCHORS_RECT = (
(10.0, -15.0), (13.0, -15.0), # gate posts
(15.0, 15.0), (15.0, -15.0),
(-15.0, 15.0), (-15.0, -15.0), # field corners
(15.0, 0.0), (-15.0, 0.0), # mid-wall returns
(0.0, 15.0), (0.0, -15.0),
)
# Selected instead of the rectangular list when FIELD_SHAPE == "field_round".
_FP_ANCHORS_ROUND = (
(0.0, -15.0), # gate centre
(-1.5, -15.0), (1.5, -15.0), # gate posts
(0.0, 15.0), # north wall
(10.6, -10.6), (-10.6, -10.6), # circular wall quadrants
)
def _sample_false_positives(
    self, fp_rate: float, fp_std: float,
) -> list[tuple[float, float]]:
    """Draw this step's spurious (x, y) detections.

    The number of false positives is Poisson-distributed with mean
    ``fp_rate``.  Each one is an anchor picked at random (with
    replacement) from the anchor list matching the current field shape,
    offset on each axis by zero-mean Gaussian noise of standard
    deviation ``fp_std``.

    Args:
        fp_rate: Poisson mean for the number of false positives.
        fp_std: Standard deviation of the positional noise on each axis.

    Returns:
        A list of ``(x, y)`` float tuples; empty when the Poisson draw
        is zero.
    """
    # Imported at call time, as in the original implementation.
    from herding.world.geometry import FIELD_SHAPE

    if FIELD_SHAPE == "field_round":
        anchor_pool = self._FP_ANCHORS_ROUND
    else:
        anchor_pool = self._FP_ANCHORS_RECT

    rng = self._np_rng_lidar
    count = int(rng.poisson(fp_rate))
    if not count:
        return []

    # Keep the draw order (indices first, then noise) so the RNG stream
    # is consumed exactly as before.
    picks = rng.integers(0, len(anchor_pool), size=count)
    jitter = rng.normal(0.0, fp_std, size=(count, 2))
    return [
        (float(anchor_pool[idx][0] + dx), float(anchor_pool[idx][1] + dy))
        for idx, (dx, dy) in zip(picks, jitter)
    ]
def perceived_positions(self) -> dict[str, tuple[float, float]]:
"""What the controller would "see" this step: tracker output in
LiDAR mode, ground truth in privileged mode. Used by demo