Webots sim-to-real fixes, DAgger pipeline, 360° proto variant

Today's session worked across the full Webots delivery stack — found and
fixed a cluster of bugs blocking the BC/RL transfer, then explored
training-side mitigations for the residual perception gap.

Bug fixes:
- Makefile FP_RATE default 2.0 → 0.0: BC demos used fp_rate=0 but RL
  fine-tune defaulted to fp_rate=2, poisoning the BC obs distribution
  and stalling PPO at 0% success across 1.46M+ steps.
- controllers/{shepherd_dog,sheep}/runtime.ini: Webots was launching
  controllers under system python3 (no numpy) and they were crashing
  silently. Pinned to the conda tir env.
- herding/config.py HERDING_WEBOTS preset: pen_latch_depth 0.5 → 2.0,
  max_new_tracks_per_step 3 → 1, static_reject 0.8 → 1.2. Stops phantom
  FPs near the gate from latching as permanently-penned tracks.
- herding/perception/sheep_tracker.py: penned tracks now decay at
  forget_steps × 8 instead of living forever. Adds an optional
  min_freshness filter to get_positions for deploy-time use.

Training/eval matches deployment:
- training/bc/collect.py: --dagger-policy flag for DAgger rollouts
  (policy drives, teacher labels) + --use-webots-preset for matched
  140° tracker + DR config.
- controllers/shepherd_dog/shepherd_dog.py: scan-fallback (0, 0.6) when
  BC/RL sees empty sheep_positions — recovers from FOV gaps.

Tooling:
- tools/dagger_round.sh: one-shot DAgger round (collect + concat + bc).
- tools/webots_sweep_gt.sh: full sweep with HERDING_USE_GT=1 for the
  perception-gap diagnosis matrix.
- protos/ShepherdDog360.proto: 360° FOV variant for the FOV-ablation
  comparison. Canonical proto stays at 140° per project spec.

Artifacts: v1 BC/RL policies for all 4 (drive × world) combos trained
in clean gym (success: diff/field 90-100%, diff/round 58%, mec/field
60-100%, mec/round 50-100%). DAgger r1/r2 BCs for diff/field show
12%→38% progression on the gym HERDING_WEBOTS proxy but did not close
the gap to actual Webots LiDAR (0/5 throughout). Next: LSTM policy or
learned tracker per the project-state memory.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Johnny Fernandes
2026-05-16 17:21:02 +00:00
parent c61df91950
commit dd5ac669e5
34 changed files with 2336 additions and 188 deletions
+91 -21
View File
@@ -22,6 +22,10 @@ plane south (``is_penned_position``). Penned tracks are excluded from
from __future__ import annotations
import math
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from herding.config import TrackerConfig
from herding.world.geometry import MAX_SHEEP, in_pen, is_penned_position
@@ -56,16 +60,21 @@ class Track:
"""Not-a-property in the hot loop — callers pass current step."""
raise NotImplementedError
def predicted_position(self, current_step: int) -> tuple[float, float]:
def predicted_position(
self,
current_step: int,
predict_steps: int = PREDICT_STEPS,
velocity_clamp: float = VELOCITY_CLAMP,
) -> tuple[float, float]:
"""Extrapolated position using constant velocity, clamped."""
dt = current_step - self.last_seen
if dt <= 0 or dt > PREDICT_STEPS:
if dt <= 0 or dt > predict_steps:
return self.x, self.y
speed = math.hypot(self.vx, self.vy)
if speed < 1e-4:
return self.x, self.y
# Clamp extrapolation distance.
max_d = VELOCITY_CLAMP * dt * 0.016 # steps → seconds
max_d = velocity_clamp * dt * 0.016 # steps → seconds
d = min(speed * dt * 0.016, max_d)
return (
self.x + d * (self.vx / speed),
@@ -93,10 +102,36 @@ class SheepTracker:
Each track is a :class:`Track` with position, velocity estimate,
last-seen step, and penned flag.
Pass a :class:`~herding.config.TrackerConfig` to override any
module-level defaults without changing this file.
"""
def __init__(self, gate: float = GATE_M):
self.gate = gate
def __init__(
self,
gate: float = GATE_M,
tracker_cfg: "TrackerConfig | None" = None,
):
if tracker_cfg is not None:
self.gate = tracker_cfg.gate_m
self._reacquire_gate = tracker_cfg.reacquire_gate_m
self._reacquire_min_age = tracker_cfg.reacquire_min_age
self._penned_gate = tracker_cfg.penned_gate_m
self._forget_steps = tracker_cfg.forget_steps
self._predict_steps = tracker_cfg.predict_steps
self._velocity_clamp = tracker_cfg.velocity_clamp
self._max_new_per_step = tracker_cfg.max_new_tracks_per_step
self._pen_latch_depth = tracker_cfg.pen_latch_depth
else:
self.gate = gate
self._reacquire_gate = REACQUIRE_GATE_M
self._reacquire_min_age = REACQUIRE_MIN_AGE
self._penned_gate = PENNED_GATE_M
self._forget_steps = FORGET_STEPS
self._predict_steps = PREDICT_STEPS
self._velocity_clamp = VELOCITY_CLAMP
self._max_new_per_step = MAX_ACTIVE_TRACKS
self._pen_latch_depth = 0.0
self._tracks: dict[int, Track] = {}
self._next_id = 0
self.step = 0
@@ -119,8 +154,8 @@ class SheepTracker:
active_tids.sort(key=lambda tid: self._tracks[tid].last_seen)
for tid in active_tids:
track = self._tracks[tid]
# Use predicted position for matching.
tx, ty = track.predicted_position(self.step)
tx, ty = track.predicted_position(
self.step, self._predict_steps, self._velocity_clamp)
best_j, best_d = -1, self.gate
for j, (dx, dy) in enumerate(detections):
if j in det_used:
@@ -140,10 +175,11 @@ class SheepTracker:
if tid in updated_tids:
continue
track = self._tracks[tid]
if (self.step - track.last_seen) < REACQUIRE_MIN_AGE:
if (self.step - track.last_seen) < self._reacquire_min_age:
continue
tx, ty = track.predicted_position(self.step)
best_j, best_d = -1, REACQUIRE_GATE_M
tx, ty = track.predicted_position(
self.step, self._predict_steps, self._velocity_clamp)
best_j, best_d = -1, self._reacquire_gate
for j, (dx, dy) in enumerate(detections):
if j in det_used:
continue
@@ -161,7 +197,7 @@ class SheepTracker:
penned_tids = [tid for tid, t in self._tracks.items() if t.penned]
for tid in penned_tids:
track = self._tracks[tid]
best_j, best_d = -1, PENNED_GATE_M
best_j, best_d = -1, self._penned_gate
for j, (dx, dy) in enumerate(detections):
if j in det_used:
continue
@@ -174,25 +210,35 @@ class SheepTracker:
track.update(dx, dy, self.step)
det_used.add(best_j)
# Spawn new tracks for unmatched detections.
# Spawn new tracks for unmatched detections — rate-capped.
spawned = 0
for j, (dx, dy) in enumerate(detections):
if j in det_used:
continue
penned = in_pen(dx, dy) or is_penned_position(dx, dy)
if spawned >= self._max_new_per_step:
break
penned = self._is_penned(dx, dy)
self._tracks[self._next_id] = Track(dx, dy, self.step, penned)
self._next_id += 1
spawned += 1
# Promote active tracks whose current estimate crosses the gate.
for track in self._tracks.values():
if track.penned:
continue
px, py = track.predicted_position(self.step)
if is_penned_position(px, py):
px, py = track.predicted_position(
self.step, self._predict_steps, self._velocity_clamp)
if self._is_penned(px, py):
track.penned = True
# Forget stale active tracks; penned tracks live forever.
# Forget stale active tracks; penned tracks decay too but at a
# longer horizon (real penned sheep are still observed occasionally
# when the dog faces south; pure FPs at gate posts stop being
# detected once the dog drives away).
penned_forget = self._forget_steps * 8
stale = [tid for tid, t in self._tracks.items()
if not t.penned and (self.step - t.last_seen) > FORGET_STEPS]
if (not t.penned and (self.step - t.last_seen) > self._forget_steps)
or (t.penned and (self.step - t.last_seen) > penned_forget)]
for tid in stale:
del self._tracks[tid]
@@ -206,18 +252,42 @@ class SheepTracker:
return self.get_positions()
def get_positions(self) -> dict[str, tuple[float, float]]:
def _is_penned(self, x: float, y: float) -> bool:
    """Decide whether ``(x, y)`` should be latched as a penned position.

    A position qualifies only when ``in_pen`` or ``is_penned_position``
    accepts it *and* its ``y`` lies at or below
    ``GATE_Y - pen_latch_depth``, i.e. at least ``pen_latch_depth``
    metres past the gate line. A larger depth keeps gate-area LiDAR
    false positives (gate hardware reflections at y ≈ -15) from being
    permanently latched as penned tracks.
    """
    from herding.world.geometry import GATE_Y

    # The depth requirement applies to both geometry predicates alike:
    # whichever one accepts the point, it must still clear the latch line.
    latch_line = GATE_Y - self._pen_latch_depth
    inside = in_pen(x, y) or is_penned_position(x, y)
    return inside and y <= latch_line
def get_positions(self, min_freshness: int | None = None) -> dict[str, tuple[float, float]]:
"""Active (not-penned) tracks as a ``{name: (x, y)}`` dict.
For tracks currently being predicted (occluded but within
PREDICT_STEPS), returns the extrapolated position so the teacher
predict_steps), returns the extrapolated position so the teacher
sees a smooth estimate.
``min_freshness`` (optional, deploy-only): drop tracks whose
last_seen is older than ``step - min_freshness``. Real sheep in
FOV are detected nearly every step; phantom tracks from sporadic
Webots FPs stop being re-observed and decay. Default ``None``
preserves training behaviour (extrapolated tracks visible).
"""
result = {}
for tid, track in self._tracks.items():
if track.penned:
continue
px, py = track.predicted_position(self.step)
if (min_freshness is not None
and self.step - track.last_seen > min_freshness):
continue
px, py = track.predicted_position(
self.step, self._predict_steps, self._velocity_clamp)
result[f"t{tid}"] = (px, py)
return result
@@ -234,4 +304,4 @@ class SheepTracker:
"""Number of active tracks currently being extrapolated (not directly observed)."""
return sum(1 for t in self._tracks.values()
if not t.penned and (self.step - t.last_seen) > 0
and (self.step - t.last_seen) <= PREDICT_STEPS)
and (self.step - t.last_seen) <= self._predict_steps)