Webots sim-to-real fixes, DAgger pipeline, 360° proto variant

Today's session worked across the full Webots delivery stack — found and fixed a cluster of bugs blocking the BC/RL transfer, then explored training-side mitigations for the residual perception gap.

Bug fixes:
- Makefile FP_RATE default 2.0 → 0.0: BC demos used fp_rate=0 but RL fine-tune defaulted to fp_rate=2, poisoning the BC obs distribution and stalling PPO at 0% success across 1.46M+ steps.
- controllers/{shepherd_dog,sheep}/runtime.ini: Webots was launching controllers under system python3 (no numpy) and they were crashing silently. Pinned to the conda tir env.
- herding/config.py HERDING_WEBOTS preset: pen_latch_depth 0.5 → 2.0, max_new_tracks_per_step 3 → 1, static_reject 0.8 → 1.2. Stops phantom FPs near the gate from latching as permanently-penned tracks.
- herding/perception/sheep_tracker.py: penned tracks now decay at forget_steps × 8 instead of living forever. Adds get_positions min_freshness filter for deploy-time use.

Training/eval matches deployment:
- training/bc/collect.py: --dagger-policy flag for DAgger rollouts (policy drives, teacher labels) + --use-webots-preset for matched 140° tracker + DR config.
- controllers/shepherd_dog/shepherd_dog.py: scan-fallback (0, 0.6) when BC/RL sees empty sheep_positions — recovers from FOV gaps.

Tooling:
- tools/dagger_round.sh: one-shot DAgger round (collect + concat + bc).
- tools/webots_sweep_gt.sh: full sweep with HERDING_USE_GT=1 for the perception-gap diagnosis matrix.
- protos/ShepherdDog360.proto: 360° FOV variant for the FOV-ablation comparison. Canonical proto stays at 140° per project spec.

Artifacts: v1 BC/RL policies for all 4 (drive × world) combos trained in clean gym (success: diff/field 90-100%, diff/round 58%, mec/field 60-100%, mec/round 50-100%). DAgger r1/r2 BCs for diff/field show 12%→38% progression on gym HERDING_WEBOTS proxy but did not close the gap to actual Webots LiDAR (0/5 throughout).

Next: LSTM policy or learned tracker per the project-state memory.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-16 17:21:02 +00:00
parent c61df91950
commit dd5ac669e5
34 changed files with 2336 additions and 188 deletions
@@ -21,9 +21,13 @@ The downstream tracker handles association across frames.
 from __future__ import annotations

 import math
+from typing import TYPE_CHECKING

 import numpy as np

+if TYPE_CHECKING:
+    from herding.config import DetectionConfig, LidarConfig
+
 from herding.world.geometry import (
    FIELD_SHAPE, FIELD_ROUND_R,
    FIELD_X, FIELD_Y, GATE_X, GATE_Y,
@@ -79,21 +83,22 @@ def _in_field_region(cx: float, cy: float) -> bool:
            FIELD_Y[0] - 0.2 < cy < FIELD_Y[1] + 0.2)


-def _near_wall(cx: float, cy: float) -> bool:
+def _near_wall(cx: float, cy: float, wall_reject: float = WALL_REJECT) -> bool:
    """True if the detection is too close to a wall to be a sheep."""
    if FIELD_SHAPE == "field_round":
        r = math.hypot(cx, cy)
-        return r > FIELD_ROUND_R - WALL_REJECT
+        return r > FIELD_ROUND_R - wall_reject
    return (
-        cx > FIELD_X[1] - WALL_REJECT or cx < FIELD_X[0] + WALL_REJECT or
-        cy > FIELD_Y[1] - WALL_REJECT or
-        (cy < FIELD_Y[0] + WALL_REJECT and not (PEN_X[0] <= cx <= PEN_X[1]))
+        cx > FIELD_X[1] - wall_reject or cx < FIELD_X[0] + wall_reject or
+        cy > FIELD_Y[1] - wall_reject or
+        (cy < FIELD_Y[0] + wall_reject and not (PEN_X[0] <= cx <= PEN_X[1]))
    )


 def _split_cluster_by_range(
    points: list[tuple[float, float]],
    range_vals: list[float],
+    split_range_gap: float = SPLIT_RANGE_GAP,
 ) -> list[list[tuple[float, float]]]:
    """Split a cluster at range-profile local maxima (gaps between sheep).

@@ -108,7 +113,7 @@ def _split_cluster_by_range(
    # Find the maximum range (the dip/gap between sheep).
    r_max = max(range_vals)
    # If the range variation is small, it's a single target.
-    if r_max - r_min < SPLIT_RANGE_GAP:
+    if r_max - r_min < split_range_gap:
        return [points]
    # Find the split point: the index with the maximum range.
    split_idx = range_vals.index(r_max)
@@ -124,7 +129,7 @@ def _split_cluster_by_range(
        (right, range_vals[split_idx + 1:]),
    ]:
        if len(sub_pts) >= 1:
-            result.extend(_split_cluster_by_range(sub_pts, sub_ranges))
+            result.extend(_split_cluster_by_range(sub_pts, sub_ranges, split_range_gap))
    return result if result else [points]


@@ -132,14 +137,43 @@ def detections_from_scan(
    ranges: np.ndarray,
    dog_x: float, dog_y: float, dog_heading: float,
    max_range: float = LIDAR_MAX_RANGE,
+    detection_cfg: "DetectionConfig | None" = None,
+    lidar_cfg: "LidarConfig | None" = None,
 ) -> list[tuple[float, float]]:
-    """Return list of (x, y) world-frame sheep position estimates."""
+    """Return list of (x, y) world-frame sheep position estimates.
+
+    Pass ``detection_cfg`` to override clustering/filtering thresholds, or
+    ``lidar_cfg`` to override the FOV, sheep radius and max range (when given,
+    ``lidar_cfg.max_range`` replaces the ``max_range`` argument). The number
+    of rays is always inferred from the length of ``ranges``.
+    """
+    # Resolve parameters — fall back to module-level constants when no cfg.
+    if detection_cfg is not None:
+        gap_thr = detection_cfg.gap_threshold
+        max_span = detection_cfg.max_cluster_span
+        hit_eps = detection_cfg.range_hit_eps
+        split_gap = detection_cfg.split_range_gap
+        wall_rej = detection_cfg.wall_reject
+        static_rej = detection_cfg.static_reject
+    else:
+        gap_thr = GAP_THRESHOLD
+        max_span = MAX_CLUSTER_SPAN
+        hit_eps = RANGE_HIT_EPS
+        split_gap = SPLIT_RANGE_GAP
+        wall_rej = WALL_REJECT
+        static_rej = STATIC_REJECT
+
+    sheep_r = lidar_cfg.sheep_radius if lidar_cfg is not None else SHEEP_RADIUS
+    fov = lidar_cfg.fov_rad if lidar_cfg is not None else LIDAR_FOV
+    if lidar_cfg is not None:
+        max_range = lidar_cfg.max_range
+
    ranges = np.asarray(ranges, dtype=np.float32)
    n_rays = ranges.shape[0]
    if n_rays == 0:
        return []
-    angles = ray_angles(n_rays, LIDAR_FOV)
-    hit = ranges < max_range - RANGE_HIT_EPS
+    angles = ray_angles(n_rays, fov)
+    hit = ranges < max_range - hit_eps

    world_a = dog_heading + angles
    px = dog_x + ranges * np.cos(world_a)
@@ -159,7 +193,7 @@ def detections_from_scan(
            prev_xy = None
            continue
        pt = (float(px[i]), float(py[i]), float(ranges[i]))
-        if prev_xy is not None and math.hypot(pt[0] - prev_xy[0], pt[1] - prev_xy[1]) > GAP_THRESHOLD:
+        if prev_xy is not None and math.hypot(pt[0] - prev_xy[0], pt[1] - prev_xy[1]) > gap_thr:
            clusters.append(current)
            current = []
        current.append(pt)
@@ -174,7 +208,7 @@ def detections_from_scan(

        # Multi-peak splitting.
        if len(cluster) >= 4:
-            sub_clusters = _split_cluster_by_range(points_xy, range_vals)
+            sub_clusters = _split_cluster_by_range(points_xy, range_vals, split_gap)
        else:
            sub_clusters = [points_xy]

@@ -185,24 +219,24 @@ def detections_from_scan(
            ys = [p[1] for p in sub]
            cx, cy = sum(xs) / len(xs), sum(ys) / len(ys)
            span = math.hypot(max(xs) - min(xs), max(ys) - min(ys))
-            if span > MAX_CLUSTER_SPAN:
+            if span > max_span:
                continue
            # Rays hit the front edge of the sheep; offset outward by
-            # SHEEP_RADIUS along the dog→cluster direction.
+            # sheep_radius along the dog→cluster direction.
            dx, dy = cx - dog_x, cy - dog_y
            d = math.hypot(dx, dy)
            if d > 1e-3:
-                cx += SHEEP_RADIUS * dx / d
-                cy += SHEEP_RADIUS * dy / d
+                cx += sheep_r * dx / d
+                cy += sheep_r * dy / d
            in_main = _in_field_region(cx, cy)
            in_gate_strip = (PEN_X[0] - 0.2 < cx < PEN_X[1] + 0.2 and
                             GATE_Y - 1.0 < cy < GATE_Y + 0.2)
            if not (in_main or in_gate_strip):
                continue
-            if any(math.hypot(cx - fx, cy - fy) < STATIC_REJECT
+            if any(math.hypot(cx - fx, cy - fy) < static_rej
                   for fx, fy in _STATIC_FEATURES):
                continue
-            if _near_wall(cx, cy):
+            if _near_wall(cx, cy, wall_rej):
                continue
            detections.append((cx, cy))
    return detections
@@ -2,20 +2,25 @@

 Raycasts against sheep (discs) and static world geometry. For rectangular
 fields this is axis-aligned walls + gate posts; for round fields it is a
-circular wall + gate posts. The env reproduces the false-positive cluster
-distribution Webots produces from real 3D geometry.
+circular wall + gate posts.

-Returns a range array matching the Webots Lidar device:
-180 rays, 140° FOV centred on forward, 12 m max range, 5 mm noise.
-See ``protos/ShepherdDog.proto``.
+The module-level constants (``LIDAR_N_RAYS``, ``LIDAR_FOV``, etc.) reflect
+the original 360°/360-ray oracle configuration.  Pass a
+:class:`~herding.config.LidarConfig` to :func:`simulate_scan` to use a
+different spec (e.g. :data:`~herding.config.LIDAR_WEBOTS` for 180-ray/140°
+matching the ShepherdDog.proto hardware).
 """

 from __future__ import annotations

 import math
+from typing import TYPE_CHECKING

 import numpy as np

+if TYPE_CHECKING:
+    from herding.config import LidarConfig
+
 from herding.world.geometry import (
    FIELD_SHAPE, FIELD_ROUND_R,
    FIELD_X, FIELD_Y,
@@ -192,14 +197,30 @@ def simulate_scan(
    noise: float = LIDAR_NOISE,
    max_range: float = LIDAR_MAX_RANGE,
    rng: np.random.Generator | None = None,
+    lidar_cfg: "LidarConfig | None" = None,
 ) -> np.ndarray:
    """Return a (N,) float32 range array. No-hit entries equal ``max_range``.

    ``sheep_xy`` is every sheep (penned or active) in the scene.
+
+    Pass ``lidar_cfg`` (e.g. :data:`~herding.config.LIDAR_WEBOTS`) to override the
+    module-level defaults and the ``noise``/``max_range`` arguments for one call.
    """
-    ch, sh = math.cos(dog_heading), math.sin(dog_heading)
-    cos_w = ch * _COS - sh * _SIN
-    sin_w = sh * _COS + ch * _SIN
+    if lidar_cfg is not None:
+        n_rays = lidar_cfg.n_rays
+        fov = lidar_cfg.fov_rad
+        max_range = lidar_cfg.max_range
+        noise = lidar_cfg.noise_std
+        sheep_r2 = lidar_cfg.sheep_radius ** 2
+        angles = ray_angles(n_rays, fov)
+        ch, sh = math.cos(dog_heading), math.sin(dog_heading)
+        cos_w = ch * np.cos(angles) - sh * np.sin(angles)
+        sin_w = sh * np.cos(angles) + ch * np.sin(angles)
+    else:
+        sheep_r2 = SHEEP_RADIUS ** 2
+        ch, sh = math.cos(dog_heading), math.sin(dog_heading)
+        cos_w = ch * _COS - sh * _SIN
+        sin_w = sh * _COS + ch * _SIN

    best = _raycast_static(dog_x, dog_y, cos_w, sin_w)

@@ -209,9 +230,8 @@ def simulate_scan(
        t = np.outer(sx, cos_w) + np.outer(sy, sin_w)
        s_dist2 = (sx ** 2 + sy ** 2)[:, None]
        perp2 = s_dist2 - t ** 2
-        R2 = SHEEP_RADIUS ** 2
-        hit = (perp2 < R2) & (t > 0.0)
-        half = np.sqrt(np.clip(R2 - perp2, 0.0, None))
+        hit = (perp2 < sheep_r2) & (t > 0.0)
+        half = np.sqrt(np.clip(sheep_r2 - perp2, 0.0, None))
        candidate = np.where(hit, t - half, np.inf)
        nearest = candidate.min(axis=0)
        np.minimum(best, nearest, out=best)
@@ -22,6 +22,10 @@ plane south (``is_penned_position``). Penned tracks are excluded from
 from __future__ import annotations

 import math
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from herding.config import TrackerConfig

 from herding.world.geometry import MAX_SHEEP, in_pen, is_penned_position

@@ -56,16 +60,21 @@ class Track:
        """Not-a-property in the hot loop — callers pass current step."""
        raise NotImplementedError

-    def predicted_position(self, current_step: int) -> tuple[float, float]:
+    def predicted_position(
+        self,
+        current_step: int,
+        predict_steps: int = PREDICT_STEPS,
+        velocity_clamp: float = VELOCITY_CLAMP,
+    ) -> tuple[float, float]:
        """Extrapolated position using constant velocity, clamped."""
        dt = current_step - self.last_seen
-        if dt <= 0 or dt > PREDICT_STEPS:
+        if dt <= 0 or dt > predict_steps:
            return self.x, self.y
        speed = math.hypot(self.vx, self.vy)
        if speed < 1e-4:
            return self.x, self.y
        # Clamp extrapolation distance.
-        max_d = VELOCITY_CLAMP * dt * 0.016  # steps → seconds
+        max_d = velocity_clamp * dt * 0.016  # steps → seconds
        d = min(speed * dt * 0.016, max_d)
        return (
            self.x + d * (self.vx / speed),
@@ -93,10 +102,36 @@ class SheepTracker:

    Each track is a :class:`Track` with position, velocity estimate,
    last-seen step, and penned flag.
+
+    Pass a :class:`~herding.config.TrackerConfig` to override any
+    module-level defaults without changing this file.
    """

-    def __init__(self, gate: float = GATE_M):
-        self.gate = gate
+    def __init__(
+        self,
+        gate: float = GATE_M,
+        tracker_cfg: "TrackerConfig | None" = None,
+    ):
+        if tracker_cfg is not None:
+            self.gate = tracker_cfg.gate_m
+            self._reacquire_gate = tracker_cfg.reacquire_gate_m
+            self._reacquire_min_age = tracker_cfg.reacquire_min_age
+            self._penned_gate = tracker_cfg.penned_gate_m
+            self._forget_steps = tracker_cfg.forget_steps
+            self._predict_steps = tracker_cfg.predict_steps
+            self._velocity_clamp = tracker_cfg.velocity_clamp
+            self._max_new_per_step = tracker_cfg.max_new_tracks_per_step
+            self._pen_latch_depth = tracker_cfg.pen_latch_depth
+        else:
+            self.gate = gate
+            self._reacquire_gate = REACQUIRE_GATE_M
+            self._reacquire_min_age = REACQUIRE_MIN_AGE
+            self._penned_gate = PENNED_GATE_M
+            self._forget_steps = FORGET_STEPS
+            self._predict_steps = PREDICT_STEPS
+            self._velocity_clamp = VELOCITY_CLAMP
+            self._max_new_per_step = MAX_ACTIVE_TRACKS
+            self._pen_latch_depth = 0.0
        self._tracks: dict[int, Track] = {}
        self._next_id = 0
        self.step = 0
@@ -119,8 +154,8 @@ class SheepTracker:
        active_tids.sort(key=lambda tid: self._tracks[tid].last_seen)
        for tid in active_tids:
            track = self._tracks[tid]
-            # Use predicted position for matching.
-            tx, ty = track.predicted_position(self.step)
+            tx, ty = track.predicted_position(
+                self.step, self._predict_steps, self._velocity_clamp)
            best_j, best_d = -1, self.gate
            for j, (dx, dy) in enumerate(detections):
                if j in det_used:
@@ -140,10 +175,11 @@ class SheepTracker:
            if tid in updated_tids:
                continue
            track = self._tracks[tid]
-            if (self.step - track.last_seen) < REACQUIRE_MIN_AGE:
+            if (self.step - track.last_seen) < self._reacquire_min_age:
                continue
-            tx, ty = track.predicted_position(self.step)
-            best_j, best_d = -1, REACQUIRE_GATE_M
+            tx, ty = track.predicted_position(
+                self.step, self._predict_steps, self._velocity_clamp)
+            best_j, best_d = -1, self._reacquire_gate
            for j, (dx, dy) in enumerate(detections):
                if j in det_used:
                    continue
@@ -161,7 +197,7 @@ class SheepTracker:
        penned_tids = [tid for tid, t in self._tracks.items() if t.penned]
        for tid in penned_tids:
            track = self._tracks[tid]
-            best_j, best_d = -1, PENNED_GATE_M
+            best_j, best_d = -1, self._penned_gate
            for j, (dx, dy) in enumerate(detections):
                if j in det_used:
                    continue
@@ -174,25 +210,35 @@ class SheepTracker:
                track.update(dx, dy, self.step)
                det_used.add(best_j)

-        # Spawn new tracks for unmatched detections.
+        # Spawn new tracks for unmatched detections — rate-capped.
+        spawned = 0
        for j, (dx, dy) in enumerate(detections):
            if j in det_used:
                continue
-            penned = in_pen(dx, dy) or is_penned_position(dx, dy)
+            if spawned >= self._max_new_per_step:
+                break
+            penned = self._is_penned(dx, dy)
            self._tracks[self._next_id] = Track(dx, dy, self.step, penned)
            self._next_id += 1
+            spawned += 1

        # Promote active tracks whose current estimate crosses the gate.
        for track in self._tracks.values():
            if track.penned:
                continue
-            px, py = track.predicted_position(self.step)
-            if is_penned_position(px, py):
+            px, py = track.predicted_position(
+                self.step, self._predict_steps, self._velocity_clamp)
+            if self._is_penned(px, py):
                track.penned = True

-        # Forget stale active tracks; penned tracks live forever.
+        # Forget stale active tracks; penned tracks decay too but at a
+        # longer horizon (real penned sheep are still observed occasionally
+        # when the dog faces south; pure FPs at gate posts stop being
+        # detected once the dog drives away).
+        penned_forget = self._forget_steps * 8
        stale = [tid for tid, t in self._tracks.items()
-                 if not t.penned and (self.step - t.last_seen) > FORGET_STEPS]
+                 if (not t.penned and (self.step - t.last_seen) > self._forget_steps)
+                 or (t.penned and (self.step - t.last_seen) > penned_forget)]
        for tid in stale:
            del self._tracks[tid]

@@ -206,18 +252,42 @@ class SheepTracker:

        return self.get_positions()

-    def get_positions(self) -> dict[str, tuple[float, float]]:
+    def _is_penned(self, x: float, y: float) -> bool:
+        """Check whether a position should be considered penned.
+
+        Uses ``pen_latch_depth`` to require the position to be that many
+        metres past the gate line before latching.  Increasing the depth
+        prevents gate-area LiDAR false positives (gate hardware reflections
+        at y ≈ -15) from being permanently latched as penned tracks.
+        """
+        from herding.world.geometry import GATE_Y
+        # Apply depth threshold to both in_pen and is_penned_position so
+        # that any position in the gate column must clear GATE_Y - depth.
+        threshold = GATE_Y - self._pen_latch_depth
+        return (in_pen(x, y) or is_penned_position(x, y)) and y <= threshold
+
+    def get_positions(self, min_freshness: int | None = None) -> dict[str, tuple[float, float]]:
        """Active (not-penned) tracks as a ``{name: (x, y)}`` dict.

        For tracks currently being predicted (occluded but within
-        PREDICT_STEPS), returns the extrapolated position so the teacher
+        predict_steps), returns the extrapolated position so the teacher
        sees a smooth estimate.
+
+        ``min_freshness`` (optional, deploy-only): drop tracks whose
+        last_seen is older than ``step - min_freshness``. Real sheep in
+        FOV are detected nearly every step; phantom tracks from sporadic
+        Webots FPs stop being re-observed and decay. Default ``None``
+        preserves training behaviour (extrapolated tracks visible).
        """
        result = {}
        for tid, track in self._tracks.items():
            if track.penned:
                continue
-            px, py = track.predicted_position(self.step)
+            if (min_freshness is not None
+                    and self.step - track.last_seen > min_freshness):
+                continue
+            px, py = track.predicted_position(
+                self.step, self._predict_steps, self._velocity_clamp)
            result[f"t{tid}"] = (px, py)
        return result

@@ -234,4 +304,4 @@ class SheepTracker:
        """Number of active tracks currently being extrapolated (not directly observed)."""
        return sum(1 for t in self._tracks.values()
                   if not t.penned and (self.step - t.last_seen) > 0
-                   and (self.step - t.last_seen) <= PREDICT_STEPS)
+                   and (self.step - t.last_seen) <= self._predict_steps)