Checkpoint 6

2026-05-11 10:35:48 +01:00
parent b457155538
commit fce0e0c786
27 changed files with 194 additions and 704 deletions
@@ -0,0 +1,132 @@
+"""Active-perception wrapper for the analytic shepherding teachers.
+
+Under LiDAR (partial observability), the tracker starts empty — the
+dog hasn't seen any sheep yet. A naive Strömbom call returns
+``(0, 0, "idle")`` and the dog stops. The student then learns "do
+nothing when the tracker is empty," which is a fatal local optimum.
+
+This wrapper replaces the idle case with a **scan action**: a unit
+vector pointing directly behind the dog (current heading + π). Passed
+through ``velocity_to_wheels`` it produces a fast in-place rotation
+(the ``cos(err)`` clamp drives forward speed to ~0 because the target
+is opposite the heading). The dog spins for the first
+``initial_scan_steps`` steps of every episode regardless of tracker
+state. If the tracker later stays empty for ``EMPTY_DEBOUNCE_STEPS``
+consecutive frames mid-episode, the dog walks back toward the field
+centre and resumes the in-place scan once it gets there; shorter
+dropouts simply hold the previous action.
+
+Once enough sheep are tracked, control hands over to the underlying
+analytic teacher (Strömbom or Sequential), which now operates on a
+populated tracker dict. Both teacher and student see the same
+LiDAR-perceived view — there's no information asymmetry, so the
+student can in principle achieve the teacher's full performance.
+"""
+
+from __future__ import annotations
+
+import math
+
+from herding.control.modulation import modulate_speed_near_sheep
+
+
+# Length of the unconditional opening scan, in controller steps:
+# ≈1.3 s at dt=16 ms — a full rotation at the +π turn target.
+INITIAL_SCAN_STEPS = 80
+# Action norm (m/s-ish unit) used when walking blind toward the centre.
+EXPLORE_SPEED = 0.7
+
+# Debounce on tracker emptiness — a single empty frame between
+# detections is not enough reason to abandon the drive and fall back
+# to explore/scan. Require this many consecutive empty frames first;
+# shorter dropouts replay the previous action instead.
+EMPTY_DEBOUNCE_STEPS = 8
+
+
+class ActiveScanTeacher:
+    """Stateful wrapper. Construct one per episode; call ``reset()``
+    between episodes if reusing the instance.
+
+    Call signature::
+
+        vx, vy, mode = teacher(dog_xy, dog_heading, sheep_positions, pen_target)
+
+    Note the extra ``dog_heading`` arg — required to compute the
+    rotation direction. The base teachers (Strömbom, Sequential)
+    don't use heading; we strip it before passing them through.
+    """
+
+    def __init__(self, base_action_fn, initial_scan_steps: int = INITIAL_SCAN_STEPS):
+        self.base = base_action_fn
+        self.initial_scan = int(initial_scan_steps)
+        self.reset()
+
+    def reset(self) -> None:
+        self.step = 0
+        self.empty_streak = 0
+        self.last_action: tuple[float, float] = (0.0, 0.0)
+
+    @staticmethod
+    def _scan_action(dog_heading: float) -> tuple[float, float]:
+        # Target = current_heading + π. velocity_to_wheels gets err=π,
+        # so turn = k_turn·π = 4π ≈ 12.6 rad/s wheel angular vel and
+        # cos(err) clamps the forward speed to ~0. Maximum in-place
+        # rotation under this controller; one full rotation in ~60 steps.
+        target = dog_heading + math.pi
+        return math.cos(target), math.sin(target)
+
+    @staticmethod
+    def _explore_action(dog_xy) -> tuple[float, float]:
+        """Walk back toward the field centre when nothing is in view.
+
+        At difficulty=1 sheep can spawn up to ~18 m from origin while
+        the LiDAR has a 12 m range, so an in-place scan from a corner
+        can return zero hits. Walking toward (0, 0) shrinks the
+        max-distance-to-any-sheep and the scanner cone sweeps along
+        the path, eventually picking sheep up.
+        """
+        dx, dy = -dog_xy[0], -dog_xy[1]
+        d = math.hypot(dx, dy)
+        if d < 0.5:
+            # At the centre — fall through to a scan instead.
+            return 0.0, 0.0
+        return EXPLORE_SPEED * dx / d, EXPLORE_SPEED * dy / d
+
+    def __call__(self, dog_xy, dog_heading, sheep_positions, pen_target):
+        self.step += 1
+        n_visible = len(sheep_positions)
+
+        # Track empty-streak for the explore debounce.
+        if n_visible == 0:
+            self.empty_streak += 1
+        else:
+            self.empty_streak = 0
+
+        # Phase 1: opening rotation, regardless of tracker state.
+        if self.step <= self.initial_scan:
+            vx, vy = self._scan_action(dog_heading)
+            self.last_action = (vx, vy)
+            return vx, vy, "scan_initial"
+
+        # Phase 2: tracker has been empty for a while — walk back to the
+        # centre while the LiDAR keeps sweeping. The debounce prevents
+        # this from firing every time the tracker briefly blinks to zero
+        # (which causes the "dog starts going away from sheep" symptom).
+        if self.empty_streak >= EMPTY_DEBOUNCE_STEPS:
+            ex, ey = self._explore_action(dog_xy)
+            if ex == 0.0 and ey == 0.0:
+                vx, vy = self._scan_action(dog_heading)
+                mode = "scan_at_centre"
+            else:
+                vx, vy = ex, ey
+                mode = "explore"
+            self.last_action = (vx, vy)
+            return vx, vy, mode
+
+        # Phase 2b: tracker just blinked empty for <DEBOUNCE frames —
+        # hold the previous action so the dog doesn't lurch.
+        if n_visible == 0:
+            vx, vy = self.last_action
+            return vx, vy, "hold"
+
+        # Phase 3: hand to the underlying analytic teacher, then apply
+        # the shared near-sheep speed modulation (centralised in
+        # herding.control so the BC student, Strömbom, Sequential and
+        # the DAgger teacher all behave identically near sheep).
+        vx, vy, mode = self.base(dog_xy, sheep_positions, pen_target)
+        vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
+        self.last_action = (vx, vy)
+        return vx, vy, mode
@@ -0,0 +1,53 @@
+"""Shared low-level control helpers used by every dog mode.
+
+Centralised here so the BC student, Strömbom, Sequential, and the DAgger
+teacher all apply identical post-processing to their action outputs.
+The downstream wheel-velocity layer (``herding.diffdrive``) is unchanged.
+"""
+
+from __future__ import annotations
+
+import math
+
+
+# Speed-modulation: scale action magnitude down when close to the
+# nearest sheep. Stops the dog from charging in at full speed and
+# scattering the flock. The scale factor ramps linearly from MIN_SPEED
+# at distance 0 up to 1.0 at SLOW_NEAR_SHEEP; at or beyond that
+# distance the action is left untouched.
+SLOW_NEAR_SHEEP = 2.5   # nearest-sheep distance below which slowing starts
+MIN_SPEED = 0.30        # scale factor applied at zero distance
+
+
+def modulate_speed_near_sheep(
+    vx: float, vy: float,
+    dog_xy: tuple[float, float],
+    sheep_positions,
+    slow_dist: float = SLOW_NEAR_SHEEP,
+    min_scale: float = MIN_SPEED,
+) -> tuple[float, float]:
+    """Scale (vx, vy) magnitude down when close to the nearest sheep.
+
+    ``sheep_positions`` accepts either a ``{name: (x, y)}`` dict
+    (matching what the trackers emit) or an iterable of ``(x, y)``
+    tuples. Empty input → action returned unchanged.
+
+    The intent direction is preserved; only magnitude is reduced. With
+    ``slow_dist=2.5`` and ``min_scale=0.3``, an action that started at
+    norm 1 is multiplied by 0.3 right next to a sheep, by 0.65 at 1 m
+    away, and by 1.0 once the nearest sheep is ≥ 2.5 m off.
+    """
+    if not sheep_positions:
+        return vx, vy
+    if hasattr(sheep_positions, "values"):
+        positions = sheep_positions.values()
+    else:
+        positions = sheep_positions
+    nearest = float("inf")
+    for sx, sy in positions:
+        d = math.hypot(sx - dog_xy[0], sy - dog_xy[1])
+        if d < nearest:
+            nearest = d
+    if nearest >= slow_dist or nearest == float("inf"):
+        return vx, vy
+    scale = min_scale + (1.0 - min_scale) * (nearest / slow_dist)
+    return vx * scale, vy * scale
@@ -0,0 +1,98 @@
+"""Sequential single-target shepherd dog algorithm.
+
+Strömbom drives the flock's centre of mass; with N sheep and a narrow
+3 m gate, this fails because the flock is wider than the gate and CoM
+driving abandons stragglers. Real sheepdogs solve this differently:
+they pick *one* sheep at a time, drive it through, return for the next.
+
+This module implements that "pin-and-push" approach.
+
+Algorithm (one step):
+1. Active sheep = those still in the field (not yet penned).
+2. Target = the active sheep currently closest to the pen entry.
+3. Drive position = ``target + Δ · unit(target − pen_entry)`` —
+   directly behind the target relative to the goal.
+4. Output unit vector pointing the dog at the drive position.
+
+Once the target crosses the gate it latches as penned and is removed
+from the active set; the next-closest unpenned sheep becomes the
+target. The algorithm naturally "queues" sheep through the gate.
+
+Empirically (with our flocking dynamics) this scales linearly with
+flock size and works up to at least n=10 within a 15 000-step budget.
+"""
+
+import math
+
+from herding.world.geometry import GATE_Y, PEN_ENTRY, in_pen
+
+
+DELTA_DRIVE = 1.5     # standoff behind the target sheep, on the side away from the pen
+APPROACH_GAIN = 1.0   # scale applied to the unit steering vector (1 = full speed)
+
+
+def _unit(x, y):
+    d = math.hypot(x, y)
+    if d < 1e-6:
+        return 0.0, 0.0
+    return x / d, y / d
+
+
+def _is_active(x, y) -> bool:
+    return (not in_pen(x, y)) and y > GATE_Y
+
+
+def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
+    """Return ``(vx, vy, mode)`` where mode encodes the current target.
+
+    Compatible with the Strömbom call signature so it can be drop-in
+    swapped in the dog controller and the env's imitation reward.
+    """
+    active = [(name, x, y) for name, (x, y) in sheep_positions.items()
+              if _is_active(x, y)]
+    if not active:
+        return 0.0, 0.0, "idle"
+
+    # Pick target = sheep closest to pen entry. Stable choice: as one
+    # sheep approaches and crosses the gate it stays the target until
+    # latched; then the next-closest takes over.
+    name, sx, sy = min(
+        active,
+        key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
+    )
+
+    # Drive position behind the target along the (target → pen) line.
+    ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
+    tx = sx + DELTA_DRIVE * ux
+    ty = sy + DELTA_DRIVE * uy
+
+    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
+    return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}"
+
+
+def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
+    """Debug variant returning ``(vx, vy, mode, debug_dict)``."""
+    active = [(name, x, y) for name, (x, y) in sheep_positions.items()
+              if _is_active(x, y)]
+    if not active:
+        return 0.0, 0.0, "idle", {
+            "n_active": 0, "target_name": "",
+            "target_x": 0.0, "target_y": 0.0,
+            "drive_x": dog_xy[0], "drive_y": dog_xy[1],
+        }
+
+    name, sx, sy = min(
+        active,
+        key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
+    )
+
+    ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
+    tx = sx + DELTA_DRIVE * ux
+    ty = sy + DELTA_DRIVE * uy
+    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
+
+    return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}", {
+        "n_active": len(active), "target_name": name,
+        "target_x": sx, "target_y": sy,
+        "drive_x": tx, "drive_y": ty,
+    }
@@ -0,0 +1,114 @@
+"""Strömbom collect/drive heuristic for the shepherd dog.
+
+Adapted from the original ``controllers/shepherd_dog/strombom.py`` and
+updated for the external pen layout. Used as a baseline controller and
+as the fallback when the RL policy isn't available.
+
+Reference: Strömbom et al. 2014, "Solving the shepherding problem".
+"""
+
+import math
+
+from herding.world.geometry import PEN_ENTRY, GATE_Y, in_pen
+
+# Algorithm parameters. DELTA_DRIVE / DELTA_COLLECT were tightened from
+# the original (4.0 / 2.5) because the new external pen sits ~26 m from
+# typical sheep spawn locations — at the old 4 m standoff, the flee force
+# (quadratic ramp, 3.7 at 4 m vs ~10 at 2 m) couldn't move sheep along
+# the path within the 3000-step episode budget.
+#
+# F_FACTOR was 2.0 in the original Strömbom paper; raised to 4.0 here so
+# the dog stays in *drive* mode much longer. With our tighter cohesion
+# (flocking_sim.py), partially-collected flocks consolidate naturally
+# during a drive, and we don't waste 80% of the time budget on a slow
+# "collect" pre-phase.
+F_FACTOR = 4.0        # collect when flock radius > F_FACTOR * sqrt(n_active)
+DELTA_COLLECT = 1.5   # standoff beyond the furthest sheep, away from the CoM
+DELTA_DRIVE = 2.0     # standoff behind the CoM, on the side away from the pen
+
+
+def _unit(x, y):
+    d = math.hypot(x, y)
+    if d < 1e-6:
+        return 0.0, 0.0
+    return x / d, y / d
+
+
+def _is_active(x, y) -> bool:
+    """A sheep is "active" if it's still in the field — not in or below
+    the gate plane (we treat anything south of the gate as committed to
+    the pen and stop trying to herd it)."""
+    return (not in_pen(x, y)) and y > GATE_Y
+
+
+def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
+    """Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}.
+
+    ``sheep_positions`` is a ``{name: (x, y)}`` mapping (matches the
+    Webots controller's representation).
+    """
+    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
+    if not active:
+        return 0.0, 0.0, "idle"
+
+    n = len(active)
+    com_x = sum(p[0] for p in active) / n
+    com_y = sum(p[1] for p in active) / n
+    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
+    radius = max(dists)
+
+    if radius > F_FACTOR * math.sqrt(n):
+        # Collect: aim at a point behind the furthest sheep, opposite the CoM.
+        idx = max(range(n), key=lambda i: dists[i])
+        sx, sy = active[idx]
+        ux, uy = _unit(sx - com_x, sy - com_y)
+        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
+        mode = "collect"
+    else:
+        # Drive: aim at a point behind the flock CoM relative to the goal.
+        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
+        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
+        mode = "drive"
+
+    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
+    return ax, ay, mode
+
+
+def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
+    """Variant of compute_action that also returns a small debug dict.
+
+    Kept for parity with the legacy controller's CSV logger.
+    """
+    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
+    if not active:
+        return 0.0, 0.0, "idle", {
+            "n_active": 0, "radius": 0.0, "threshold": 0.0,
+            "com_x": 0.0, "com_y": 0.0,
+            "target_x": dog_xy[0], "target_y": dog_xy[1],
+        }
+
+    n = len(active)
+    com_x = sum(p[0] for p in active) / n
+    com_y = sum(p[1] for p in active) / n
+    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
+    radius = max(dists)
+    threshold = F_FACTOR * math.sqrt(n)
+
+    if radius > threshold:
+        idx = max(range(n), key=lambda i: dists[i])
+        sx, sy = active[idx]
+        ux, uy = _unit(sx - com_x, sy - com_y)
+        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
+        mode = "collect"
+    else:
+        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
+        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
+        mode = "drive"
+
+    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
+    dbg = {
+        "n_active": n, "radius": radius, "threshold": threshold,
+        "com_x": com_x, "com_y": com_y,
+        "target_x": tx, "target_y": ty,
+    }
+    return ax, ay, mode, dbg