Checkpoint 7

2026-05-11 12:21:51 +01:00
parent fce0e0c786
commit a01a5c9cef
34 changed files with 1266 additions and 1038 deletions
@@ -1,23 +1,19 @@
-"""Active-perception wrapper for the analytic shepherding teachers.
+"""Active-perception wrapper for the analytic shepherd teachers.

-Under LiDAR (partial observability), the tracker starts empty — the
-dog hasn't seen any sheep yet. A naive Strömbom call returns
-``(0, 0, "idle")`` and the dog stops. The student then learns "do
-nothing when the tracker is empty," which is a fatal local optimum.
+Under partial-observability LiDAR perception the tracker starts empty
+— a naive analytic teacher returns ``(0, 0, "idle")`` and the dog
+stops. This wrapper interleaves the underlying teacher with two
+exploration behaviours:

-This wrapper replaces the idle case with a **scan action**: a unit
-vector 90° CCW from the dog's current forward direction. Passed
-through ``velocity_to_wheels`` it produces a fast in-place rotation
-(``cos(err)`` clamp drives forward speed to ~0 because the target is
-orthogonal to the heading). The dog spins for the first
-``initial_scan_steps`` steps of every episode regardless of tracker
-state, and re-enters scan whenever the tracker goes empty mid-episode.
+* an opening in-place rotation for the first ``INITIAL_SCAN_STEPS``
+  steps, so the LiDAR sweeps a full circle before driving;
+* walk-to-centre when the tracker has been empty for at least
+  ``EMPTY_DEBOUNCE_STEPS`` consecutive frames (corners can sit
+  beyond the 12 m LiDAR range).

-Once enough sheep are tracked, control hands over to the underlying
-analytic teacher (Strömbom or Sequential), which now operates on a
-populated tracker dict. Both teacher and student see the same
-LiDAR-perceived view — there's no information asymmetry, so the
-student can in principle achieve the teacher's full performance.
+Shorter empty streaks hold the previous action. When the tracker has
+detections the base teacher's action is used, post-processed by
+``modulate_speed_near_sheep`` so the dog doesn't charge the flock.
 """

 from __future__ import annotations
@@ -27,26 +23,17 @@ import math
 from herding.control.modulation import modulate_speed_near_sheep


-INITIAL_SCAN_STEPS = 80    # ≈1.3 s at dt=16 ms — full rotation at the +π turn target.
-EXPLORE_SPEED = 0.7        # m/s-ish unit (action norm) used when walking blind
-
-# Debounce on tracker emptiness — a single empty frame between
-# detections is not enough reason to abandon the drive and start
-# scanning. Require this many consecutive empty frames first.
-EMPTY_DEBOUNCE_STEPS = 8
+INITIAL_SCAN_STEPS = 80         # ≈1.3 s — covers one full rotation
+EXPLORE_SPEED = 0.7             # action norm while walking blind
+EMPTY_DEBOUNCE_STEPS = 8        # consecutive empty frames before exploring


 class ActiveScanTeacher:
-    """Stateful wrapper. Construct one per episode; call ``reset()``
-    between episodes if reusing the instance.
+    """Stateful wrapper. Construct one per episode (or call ``reset``).

    Call signature::

        vx, vy, mode = teacher(dog_xy, dog_heading, sheep_positions, pen_target)
-
-    Note the extra ``dog_heading`` arg — required to compute the
-    rotation direction. The base teachers (Strömbom, Sequential)
-    don't use heading; we strip it before passing them through.
    """

    def __init__(self, base_action_fn, initial_scan_steps: int = INITIAL_SCAN_STEPS):
@@ -61,27 +48,17 @@ class ActiveScanTeacher:

    @staticmethod
    def _scan_action(dog_heading: float) -> tuple[float, float]:
-        # Target = current_heading + π. velocity_to_wheels gets err=π,
-        # so turn = k_turn·π = 4π ≈ 12.6 rad/s wheel angular vel and
-        # cos(err) clamps the forward speed to ~0. Maximum in-place
-        # rotation under this controller; one full rotation in ~60 steps.
+        # Target opposite to current heading; velocity_to_wheels'
+        # cos(err) clamp drives forward speed to ~0 → in-place rotation.
        target = dog_heading + math.pi
        return math.cos(target), math.sin(target)

    @staticmethod
    def _explore_action(dog_xy) -> tuple[float, float]:
-        """Walk back toward the field centre when nothing is in view.
-
-        At difficulty=1 sheep can spawn up to ~18 m from origin while
-        the LiDAR has a 12 m range, so an in-place scan from a corner
-        can return zero hits. Walking toward (0, 0) shrinks the
-        max-distance-to-any-sheep and the scanner cone sweeps along
-        the path, eventually picking sheep up.
-        """
+        """Walk toward (0, 0) while the LiDAR sweeps; (0.0, 0.0) within 0.5 of it."""
        dx, dy = -dog_xy[0], -dog_xy[1]
        d = math.hypot(dx, dy)
        if d < 0.5:
-            # At the centre — fall through to a scan instead.
            return 0.0, 0.0
        return EXPLORE_SPEED * dx / d, EXPLORE_SPEED * dy / d

@@ -89,22 +66,18 @@ class ActiveScanTeacher:
        self.step += 1
        n_visible = len(sheep_positions)

-        # Track empty-streak for the explore debounce.
        if n_visible == 0:
            self.empty_streak += 1
        else:
            self.empty_streak = 0

-        # Phase 1: opening rotation, regardless of tracker state.
+        # Phase 1: opening rotation.
        if self.step <= self.initial_scan:
            vx, vy = self._scan_action(dog_heading)
            self.last_action = (vx, vy)
            return vx, vy, "scan_initial"

-        # Phase 2: tracker has been empty for a while — walk back to the
-        # centre while the LiDAR keeps sweeping. The debounce prevents
-        # this from firing every time the tracker briefly blinks to zero
-        # (which causes the "dog starts going away from sheep" symptom).
+        # Phase 2: walk-to-centre after a sustained empty tracker.
        if self.empty_streak >= EMPTY_DEBOUNCE_STEPS:
            ex, ey = self._explore_action(dog_xy)
            if ex == 0.0 and ey == 0.0:
@@ -116,16 +89,13 @@ class ActiveScanTeacher:
            self.last_action = (vx, vy)
            return vx, vy, mode

-        # Phase 2b: tracker just blinked empty for <DEBOUNCE frames —
-        # hold the previous action so the dog doesn't lurch.
+        # Phase 2b: brief tracker blink — hold the previous action.
        if n_visible == 0:
            vx, vy = self.last_action
            return vx, vy, "hold"

-        # Phase 3: hand to the underlying analytic teacher, then apply
-        # the shared near-sheep speed modulation (centralised in
-        # herding.control so the BC student, Strömbom, Sequential and
-        # the DAgger teacher all behave identically near sheep).
+        # Phase 3: hand off to the underlying analytic teacher, then
+        # apply the shared near-sheep speed modulation.
        vx, vy, mode = self.base(dog_xy, sheep_positions, pen_target)
        vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
        self.last_action = (vx, vy)
@@ -1,8 +1,8 @@
-"""Shared low-level control helpers used by every dog mode.
+"""Shared action post-processing.

-Centralised here so the BC student, Strömbom, Sequential, and the DAgger
-teacher all apply identical post-processing to their action outputs.
-The downstream wheel-velocity layer (``herding.diffdrive``) is unchanged.
+Every dog mode routes its action through ``modulate_speed_near_sheep``
+so the magnitude is reduced near sheep — direction (intent) is
+preserved.
 """

 from __future__ import annotations
@@ -10,12 +10,8 @@ from __future__ import annotations
 import math


-# Speed-modulation: scale action magnitude down when close to the
-# nearest sheep. Stops the dog from charging in at full speed and
-# scattering the flock. Action norm linearly ramps from MIN_SPEED at
-# distance 0 to 1.0 at SLOW_NEAR_SHEEP.
-SLOW_NEAR_SHEEP = 2.5
-MIN_SPEED = 0.30
+SLOW_NEAR_SHEEP = 2.5  # m — distance below which action norm is scaled down
+MIN_SPEED = 0.30       # action norm at zero distance


 def modulate_speed_near_sheep(
@@ -25,16 +21,9 @@ def modulate_speed_near_sheep(
    slow_dist: float = SLOW_NEAR_SHEEP,
    min_scale: float = MIN_SPEED,
 ) -> tuple[float, float]:
-    """Scale (vx, vy) magnitude down when close to the nearest sheep.
-
-    ``sheep_positions`` accepts either a ``{name: (x, y)}`` dict
-    (matching what the trackers emit) or an iterable of ``(x, y)``
-    tuples. Empty input → action returned unchanged.
-
-    The intent direction is preserved; only magnitude is reduced. With
-    ``slow_dist=2.5`` and ``min_scale=0.3``, an action that started at
-    norm 1 is multiplied by 0.3 right next to a sheep, by 0.65 at 1 m
-    away, and by 1.0 once the nearest sheep is ≥ 2.5 m off.
+    """Scale (vx, vy) by nearest-sheep distance: ``min_scale`` at 0,
+    ramping linearly to 1.0 at ``slow_dist``. ``sheep_positions`` is a
+    ``{name: (x, y)}`` dict or ``(x, y)`` iterable; empty → unchanged.
    """
    if not sheep_positions:
        return vx, vy
@@ -1,25 +1,9 @@
-"""Sequential single-target shepherd dog algorithm.
+"""Sequential "pin-and-push" shepherd-dog controller.

-Strömbom drives the flock's centre of mass; with N sheep and a narrow
-3 m gate, this fails because the flock is wider than the gate and CoM
-driving abandons stragglers. Real sheepdogs solve this differently:
-they pick *one* sheep at a time, drive it through, return for the next.
-
-This module implements that "pin-and-push" approach.
-
-Algorithm (one step):
-1. Active sheep = those still in the field (not yet penned).
-2. Target = the active sheep currently closest to the pen entry.
-3. Drive position = ``target + Δ · unit(target − pen_entry)`` —
-   directly behind the target relative to the goal.
-4. Output unit vector pointing the dog at the drive position.
-
-Once the target crosses the gate it latches as penned and is removed
-from the active set; the next-closest unpenned sheep becomes the
-target. The algorithm naturally "queues" sheep through the gate.
-
-Empirically (with our flocking dynamics) this scales linearly with
-flock size and works up to at least n=10 within a 15 000-step budget.
+Single-target alternative to Strömbom: each step, target the sheep
+closest to the pen, park behind it, drive it through; once it latches
+penned the next-closest sheep becomes the target. Naturally queues
+the flock through a narrow gate.
 """

 import math
@@ -43,25 +27,17 @@ def _is_active(x, y) -> bool:


 def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
-    """Return ``(vx, vy, mode)`` where mode encodes the current target.
-
-    Compatible with the Strömbom call signature so it can be drop-in
-    swapped in the dog controller and the env's imitation reward.
-    """
+    """Return ``(vx, vy, mode)`` — same call signature as Strömbom."""
    active = [(name, x, y) for name, (x, y) in sheep_positions.items()
              if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle"

-    # Pick target = sheep closest to pen entry. Stable choice: as one
-    # sheep approaches and crosses the gate it stays the target until
-    # latched; then the next-closest takes over.
    name, sx, sy = min(
        active,
        key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
    )

-    # Drive position behind the target along the (target → pen) line.
    ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
    tx = sx + DELTA_DRIVE * ux
    ty = sy + DELTA_DRIVE * uy
@@ -71,7 +47,7 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):


 def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
-    """Debug variant returning ``(vx, vy, mode, debug_dict)``."""
+    """``compute_action`` plus a debug dict (target, drive point)."""
    active = [(name, x, y) for name, (x, y) in sheep_positions.items()
              if _is_active(x, y)]
    if not active:
@@ -1,30 +1,20 @@
-"""Strömbom collect/drive heuristic for the shepherd dog.
+"""Strömbom (2014) collect/drive heuristic for the shepherd dog.

-Adapted from the original ``controllers/shepherd_dog/strombom.py`` and
-updated for the external pen layout. Used as a baseline controller and
-as the fallback when the RL policy isn't available.
+When the flock is scattered (max radius > F_FACTOR · √n) the dog moves
+to a point behind the furthest sheep and pushes it back toward the
+flock CoM. Otherwise it drives, parking behind the CoM relative to
+the pen target. Returns a unit-vector intent ``(vx, vy, mode)``.

-Reference: Strömbom et al. 2014, "Solving the shepherding problem".
+Reference: Strömbom et al. 2014, "Solving the shepherding problem."
 """

 import math

 from herding.world.geometry import PEN_ENTRY, GATE_Y, in_pen

-# Algorithm parameters. DELTA_DRIVE / DELTA_COLLECT were tightened from
-# the original (4.0 / 2.5) because the new external pen sits ~26 m from
-# typical sheep spawn locations — at the old 4 m standoff, the flee force
-# (quadratic ramp, 3.7 at 4 m vs ~10 at 2 m) couldn't move sheep through
-# the path inside the 3000-step episode budget.
-#
-# F_FACTOR was 2.0 in the original Strömbom paper; raised to 4.0 here so
-# the dog stays in *drive* mode much longer. With our tighter cohesion
-# (flocking_sim.py), partially-collected flocks consolidate naturally
-# during a drive, and we don't waste 80% of the time budget on a slow
-# "collect" pre-phase.
-F_FACTOR = 4.0
-DELTA_COLLECT = 1.5
-DELTA_DRIVE = 2.0
+F_FACTOR = 4.0       # collect/drive threshold scaled by √n
+DELTA_COLLECT = 1.5  # collect-position offset behind the furthest sheep
+DELTA_DRIVE = 2.0    # drive-position offset behind the flock CoM


 def _unit(x, y):
@@ -35,18 +25,12 @@ def _unit(x, y):


 def _is_active(x, y) -> bool:
-    """A sheep is "active" if it's still in the field — not in or below
-    the gate plane (we treat anything south of the gate as committed to
-    the pen and stop trying to herd it)."""
+    """Active = not in the pen and strictly above the gate (y > GATE_Y)."""
    return (not in_pen(x, y)) and y > GATE_Y


 def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
-    """Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}.
-
-    ``sheep_positions`` is a ``{name: (x, y)}`` mapping (matches the
-    Webots controller's representation).
-    """
+    """Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}."""
    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle"
@@ -58,14 +42,14 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    radius = max(dists)

    if radius > F_FACTOR * math.sqrt(n):
-        # Collect: aim at a point behind the furthest sheep, opposite the CoM.
+        # Collect: aim behind the furthest sheep, opposite the CoM.
        idx = max(range(n), key=lambda i: dists[i])
        sx, sy = active[idx]
        ux, uy = _unit(sx - com_x, sy - com_y)
        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
        mode = "collect"
    else:
-        # Drive: aim at a point behind the flock CoM relative to the goal.
+        # Drive: aim behind the CoM, opposite the pen.
        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
        mode = "drive"
@@ -75,10 +59,7 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):


 def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
-    """Variant of compute_action that also returns a small debug dict.
-
-    Kept for parity with the legacy controller's CSV logger.
-    """
+    """``compute_action`` plus a small debug dict (CoM, target, radius)."""
    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle", {