Checkpoint 8

2026-05-12 22:41:03 +01:00
parent a01a5c9cef
commit 5c2ee4bba5
31 changed files with 3189 additions and 380 deletions
@@ -33,7 +33,11 @@ class ActiveScanTeacher:

    Call signature::

-        vx, vy, mode = teacher(dog_xy, dog_heading, sheep_positions, pen_target)
+        vx, vy, omega, mode = teacher(dog_xy, dog_heading, sheep_positions,
+                                      pen_target, drive_mode="differential")
+
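+    ``omega`` is the yaw-rate intent (mecanum only), passed through from
+    the base teacher; it is 0.0 for differential drive, when the base
+    teacher returns only ``(vx, vy, mode)``, and during the blind phases
+    this wrapper generates itself (initial scan, explore, hold).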
    """

    def __init__(self, base_action_fn, initial_scan_steps: int = INITIAL_SCAN_STEPS):
@@ -62,7 +66,8 @@ class ActiveScanTeacher:
            return 0.0, 0.0
        return EXPLORE_SPEED * dx / d, EXPLORE_SPEED * dy / d

-    def __call__(self, dog_xy, dog_heading, sheep_positions, pen_target):
+    def __call__(self, dog_xy, dog_heading, sheep_positions, pen_target,
+                 drive_mode="differential"):
        self.step += 1
        n_visible = len(sheep_positions)

@@ -75,7 +80,7 @@ class ActiveScanTeacher:
        if self.step <= self.initial_scan:
            vx, vy = self._scan_action(dog_heading)
            self.last_action = (vx, vy)
-            return vx, vy, "scan_initial"
+            return vx, vy, 0.0, "scan_initial"

        # Phase 2: walk-to-centre after a sustained empty tracker.
        if self.empty_streak >= EMPTY_DEBOUNCE_STEPS:
@@ -87,16 +92,31 @@ class ActiveScanTeacher:
                vx, vy = ex, ey
                mode = "explore"
            self.last_action = (vx, vy)
-            return vx, vy, mode
+            return vx, vy, 0.0, mode

        # Phase 2b: brief tracker blink — hold the previous action.
        if n_visible == 0:
            vx, vy = self.last_action
-            return vx, vy, "hold"
+            return vx, vy, 0.0, "hold"

        # Phase 3: hand off to the underlying analytic teacher, then
        # apply the shared near-sheep speed modulation.
-        vx, vy, mode = self.base(dog_xy, sheep_positions, pen_target)
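+        # Try the base teacher's call signatures from newest to oldest:
+        #   (dog_xy, heading, sheep, pen, drive_mode)
+        #   (dog_xy, heading, sheep, pen)
+        #   (dog_xy, sheep, pen)
+        # NOTE: a TypeError raised inside the teacher itself also falls
+        # through to the next signature.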
+        try:
+            result = self.base(dog_xy, dog_heading, sheep_positions,
+                               pen_target, drive_mode)
+        except TypeError:
+            try:
+                result = self.base(dog_xy, dog_heading, sheep_positions,
+                                   pen_target)
+            except TypeError:
+                result = self.base(dog_xy, sheep_positions, pen_target)
+        if len(result) == 4:
+            vx, vy, omega, mode = result
+        else:
+            vx, vy, mode = result
+            omega = 0.0
        vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
        self.last_action = (vx, vy)
-        return vx, vy, mode
+        return vx, vy, omega, mode
@@ -0,0 +1,187 @@
+"""Universal shepherd teacher — Strömbom core + mecanum omega + straggler recovery.
+
+The core collect/drive logic is **identical** to :mod:`strombom` (same
+``F_FACTOR``, ``DELTA_COLLECT``, ``DELTA_DRIVE`` thresholds and target
+computation) so it inherits the proven ~100 % success rate at n ≤ 8.
+Two additions make it useful as a universal teacher:
+
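+1. **Omega for mecanum.**  When ``drive_mode="mecanum"``, the teacher
+   outputs an ``omega`` channel that turns the dog toward a
+   phase-dependent facing target rather than its direction of travel:
+   the target sheep during collect, the pen target during drive, and
+   the straggler during recovery.  ``omega`` is proportional to the
+   heading error (``OMEGA_GAIN * err / π``), so it is zero only when
+   the dog is already aligned.  This gives the BC student a real
+   rotation signal to learn from.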
+
+2. **Last-straggler recovery.**  When exactly one sheep remains active
+   and it is near the gate, the dog positions itself behind that
+   straggler (opposite the gate) and pushes it straight through.  This
+   handles the edge case where the last sheep circles the gate posts.
+
+Call signature::
+
+    vx, vy, omega, mode = compute_action(
+        dog_xy, dog_heading, sheep_positions, pen_target,
+        drive_mode="differential",
+    )
+
+For differential drive ``omega`` is always 0.0 and can be ignored.
+"""
+
+import math
+
+from herding.world.geometry import (
+    PEN_ENTRY, GATE_X, GATE_Y, in_pen,
+)
+
+# ---------------------------------------------------------------------------
+# Tuning constants — match Strömbom exactly for proven success rates.
+# ---------------------------------------------------------------------------
+
+F_FACTOR = 4.0          # collect/drive threshold scaled by √n
+DELTA_COLLECT = 1.5      # standoff behind the furthest sheep
+DELTA_DRIVE = 2.0        # standoff behind flock CoM
+
+# Omega gain for mecanum (how strongly the dog turns to face target)
+OMEGA_GAIN = 0.6
+
+# Recovery: push the last straggler straight through the gate.
+RECOVERY_GATE_DIST = 6.0  # only when straggler is this close to gate centre
+RECOVERY_PUSH_DIST = 1.2   # stand-off behind straggler, away from gate
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _unit(x, y):
+    d = math.hypot(x, y)
+    if d < 1e-6:
+        return 0.0, 0.0
+    return x / d, y / d
+
+
+def _is_active(x, y) -> bool:
+    return (not in_pen(x, y)) and y > GATE_Y
+
+
+def _angle_diff(a, b):
+    """Signed shortest angular difference a - b, in [-π, π]."""
+    return math.atan2(math.sin(a - b), math.cos(a - b))
+
+
+def _gate_center():
+    """Centre of the gate opening."""
+    return (0.5 * (GATE_X[0] + GATE_X[1]), GATE_Y)
+
+
+# ---------------------------------------------------------------------------
+# Core teacher
+# ---------------------------------------------------------------------------
+
+def compute_action(dog_xy, dog_heading, sheep_positions,
+                   pen_target=PEN_ENTRY, drive_mode="differential"):
+    """Return ``(vx, vy, omega, mode)``.
+
+    Parameters
+    ----------
+    dog_xy : (float, float)
+        Dog position in world frame.
+    dog_heading : float
+        Dog heading in world frame (rad), 0 = +x axis.
+    sheep_positions : dict[str, (float, float)]
+        Visible sheep positions.
+    pen_target : (float, float)
+        Centre of the pen gate (defaults to geometry.PEN_ENTRY).
+    drive_mode : str
+        ``"differential"`` or ``"mecanum"``.
+
+    Returns
+    -------
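+    vx, vy : float
+        Unit-length direction of travel toward the dog's stand-off point
+        (each component in [-1, 1]); ``(0.0, 0.0)`` when idle or when the
+        dog is already on that point.
+    omega : float
+        Yaw intent, clamped to [-1, 1] (always 0.0 for differential).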
+    mode : str
+        Phase label: ``"idle"``, ``"collect"``, ``"drive"``, ``"recovery"``.
+    """
+    active = [(x, y) for (x, y) in sheep_positions.values()
+              if _is_active(x, y)]
+    if not active:
+        return 0.0, 0.0, 0.0, "idle"
+
+    n = len(active)
+    com_x = sum(p[0] for p in active) / n
+    com_y = sum(p[1] for p in active) / n
+    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
+    radius = max(dists)
+
+    # ---- Last-straggler recovery (single sheep circling near gate) ----
+    gc = _gate_center()
+    if n == 1:
+        sx, sy = active[0]
+        d_to_gate = math.hypot(sx - gc[0], sy - gc[1])
+        if d_to_gate < RECOVERY_GATE_DIST:
+            dx_g = sx - gc[0]
+            dy_g = sy - gc[1]
+            d_g = math.hypot(dx_g, dy_g)
+            if d_g > 0.3:
+                ux, uy = dx_g / d_g, dy_g / d_g
+            else:
+                ux, uy = 0.0, 1.0
+            tx = sx + RECOVERY_PUSH_DIST * ux
+            ty = sy + RECOVERY_PUSH_DIST * uy
+            ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
+            mode = "recovery"
+            face_target = (sx, sy)
+            omega = 0.0
+            if drive_mode == "mecanum":
+                desired = math.atan2(
+                    face_target[1] - dog_xy[1],
+                    face_target[0] - dog_xy[0],
+                )
+                err = _angle_diff(desired, dog_heading)
+                omega = max(-1.0, min(1.0, OMEGA_GAIN * err / math.pi))
+            return ax, ay, omega, mode
+
+    # ---- Standard Strömbom collect/drive (proven core) ----
+    if radius > F_FACTOR * math.sqrt(n):
+        # Collect: aim behind the furthest sheep, opposite the CoM.
+        idx = max(range(n), key=lambda i: dists[i])
+        sx, sy = active[idx]
+        ux, uy = _unit(sx - com_x, sy - com_y)
+        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
+        mode = "collect"
+        face_target = (sx, sy)
+    else:
+        # Drive: aim behind the CoM, opposite the pen.
+        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
+        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
+        mode = "drive"
+        face_target = pen_target
+
+    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
+
+    # ---- Omega (mecanum only) ----
+    omega = 0.0
+    if drive_mode == "mecanum" and mode != "idle":
+        desired_heading = math.atan2(
+            face_target[1] - dog_xy[1],
+            face_target[0] - dog_xy[0],
+        )
+        err = _angle_diff(desired_heading, dog_heading)
+        omega = max(-1.0, min(1.0, OMEGA_GAIN * err / math.pi))
+
+    return ax, ay, omega, mode
+
+
+def compute_action_diff(dog_xy, dog_heading, sheep_positions,
+                        pen_target=PEN_ENTRY):
+    """Compatibility wrapper returning ``(vx, vy, mode)`` — same as Strömbom.
+
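+    Use this when plugging into existing differential-drive code that
+    doesn't expect omega.  Only the return value matches the older
+    three-value form: the call signature still takes ``dog_heading``
+    as its second argument.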
+    """
+    vx, vy, _omega, mode = compute_action(
+        dog_xy, dog_heading, sheep_positions, pen_target,
+        drive_mode="differential",
+    )
+    return vx, vy, mode