Checkpoint 2

2026-05-07 22:00:10 +01:00
parent 90aa3bbcb4
commit 1bb9415414
37 changed files with 3068 additions and 2912 deletions
@@ -0,0 +1,98 @@
+"""Sequential single-target shepherd dog algorithm.
+
+Strömbom drives the flock's centre of mass; with N sheep and a narrow
+3 m gate, this fails because the flock is wider than the gate and CoM
+driving abandons stragglers. Real sheepdogs solve this differently:
+they pick *one* sheep at a time, drive it through, return for the next.
+
+This module implements that "pin-and-push" approach.
+
+Algorithm (one step):
+1. Active sheep = those still in the field (not yet penned).
+2. Target = the active sheep currently closest to the pen entry.
+3. Drive position = ``target + Δ · unit(target − pen_entry)`` —
+   directly behind the target relative to the goal.
+4. Output unit vector pointing the dog at the drive position.
+
+Once the target crosses the gate it is no longer active (activity is
+re-derived from positions on every call, nothing is latched here); the
+next-closest sheep takes over, so sheep "queue" through the gate.
+
+Empirically (with our flocking dynamics) this scales linearly with
+flock size and works up to at least n=10 within a 15 000-step budget.
+"""
+
+import math
+
+from herding.geometry import GATE_Y, PEN_ENTRY, in_pen
+
+
+DELTA_DRIVE = 1.5     # standoff Δ behind the target sheep, on the side away from the pen
+APPROACH_GAIN = 1.0   # scales the unit heading the dog is given (1 = full speed)
+
+
+def _unit(x, y):
+    """Scale the vector ``(x, y)`` to unit length.
+
+    Vectors shorter than 1e-6 are treated as having no direction and
+    yield ``(0.0, 0.0)``, which also avoids dividing by zero.
+    """
+    length = math.hypot(x, y)
+    return (0.0, 0.0) if length < 1e-6 else (x / length, y / length)
+
+
+def _is_active(x, y) -> bool:
+    """Tell whether a sheep at ``(x, y)`` still has to be herded.
+
+    A sheep is active when ``in_pen`` rejects its position and its ``y``
+    is strictly greater than ``GATE_Y``.
+    """
+    if in_pen(x, y):
+        return False
+    return y > GATE_Y
+
+
+def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
+    """Return ``(vx, vy, mode)`` steering the dog behind one target sheep.
+
+    The signature is meant to mirror the Strömbom controller's so the two
+    can be swapped in the dog controller and the env's imitation reward.
+
+    Args:
+        dog_xy: Dog position, indexed as ``[0]`` (x) and ``[1]`` (y).
+        sheep_positions: Mapping of sheep name to an ``(x, y)`` pair.
+        pen_target: Point the sheep are driven towards, indexed like
+            ``dog_xy``.
+
+    Returns:
+        ``(0.0, 0.0, "idle")`` when no sheep is active; otherwise the
+        direction towards the drive position scaled by ``APPROACH_GAIN``,
+        plus the mode string ``"drive:<name>"`` naming the target.
+    """
+    live = [
+        (sheep_id, px, py)
+        for sheep_id, (px, py) in sheep_positions.items()
+        if _is_active(px, py)
+    ]
+    if not live:
+        return 0.0, 0.0, "idle"
+
+    goal_x, goal_y = pen_target[0], pen_target[1]
+
+    def _range_to_goal(entry):
+        _, px, py = entry
+        return math.hypot(px - goal_x, py - goal_y)
+
+    # The target is re-picked on every call: whichever active sheep is
+    # nearest the pen target right now (the first one wins a tie).
+    target_id, sheep_x, sheep_y = min(live, key=_range_to_goal)
+
+    # The drive position sits DELTA_DRIVE beyond the sheep on the line
+    # from the pen target through the sheep, i.e. on its far side.
+    away_x, away_y = _unit(sheep_x - goal_x, sheep_y - goal_y)
+    drive_x = sheep_x + DELTA_DRIVE * away_x
+    drive_y = sheep_y + DELTA_DRIVE * away_y
+
+    head_x, head_y = _unit(drive_x - dog_xy[0], drive_y - dog_xy[1])
+    mode = f"drive:{target_id}"
+    return APPROACH_GAIN * head_x, APPROACH_GAIN * head_y, mode
+
+
+def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
+    """Debug variant returning ``(vx, vy, mode, debug_dict)``.
+
+    Runs the same target selection and drive-point computation as
+    ``compute_action`` and additionally reports the intermediate values.
+
+    ``debug_dict`` keys: ``n_active`` (number of active sheep),
+    ``target_name``, ``target_x``/``target_y`` (the chosen sheep) and
+    ``drive_x``/``drive_y`` (the point the dog heads for). When idle the
+    target fields are ``""``/``0.0`` and the drive point is the dog's own
+    position.
+    """
+    live = [
+        (sheep_id, px, py)
+        for sheep_id, (px, py) in sheep_positions.items()
+        if _is_active(px, py)
+    ]
+    if not live:
+        idle_info = {
+            "n_active": 0,
+            "target_name": "",
+            "target_x": 0.0,
+            "target_y": 0.0,
+            "drive_x": dog_xy[0],
+            "drive_y": dog_xy[1],
+        }
+        return 0.0, 0.0, "idle", idle_info
+
+    goal_x, goal_y = pen_target[0], pen_target[1]
+
+    def _range_to_goal(entry):
+        _, px, py = entry
+        return math.hypot(px - goal_x, py - goal_y)
+
+    # Nearest active sheep to the pen target; the first one wins a tie.
+    target_id, sheep_x, sheep_y = min(live, key=_range_to_goal)
+
+    # Drive point: DELTA_DRIVE past the sheep, on the side away from the
+    # pen target.
+    away_x, away_y = _unit(sheep_x - goal_x, sheep_y - goal_y)
+    drive_x = sheep_x + DELTA_DRIVE * away_x
+    drive_y = sheep_y + DELTA_DRIVE * away_y
+    head_x, head_y = _unit(drive_x - dog_xy[0], drive_y - dog_xy[1])
+
+    mode = f"drive:{target_id}"
+    info = {
+        "n_active": len(live),
+        "target_name": target_id,
+        "target_x": sheep_x,
+        "target_y": sheep_y,
+        "drive_x": drive_x,
+        "drive_y": drive_y,
+    }
+    return APPROACH_GAIN * head_x, APPROACH_GAIN * head_y, mode, info