# TIR_PROJ/herding/sequential.py

"""Sequential single-target shepherd dog algorithm.

Strömbom drives the flock's centre of mass; with N sheep and a narrow
3 m gate, this fails because the flock is wider than the gate and CoM
driving abandons stragglers. Real sheepdogs solve this differently:
they pick *one* sheep at a time, drive it through, return for the next.

This module implements that "pin-and-push" approach.

Algorithm (one step):
1. Active sheep = those still in the field (not yet penned).
2. Target = the active sheep currently closest to the pen entry.
3. Drive position = ``target + Δ · unit(target − pen_entry)`` —
   directly behind the target relative to the goal.
4. Output unit vector pointing the dog at the drive position.

Once the target crosses the gate it latches as penned and is removed
from the active set; the next-closest unpenned sheep becomes the
target. The algorithm naturally "queues" sheep through the gate.

Empirically (with our flocking dynamics) this scales linearly with
flock size and works up to at least n=10 within a 15 000-step budget.
"""

import math

from herding.geometry import GATE_Y, PEN_ENTRY, in_pen


# Standoff behind the target sheep: the dog aims for a point this far
# beyond the sheep, on the side facing away from the pen entry.
# Same length units as the sheep/dog coordinates.
DELTA_DRIVE = 1.5

# Scale applied to the unit heading vector returned as the action
# (1 = full speed).
APPROACH_GAIN = 1.0


def _unit(x, y):
    d = math.hypot(x, y)
    if d < 1e-6:
        return 0.0, 0.0
    return x / d, y / d


def _is_active(x, y) -> bool:
    """Report whether a sheep at ``(x, y)`` still has to be herded.

    A sheep counts as active when ``in_pen`` does not claim its position
    and its ``y`` coordinate is strictly greater than ``GATE_Y``.
    """
    if in_pen(x, y):
        return False
    return y > GATE_Y


def _plan_drive(dog_xy, sheep_positions, pen_target):
    """Select the target sheep and work out where the dog should head.

    Single source of truth for ``compute_action`` and
    ``compute_action_debug`` so the two cannot drift apart.

    Returns ``None`` when no sheep is active. Otherwise returns
    ``(n_active, name, (sx, sy), (tx, ty), (ax, ay))`` where ``(sx, sy)``
    is the target sheep, ``(tx, ty)`` is the drive point behind it and
    ``(ax, ay)`` is the unit heading from the dog to the drive point
    (``(0.0, 0.0)`` when the dog is within 1e-6 of that point).
    """
    active = [(name, x, y) for name, (x, y) in sheep_positions.items()
              if _is_active(x, y)]
    if not active:
        return None

    px, py = pen_target[0], pen_target[1]

    # Target = active sheep closest to the pen entry. The choice is
    # recomputed from scratch on every call (no state is kept here); on
    # an exact tie ``min`` keeps the first sheep in dict iteration order.
    name, sx, sy = min(
        active,
        key=lambda s: math.hypot(s[1] - px, s[2] - py),
    )

    # Drive point: DELTA_DRIVE beyond the sheep along the pen → sheep
    # direction, i.e. on the far side of the sheep from the pen entry.
    # If the sheep sits exactly on pen_target the direction is (0, 0)
    # and the drive point collapses onto the sheep itself.
    ux, uy = _unit(sx - px, sy - py)
    tx = sx + DELTA_DRIVE * ux
    ty = sy + DELTA_DRIVE * uy

    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    return len(active), name, (sx, sy), (tx, ty), (ax, ay)


def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Return ``(vx, vy, mode)`` where mode encodes the current target.

    Compatible with the Strömbom call signature so it can be drop-in
    swapped in the dog controller and the env's imitation reward.

    Args:
        dog_xy: indexable ``(x, y)`` position of the dog.
        sheep_positions: mapping of sheep name to ``(x, y)`` position.
        pen_target: indexable ``(x, y)`` point the sheep are driven to.

    Returns:
        ``(0.0, 0.0, "idle")`` when no sheep is active; otherwise the
        unit heading toward the drive point scaled by ``APPROACH_GAIN``
        and the mode string ``"drive:<name>"`` naming the target sheep.
    """
    plan = _plan_drive(dog_xy, sheep_positions, pen_target)
    if plan is None:
        return 0.0, 0.0, "idle"

    _, name, _, _, (ax, ay) = plan
    return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}"


def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Debug variant returning ``(vx, vy, mode, debug_dict)``.

    The first three items are exactly what ``compute_action`` returns
    for the same inputs. ``debug_dict`` holds ``n_active``,
    ``target_name``, ``target_x``/``target_y`` (the chosen sheep) and
    ``drive_x``/``drive_y`` (the point the dog is steering toward). When
    idle, the target fields are empty/zero and the drive point is the
    dog's own position.
    """
    plan = _plan_drive(dog_xy, sheep_positions, pen_target)
    if plan is None:
        return 0.0, 0.0, "idle", {
            "n_active": 0, "target_name": "",
            "target_x": 0.0, "target_y": 0.0,
            "drive_x": dog_xy[0], "drive_y": dog_xy[1],
        }

    n_active, name, (sx, sy), (tx, ty), (ax, ay) = plan
    return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}", {
        "n_active": n_active, "target_name": name,
        "target_x": sx, "target_y": sy,
        "drive_x": tx, "drive_y": ty,
    }