"""Universal shepherd teacher — Strömbom core + mecanum omega + straggler recovery.

The core collect/drive logic is **identical** to :mod:`strombom` (same
``F_FACTOR``, ``DELTA_COLLECT``, ``DELTA_DRIVE`` thresholds and target
computation) so it inherits the proven ~100 % success rate at n ≤ 8.
Two additions make it useful as a universal teacher:

1. **Omega for mecanum.**  When ``drive_mode="mecanum"``, the teacher
   outputs an ``omega`` channel that turns the dog **towards a
   phase-specific facing target** (not its direction of travel).  During
   collect and recovery the dog faces the target sheep; during drive it
   faces the pen target.  This gives the BC student a real rotation
   signal to learn from.

2. **Last-straggler recovery.**  When exactly one sheep remains active
   and it is near the gate, the dog positions itself behind that
   straggler (opposite the gate) and pushes it straight through.  This
   handles the edge case where the last sheep circles the gate posts.

Call signature::

    vx, vy, omega, mode = compute_action(
        dog_xy, dog_heading, sheep_positions, pen_target,
        drive_mode="differential",
    )

For differential drive ``omega`` is always 0.0 and can be ignored.
"""

import math

from herding.world.geometry import (
    PEN_ENTRY, GATE_X, GATE_Y, in_pen,
)

# ---------------------------------------------------------------------------
# Tuning constants — match Strömbom exactly for proven success rates.
# ---------------------------------------------------------------------------

# Collect when the flock radius (largest sheep-to-CoM distance) exceeds
# F_FACTOR * √n, with n the number of active sheep; otherwise drive.
F_FACTOR = 4.0          # collect/drive threshold scaled by √n
# Collect target = furthest sheep + DELTA_COLLECT along the CoM→sheep direction.
DELTA_COLLECT = 1.5      # standoff behind the furthest sheep
# Drive target = CoM + DELTA_DRIVE along the pen-target→CoM direction.
DELTA_DRIVE = 2.0        # standoff behind flock CoM

# Omega gain for mecanum (how strongly the dog turns to face target).
# omega = OMEGA_GAIN * heading_error / π, then clamped to [-1, 1], so a full
# ±π heading error commands ±OMEGA_GAIN.
OMEGA_GAIN = 0.6

# Recovery: push the last straggler straight through the gate.
# Recovery engages only when exactly one sheep is active and its distance to
# the gate centre is strictly below RECOVERY_GATE_DIST.
RECOVERY_GATE_DIST = 6.0  # only when straggler is this close to gate centre
# Recovery target = straggler + RECOVERY_PUSH_DIST along the gate→straggler
# direction.
RECOVERY_PUSH_DIST = 1.2   # stand-off behind straggler, away from gate


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _unit(x, y):
    """Normalise the vector ``(x, y)`` to unit length.

    Vectors shorter than 1e-6 are treated as zero and map to
    ``(0.0, 0.0)``, so the division never sees a vanishing norm.
    """
    norm = math.hypot(x, y)
    return (0.0, 0.0) if norm < 1e-6 else (x / norm, y / norm)


def _is_active(x, y) -> bool:
    """Tell whether a sheep at ``(x, y)`` still has to be herded.

    A sheep counts as active when ``in_pen`` reports it is not in the pen
    and its ``y`` coordinate is strictly greater than ``GATE_Y``.
    """
    if in_pen(x, y):
        return False
    return y > GATE_Y


def _angle_diff(a, b):
    """Return ``a - b`` wrapped to the shortest signed angle in [-π, π]."""
    delta = a - b
    # atan2 of the (sin, cos) pair folds any multiple of 2π back into range.
    return math.atan2(math.sin(delta), math.cos(delta))


def _gate_center():
    """Return the midpoint of the gate opening as an ``(x, y)`` tuple.

    ``x`` is the mean of the first two entries of ``GATE_X``; ``y`` is
    ``GATE_Y``.
    """
    first_x = GATE_X[0]
    second_x = GATE_X[1]
    mid_x = 0.5 * (first_x + second_x)
    return (mid_x, GATE_Y)


# ---------------------------------------------------------------------------
# Core teacher
# ---------------------------------------------------------------------------

def _yaw_intent(dog_xy, dog_heading, face_target, drive_mode):
    """Return the yaw command that turns the dog towards *face_target*.

    Only ``drive_mode == "mecanum"`` gets a rotation signal; every other
    drive mode yields 0.0.
    """
    if drive_mode != "mecanum":
        return 0.0
    desired_heading = math.atan2(
        face_target[1] - dog_xy[1],
        face_target[0] - dog_xy[0],
    )
    err = _angle_diff(desired_heading, dog_heading)
    # err / π lies in [-1, 1]; the clamp keeps the output in range even if
    # OMEGA_GAIN is ever raised above 1.
    return max(-1.0, min(1.0, OMEGA_GAIN * err / math.pi))


def _recovery_push_point(sheep_xy):
    """Return the stand-off point behind a lone straggler, or ``None``.

    ``None`` means the straggler is not strictly within
    ``RECOVERY_GATE_DIST`` of the gate centre, so recovery does not apply.
    """
    sx, sy = sheep_xy
    gate_x, gate_y = _gate_center()
    dx, dy = sx - gate_x, sy - gate_y
    dist = math.hypot(dx, dy)
    if dist < RECOVERY_GATE_DIST:
        if dist > 0.3:
            ux, uy = dx / dist, dy / dist
        else:
            # Straggler is almost on the gate centre, so the gate→sheep
            # direction is unreliable; fall back to +y (active sheep always
            # satisfy y > GATE_Y, see _is_active).
            ux, uy = 0.0, 1.0
        return sx + RECOVERY_PUSH_DIST * ux, sy + RECOVERY_PUSH_DIST * uy
    return None


def _flock_target(active, pen_target):
    """Return ``(target_xy, mode, face_target)`` for Strömbom collect/drive."""
    n = len(active)
    com_x = sum(p[0] for p in active) / n
    com_y = sum(p[1] for p in active) / n
    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
    radius = max(dists)

    if radius > F_FACTOR * math.sqrt(n):
        # Collect: aim behind the furthest sheep, opposite the CoM.
        # radius is max(dists), so index() picks the first furthest sheep.
        sx, sy = active[dists.index(radius)]
        ux, uy = _unit(sx - com_x, sy - com_y)
        target = (sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy)
        return target, "collect", (sx, sy)

    # Drive: aim behind the CoM, opposite the pen.
    ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
    target = (com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy)
    return target, "drive", pen_target


def compute_action(dog_xy, dog_heading, sheep_positions,
                   pen_target=PEN_ENTRY, drive_mode="differential"):
    """Return ``(vx, vy, omega, mode)``.

    Sheep for which ``_is_active`` is false are ignored.  With no active
    sheep the dog idles.  With exactly one active sheep strictly within
    ``RECOVERY_GATE_DIST`` of the gate centre the dog moves to a point
    ``RECOVERY_PUSH_DIST`` behind it (away from the gate).  Otherwise the
    Strömbom rule applies: collect the furthest sheep when the flock radius
    exceeds ``F_FACTOR * sqrt(n)``, else drive the centre of mass towards
    ``pen_target``.

    Parameters
    ----------
    dog_xy : (float, float)
        Dog position in world frame.
    dog_heading : float
        Dog heading in world frame (rad), 0 = +x axis.
    sheep_positions : dict[str, (float, float)]
        Visible sheep positions; only the values are used.
    pen_target : (float, float)
        Point the flock is driven towards (defaults to ``PEN_ENTRY``).
        Recovery does not use it; it aims relative to the gate centre
        derived from ``GATE_X`` / ``GATE_Y``.
    drive_mode : str
        ``"differential"`` or ``"mecanum"``.  Only ``"mecanum"`` produces
        a non-zero ``omega``.

    Returns
    -------
    vx, vy : float
        Unit vector from the dog towards the chosen stand-off point, or
        ``(0.0, 0.0)`` when idle or when the dog is within 1e-6 of it.
    omega : float
        Yaw intent in [-1, 1] (0 for non-mecanum drive).  The dog turns
        towards the target sheep in collect/recovery and towards
        ``pen_target`` in drive.
    mode : str
        Phase label: ``"idle"``, ``"collect"``, ``"drive"``, ``"recovery"``.
    """
    active = [(x, y) for (x, y) in sheep_positions.values()
              if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, 0.0, "idle"

    # ---- Last-straggler recovery (single sheep circling near gate) ----
    target = None
    if len(active) == 1:
        target = _recovery_push_point(active[0])

    if target is not None:
        mode = "recovery"
        face_target = active[0]
    else:
        # ---- Standard Strömbom collect/drive (proven core) ----
        target, mode, face_target = _flock_target(active, pen_target)

    ax, ay = _unit(target[0] - dog_xy[0], target[1] - dog_xy[1])
    omega = _yaw_intent(dog_xy, dog_heading, face_target, drive_mode)
    return ax, ay, omega, mode


def compute_action_diff(dog_xy, dog_heading, sheep_positions,
                        pen_target=PEN_ENTRY):
    """Differential-drive shim around :func:`compute_action`.

    Forces ``drive_mode="differential"`` and drops the omega channel, so
    callers that expect the three-value ``(vx, vy, mode)`` result need no
    changes.
    """
    result = compute_action(
        dog_xy,
        dog_heading,
        sheep_positions,
        pen_target,
        drive_mode="differential",
    )
    vx, vy, _unused_omega, mode = result
    return vx, vy, mode