# TIR_PROJ/herding/control/active_scan.py

"""Active-perception wrapper for the analytic shepherding teachers.

Under LiDAR (partial observability), the tracker starts empty — the
dog hasn't seen any sheep yet. A naive Strömbom call returns
``(0, 0, "idle")`` and the dog stops. The student then learns "do
nothing when the tracker is empty," which is a fatal local optimum.

This wrapper replaces the idle case with a **scan action**: a unit
vector pointing directly behind the dog (current heading + π). Passed
through ``velocity_to_wheels`` it produces a fast in-place rotation
(the ``cos(err)`` clamp drives forward speed to ~0 because the target
lies behind the heading). The dog spins for the first
``initial_scan_steps`` steps of every episode regardless of tracker
state. If the tracker later stays empty for ``EMPTY_DEBOUNCE_STEPS``
consecutive frames, the dog walks back toward the field centre and
scans in place once it gets there; shorter blinks simply hold the
previous action.

Once the opening scan is over and at least one sheep is tracked,
control hands over to the underlying analytic teacher (Strömbom or
Sequential), which now operates on a populated tracker dict. Both
teacher and student see the same LiDAR-perceived view — there's no
information asymmetry, so the student can in principle achieve the
teacher's full performance.
"""

from __future__ import annotations

import math

from herding.control.modulation import modulate_speed_near_sheep


# Default number of calls spent in the unconditional opening scan
# (ActiveScanTeacher returns "scan_initial" while step <= this value).
INITIAL_SCAN_STEPS = 80    # ≈1.3 s at dt=16 ms — full rotation at the +π turn target.
# Norm of the action vector returned while walking back toward the
# field centre with nothing in view.
EXPLORE_SPEED = 0.7        # m/s-ish unit (action norm) used when walking blind

# Debounce on tracker emptiness — a single empty frame between
# detections is not enough reason to abandon the drive and start
# exploring. Require this many consecutive empty frames first; shorter
# gaps hold the previous action instead.
EMPTY_DEBOUNCE_STEPS = 8


class ActiveScanTeacher:
    """Stateful active-perception wrapper around an analytic teacher.

    Construct one per episode, or call ``reset()`` between episodes
    when reusing an instance.

    Call signature::

        vx, vy, mode = teacher(dog_xy, dog_heading, sheep_positions, pen_target)

    Note the extra ``dog_heading`` arg — it is needed to build the scan
    target. The base teacher is called without it, as
    ``base(dog_xy, sheep_positions, pen_target)``.

    ``mode`` is one of:

    * ``"scan_initial"`` — opening rotation (first ``initial_scan_steps``
      calls, regardless of what is visible);
    * ``"hold"`` — nothing visible, but for fewer than
      ``empty_debounce_steps`` consecutive calls: repeat the last action;
    * ``"explore"`` — nothing visible for long enough: walk toward the
      field centre;
    * ``"scan_at_centre"`` — nothing visible and already at the centre:
      rotate in place;
    * otherwise whatever mode string the base teacher returned.

    Constructor arguments:

    * ``base_action_fn`` — the wrapped teacher; must return
      ``(vx, vy, mode)``.
    * ``initial_scan_steps`` — length of the opening scan, in calls.
    * ``empty_debounce_steps`` (keyword-only) — consecutive empty
      observations required before the dog gives up holding and starts
      exploring. Values ``<= 1`` disable the hold phase.
    * ``explore_speed`` (keyword-only) — norm of the explore action.
      A value of 0 makes the explore action indistinguishable from the
      "at centre" sentinel, so the dog scans in place instead.
    """

    def __init__(
        self,
        base_action_fn,
        initial_scan_steps: int = INITIAL_SCAN_STEPS,
        *,
        empty_debounce_steps: int = EMPTY_DEBOUNCE_STEPS,
        explore_speed: float = EXPLORE_SPEED,
    ):
        self.base = base_action_fn
        self.initial_scan = int(initial_scan_steps)
        self.empty_debounce = int(empty_debounce_steps)
        self.explore_speed = float(explore_speed)
        self.reset()

    def reset(self) -> None:
        """Clear per-episode state; configuration is left untouched."""
        self.step = 0            # number of calls since the last reset
        self.empty_streak = 0    # consecutive calls with nothing visible
        # Last action actually commanded (not updated while holding).
        self.last_action: tuple[float, float] = (0.0, 0.0)

    @staticmethod
    def _scan_action(dog_heading: float) -> tuple[float, float]:
        """Return a unit vector pointing directly behind the dog.

        The target direction is ``dog_heading + π``. Per the original
        design note (not verifiable from this file — confirm against
        ``velocity_to_wheels``), a heading error of π yields the
        maximum turn command while the ``cos(err)`` clamp keeps forward
        speed near zero, i.e. the fastest in-place rotation the
        controller offers (about one full turn in ~60 steps).
        """
        target = dog_heading + math.pi
        return math.cos(target), math.sin(target)

    @staticmethod
    def _explore_action(dog_xy, speed: float = EXPLORE_SPEED) -> tuple[float, float]:
        """Walk back toward the field centre when nothing is in view.

        At difficulty=1 sheep can spawn up to ~18 m from origin while
        the LiDAR has a 12 m range, so an in-place scan from a corner
        can return zero hits. Walking toward (0, 0) shrinks the
        max-distance-to-any-sheep and the scanner cone sweeps along
        the path, eventually picking sheep up.

        Returns a vector of norm ``speed`` pointing from ``dog_xy`` to
        the origin, or the sentinel ``(0.0, 0.0)`` when the dog is
        within 0.5 of the origin (the caller then scans instead).
        """
        dx, dy = -dog_xy[0], -dog_xy[1]
        d = math.hypot(dx, dy)
        if d < 0.5:
            # At the centre — the caller falls through to a scan instead.
            return 0.0, 0.0
        return speed * dx / d, speed * dy / d

    def __call__(
        self, dog_xy, dog_heading, sheep_positions, pen_target
    ) -> tuple[float, float, str]:
        """Return ``(vx, vy, mode)`` for the current observation."""
        self.step += 1
        # len() rather than truthiness so array-like containers work too;
        # a missing tracker (None) is treated the same as an empty one.
        n_visible = 0 if sheep_positions is None else len(sheep_positions)

        # The streak keeps counting during the opening scan, so an
        # episode that finishes the scan with nothing in view moves
        # straight on to exploring.
        if n_visible == 0:
            self.empty_streak += 1
        else:
            self.empty_streak = 0

        # Phase 1: opening rotation, regardless of tracker state.
        if self.step <= self.initial_scan:
            vx, vy = self._scan_action(dog_heading)
            self.last_action = (vx, vy)
            return vx, vy, "scan_initial"

        if n_visible == 0:
            # Phase 2b: tracker just blinked empty for fewer than the
            # debounce count — hold the previous action so the dog
            # doesn't lurch away from sheep it saw a moment ago.
            if self.empty_streak < self.empty_debounce:
                vx, vy = self.last_action
                return vx, vy, "hold"

            # Phase 2: tracker has been empty for a while — walk back to
            # the centre while the LiDAR keeps sweeping, or scan in
            # place once there. Emptiness is checked explicitly (above)
            # so a non-positive debounce can never hijack Phase 3.
            ex, ey = self._explore_action(dog_xy, self.explore_speed)
            if ex == 0.0 and ey == 0.0:
                vx, vy = self._scan_action(dog_heading)
                mode = "scan_at_centre"
            else:
                vx, vy = ex, ey
                mode = "explore"
            self.last_action = (vx, vy)
            return vx, vy, mode

        # Phase 3: hand to the underlying analytic teacher, then apply
        # the shared near-sheep speed modulation (centralised in
        # herding.control so the BC student, Strömbom, Sequential and
        # the DAgger teacher all behave identically near sheep).
        vx, vy, mode = self.base(dog_xy, sheep_positions, pen_target)
        vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
        self.last_action = (vx, vy)
        return vx, vy, mode