"""Active-perception wrapper for the analytic shepherding teachers. Under LiDAR (partial observability), the tracker starts empty — the dog hasn't seen any sheep yet. A naive Strömbom call returns ``(0, 0, "idle")`` and the dog stops. The student then learns "do nothing when the tracker is empty," which is a fatal local optimum. This wrapper replaces the idle case with a **scan action**: a unit vector pointing directly behind the dog (current heading + π). Passed through ``velocity_to_wheels`` it produces a fast in-place rotation (the ``cos(err)`` clamp drives forward speed to ~0 because the target is directly opposite the heading). The dog spins for the first ``initial_scan_steps`` steps of every episode regardless of tracker state, and, once the tracker has stayed empty for ``EMPTY_DEBOUNCE_STEPS`` consecutive steps mid-episode, walks back toward the field centre and scans in place once it gets there. Once enough sheep are tracked, control hands over to the underlying analytic teacher (Strömbom or Sequential), which now operates on a populated tracker dict. Both teacher and student see the same LiDAR-perceived view — there's no information asymmetry, so the student can in principle achieve the teacher's full performance. """ from __future__ import annotations import math from herding.control.modulation import modulate_speed_near_sheep INITIAL_SCAN_STEPS = 80 # ≈1.3 s at dt=16 ms — full rotation at the +π turn target. EXPLORE_SPEED = 0.7 # m/s-ish unit (action norm) used when walking blind # Debounce on tracker emptiness — a single empty frame between # detections is not enough reason to abandon the drive and start # scanning. Require this many consecutive empty frames first. EMPTY_DEBOUNCE_STEPS = 8 class ActiveScanTeacher: """Stateful wrapper. Construct one per episode; call ``reset()`` between episodes if reusing the instance. Call signature:: vx, vy, mode = teacher(dog_xy, dog_heading, sheep_positions, pen_target) Note the extra ``dog_heading`` arg — required to compute the rotation direction. 
The base teachers (Strömbom, Sequential) don't use heading; we strip it before passing them through. """ def __init__(self, base_action_fn, initial_scan_steps: int = INITIAL_SCAN_STEPS): self.base = base_action_fn self.initial_scan = int(initial_scan_steps) self.reset() def reset(self) -> None: self.step = 0 self.empty_streak = 0 self.last_action: tuple[float, float] = (0.0, 0.0) @staticmethod def _scan_action(dog_heading: float) -> tuple[float, float]: # Target = current_heading + π. velocity_to_wheels gets err=π, # so turn = k_turn·π = 4π ≈ 12.6 rad/s wheel angular vel and # cos(err) clamps the forward speed to ~0. Maximum in-place # rotation under this controller; one full rotation in ~60 steps. target = dog_heading + math.pi return math.cos(target), math.sin(target) @staticmethod def _explore_action(dog_xy) -> tuple[float, float]: """Walk back toward the field centre when nothing is in view. At difficulty=1 sheep can spawn up to ~18 m from origin while the LiDAR has a 12 m range, so an in-place scan from a corner can return zero hits. Walking toward (0, 0) shrinks the max-distance-to-any-sheep and the scanner cone sweeps along the path, eventually picking sheep up. """ dx, dy = -dog_xy[0], -dog_xy[1] d = math.hypot(dx, dy) if d < 0.5: # At the centre — fall through to a scan instead. return 0.0, 0.0 return EXPLORE_SPEED * dx / d, EXPLORE_SPEED * dy / d def __call__(self, dog_xy, dog_heading, sheep_positions, pen_target): self.step += 1 n_visible = len(sheep_positions) # Track empty-streak for the explore debounce. if n_visible == 0: self.empty_streak += 1 else: self.empty_streak = 0 # Phase 1: opening rotation, regardless of tracker state. if self.step <= self.initial_scan: vx, vy = self._scan_action(dog_heading) self.last_action = (vx, vy) return vx, vy, "scan_initial" # Phase 2: tracker has been empty for a while — walk back to the # centre while the LiDAR keeps sweeping. 
The debounce prevents # this from firing every time the tracker briefly blinks to zero # (which causes the "dog starts going away from sheep" symptom). if self.empty_streak >= EMPTY_DEBOUNCE_STEPS: ex, ey = self._explore_action(dog_xy) if ex == 0.0 and ey == 0.0: vx, vy = self._scan_action(dog_heading) mode = "scan_at_centre" else: vx, vy = ex, ey mode = "explore" self.last_action = (vx, vy) return vx, vy, mode # Phase 2b: tracker just blinked empty for