Checkpoint 7
This commit is contained in:
@@ -1,23 +1,19 @@
|
||||
"""Active-perception wrapper for the analytic shepherding teachers.
|
||||
"""Active-perception wrapper for the analytic shepherd teachers.
|
||||
|
||||
Under LiDAR (partial observability), the tracker starts empty — the
|
||||
dog hasn't seen any sheep yet. A naive Strömbom call returns
|
||||
``(0, 0, "idle")`` and the dog stops. The student then learns "do
|
||||
nothing when the tracker is empty," which is a fatal local optimum.
|
||||
Under partial-observability LiDAR perception the tracker starts empty
|
||||
— a naive analytic teacher returns ``(0, 0, "idle")`` and the dog
|
||||
stops. This wrapper interleaves the underlying teacher with two
|
||||
exploration behaviours:
|
||||
|
||||
This wrapper replaces the idle case with a **scan action**: a unit
|
||||
vector 90° CCW from the dog's current forward direction. Passed
|
||||
through ``velocity_to_wheels`` it produces a fast in-place rotation
|
||||
(``cos(err)`` clamp drives forward speed to ~0 because the target is
|
||||
orthogonal to the heading). The dog spins for the first
|
||||
``initial_scan_steps`` steps of every episode regardless of tracker
|
||||
state, and re-enters scan whenever the tracker goes empty mid-episode.
|
||||
* opening in-place rotation for the first ``INITIAL_SCAN_STEPS`` steps,
|
||||
guaranteeing the LiDAR sweeps a full circle before driving;
|
||||
* walk-to-centre when the tracker has been empty for at least
|
||||
``EMPTY_DEBOUNCE_STEPS`` consecutive frames (corners can sit
|
||||
beyond the 12 m LiDAR range).
|
||||
|
||||
Once enough sheep are tracked, control hands over to the underlying
|
||||
analytic teacher (Strömbom or Sequential), which now operates on a
|
||||
populated tracker dict. Both teacher and student see the same
|
||||
LiDAR-perceived view — there's no information asymmetry, so the
|
||||
student can in principle achieve the teacher's full performance.
|
||||
When the tracker has detections the base teacher's action is used,
|
||||
post-processed by ``modulate_speed_near_sheep`` so the dog doesn't
|
||||
charge the flock.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -27,26 +23,17 @@ import math
|
||||
from herding.control.modulation import modulate_speed_near_sheep
|
||||
|
||||
|
||||
INITIAL_SCAN_STEPS = 80 # ≈1.3 s at dt=16 ms — full rotation at the +π turn target.
|
||||
EXPLORE_SPEED = 0.7 # m/s-ish unit (action norm) used when walking blind
|
||||
|
||||
# Debounce on tracker emptiness — a single empty frame between
|
||||
# detections is not enough reason to abandon the drive and start
|
||||
# scanning. Require this many consecutive empty frames first.
|
||||
EMPTY_DEBOUNCE_STEPS = 8
|
||||
INITIAL_SCAN_STEPS = 80 # ≈1.3 s — covers one full rotation
|
||||
EXPLORE_SPEED = 0.7 # action norm while walking blind
|
||||
EMPTY_DEBOUNCE_STEPS = 8 # consecutive empty frames before exploring
|
||||
|
||||
|
||||
class ActiveScanTeacher:
|
||||
"""Stateful wrapper. Construct one per episode; call ``reset()``
|
||||
between episodes if reusing the instance.
|
||||
"""Stateful wrapper. Construct one per episode (or call ``reset``).
|
||||
|
||||
Call signature::
|
||||
|
||||
vx, vy, mode = teacher(dog_xy, dog_heading, sheep_positions, pen_target)
|
||||
|
||||
Note the extra ``dog_heading`` arg — required to compute the
|
||||
rotation direction. The base teachers (Strömbom, Sequential)
|
||||
don't use heading, so it is dropped before they are called.
|
||||
"""
|
||||
|
||||
def __init__(self, base_action_fn, initial_scan_steps: int = INITIAL_SCAN_STEPS):
|
||||
@@ -61,27 +48,17 @@ class ActiveScanTeacher:
|
||||
|
||||
@staticmethod
|
||||
def _scan_action(dog_heading: float) -> tuple[float, float]:
|
||||
# Target = current_heading + π. velocity_to_wheels gets err=π,
|
||||
# so turn = k_turn·π = 4π ≈ 12.6 rad/s wheel angular vel and
|
||||
# cos(err) clamps the forward speed to ~0. Maximum in-place
|
||||
# rotation under this controller; one full rotation in ~60 steps.
|
||||
# Target opposite to current heading; velocity_to_wheels'
|
||||
# cos(err) clamp drives forward speed to ~0 → in-place rotation.
|
||||
target = dog_heading + math.pi
|
||||
return math.cos(target), math.sin(target)
|
||||
|
||||
@staticmethod
|
||||
def _explore_action(dog_xy) -> tuple[float, float]:
|
||||
"""Walk back toward the field centre when nothing is in view.
|
||||
|
||||
At difficulty=1 sheep can spawn up to ~18 m from origin while
|
||||
the LiDAR has a 12 m range, so an in-place scan from a corner
|
||||
can return zero hits. Walking toward (0, 0) shrinks the
|
||||
max-distance-to-any-sheep and the scanner cone sweeps along
|
||||
the path, eventually picking sheep up.
|
||||
"""
|
||||
"""Walk toward (0, 0) while the LiDAR keeps sweeping."""
|
||||
dx, dy = -dog_xy[0], -dog_xy[1]
|
||||
d = math.hypot(dx, dy)
|
||||
if d < 0.5:
|
||||
# At the centre — fall through to a scan instead.
|
||||
return 0.0, 0.0
|
||||
return EXPLORE_SPEED * dx / d, EXPLORE_SPEED * dy / d
|
||||
|
||||
@@ -89,22 +66,18 @@ class ActiveScanTeacher:
|
||||
self.step += 1
|
||||
n_visible = len(sheep_positions)
|
||||
|
||||
# Track empty-streak for the explore debounce.
|
||||
if n_visible == 0:
|
||||
self.empty_streak += 1
|
||||
else:
|
||||
self.empty_streak = 0
|
||||
|
||||
# Phase 1: opening rotation, regardless of tracker state.
|
||||
# Phase 1: opening rotation.
|
||||
if self.step <= self.initial_scan:
|
||||
vx, vy = self._scan_action(dog_heading)
|
||||
self.last_action = (vx, vy)
|
||||
return vx, vy, "scan_initial"
|
||||
|
||||
# Phase 2: tracker has been empty for a while — walk back to the
|
||||
# centre while the LiDAR keeps sweeping. The debounce prevents
|
||||
# this from firing every time the tracker briefly blinks to zero
|
||||
# (which causes the "dog starts going away from sheep" symptom).
|
||||
# Phase 2: walk-to-centre after a sustained empty tracker.
|
||||
if self.empty_streak >= EMPTY_DEBOUNCE_STEPS:
|
||||
ex, ey = self._explore_action(dog_xy)
|
||||
if ex == 0.0 and ey == 0.0:
|
||||
@@ -116,16 +89,13 @@ class ActiveScanTeacher:
|
||||
self.last_action = (vx, vy)
|
||||
return vx, vy, mode
|
||||
|
||||
# Phase 2b: tracker just blinked empty for <DEBOUNCE frames —
|
||||
# hold the previous action so the dog doesn't lurch.
|
||||
# Phase 2b: brief tracker blink — hold the previous action.
|
||||
if n_visible == 0:
|
||||
vx, vy = self.last_action
|
||||
return vx, vy, "hold"
|
||||
|
||||
# Phase 3: hand to the underlying analytic teacher, then apply
|
||||
# the shared near-sheep speed modulation (centralised in
|
||||
# herding.control so the BC student, Strömbom, Sequential and
|
||||
# the DAgger teacher all behave identically near sheep).
|
||||
# Phase 3: hand off to the underlying analytic teacher, then
|
||||
# apply the shared near-sheep speed modulation.
|
||||
vx, vy, mode = self.base(dog_xy, sheep_positions, pen_target)
|
||||
vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
|
||||
self.last_action = (vx, vy)
|
||||
|
||||
Reference in New Issue
Block a user