Checkpoint 6

This commit is contained in:
Johnny Fernandes
2026-05-11 10:35:48 +01:00
parent b457155538
commit fce0e0c786
27 changed files with 194 additions and 704 deletions
View File
+132
View File
@@ -0,0 +1,132 @@
"""Active-perception wrapper for the analytic shepherding teachers.
Under LiDAR (partial observability), the tracker starts empty — the
dog hasn't seen any sheep yet. A naive Strömbom call returns
``(0, 0, "idle")`` and the dog stops. The student then learns "do
nothing when the tracker is empty," which is a fatal local optimum.
This wrapper replaces the idle case with a **scan action**: a unit
vector 90° CCW from the dog's current forward direction. Passed
through ``velocity_to_wheels`` it produces a fast in-place rotation
(``cos(err)`` clamp drives forward speed to ~0 because the target is
orthogonal to the heading). The dog spins for the first
``initial_scan_steps`` steps of every episode regardless of tracker
state, and re-enters scan whenever the tracker goes empty mid-episode.
Once enough sheep are tracked, control hands over to the underlying
analytic teacher (Strömbom or Sequential), which now operates on a
populated tracker dict. Both teacher and student see the same
LiDAR-perceived view — there's no information asymmetry, so the
student can in principle achieve the teacher's full performance.
"""
from __future__ import annotations
import math
from herding.control.modulation import modulate_speed_near_sheep
INITIAL_SCAN_STEPS = 80 # ≈1.3 s at dt=16 ms — full rotation at the +π turn target.
EXPLORE_SPEED = 0.7 # m/s-ish unit (action norm) used when walking blind
# Debounce on tracker emptiness — a single empty frame between
# detections is not enough reason to abandon the drive and start
# scanning. Require this many consecutive empty frames first.
EMPTY_DEBOUNCE_STEPS = 8
class ActiveScanTeacher:
"""Stateful wrapper. Construct one per episode; call ``reset()``
between episodes if reusing the instance.
Call signature::
vx, vy, mode = teacher(dog_xy, dog_heading, sheep_positions, pen_target)
Note the extra ``dog_heading`` arg — required to compute the
rotation direction. The base teachers (Strömbom, Sequential)
don't use heading; we strip it before passing them through.
"""
def __init__(self, base_action_fn, initial_scan_steps: int = INITIAL_SCAN_STEPS):
self.base = base_action_fn
self.initial_scan = int(initial_scan_steps)
self.reset()
def reset(self) -> None:
self.step = 0
self.empty_streak = 0
self.last_action: tuple[float, float] = (0.0, 0.0)
@staticmethod
def _scan_action(dog_heading: float) -> tuple[float, float]:
# Target = current_heading + π. velocity_to_wheels gets err=π,
# so turn = k_turn·π = 4π ≈ 12.6 rad/s wheel angular vel and
# cos(err) clamps the forward speed to ~0. Maximum in-place
# rotation under this controller; one full rotation in ~60 steps.
target = dog_heading + math.pi
return math.cos(target), math.sin(target)
@staticmethod
def _explore_action(dog_xy) -> tuple[float, float]:
"""Walk back toward the field centre when nothing is in view.
At difficulty=1 sheep can spawn up to ~18 m from origin while
the LiDAR has a 12 m range, so an in-place scan from a corner
can return zero hits. Walking toward (0, 0) shrinks the
max-distance-to-any-sheep and the scanner cone sweeps along
the path, eventually picking sheep up.
"""
dx, dy = -dog_xy[0], -dog_xy[1]
d = math.hypot(dx, dy)
if d < 0.5:
# At the centre — fall through to a scan instead.
return 0.0, 0.0
return EXPLORE_SPEED * dx / d, EXPLORE_SPEED * dy / d
def __call__(self, dog_xy, dog_heading, sheep_positions, pen_target):
self.step += 1
n_visible = len(sheep_positions)
# Track empty-streak for the explore debounce.
if n_visible == 0:
self.empty_streak += 1
else:
self.empty_streak = 0
# Phase 1: opening rotation, regardless of tracker state.
if self.step <= self.initial_scan:
vx, vy = self._scan_action(dog_heading)
self.last_action = (vx, vy)
return vx, vy, "scan_initial"
# Phase 2: tracker has been empty for a while — walk back to the
# centre while the LiDAR keeps sweeping. The debounce prevents
# this from firing every time the tracker briefly blinks to zero
# (which causes the "dog starts going away from sheep" symptom).
if self.empty_streak >= EMPTY_DEBOUNCE_STEPS:
ex, ey = self._explore_action(dog_xy)
if ex == 0.0 and ey == 0.0:
vx, vy = self._scan_action(dog_heading)
mode = "scan_at_centre"
else:
vx, vy = ex, ey
mode = "explore"
self.last_action = (vx, vy)
return vx, vy, mode
# Phase 2b: tracker just blinked empty for <DEBOUNCE frames —
# hold the previous action so the dog doesn't lurch.
if n_visible == 0:
vx, vy = self.last_action
return vx, vy, "hold"
# Phase 3: hand to the underlying analytic teacher, then apply
# the shared near-sheep speed modulation (centralised in
# herding.control so the BC student, Strömbom, Sequential and
# the DAgger teacher all behave identically near sheep).
vx, vy, mode = self.base(dog_xy, sheep_positions, pen_target)
vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
self.last_action = (vx, vy)
return vx, vy, mode
+53
View File
@@ -0,0 +1,53 @@
"""Shared low-level control helpers used by every dog mode.
Centralised here so the BC student, Strömbom, Sequential, and the DAgger
teacher all apply identical post-processing to their action outputs.
The downstream wheel-velocity layer (``herding.diffdrive``) is unchanged.
"""
from __future__ import annotations
import math
# Speed-modulation: scale action magnitude down when close to the
# nearest sheep. Stops the dog from charging in at full speed and
# scattering the flock. Action norm linearly ramps from MIN_SPEED at
# distance 0 to 1.0 at SLOW_NEAR_SHEEP.
SLOW_NEAR_SHEEP = 2.5
MIN_SPEED = 0.30
def modulate_speed_near_sheep(
vx: float, vy: float,
dog_xy: tuple[float, float],
sheep_positions,
slow_dist: float = SLOW_NEAR_SHEEP,
min_scale: float = MIN_SPEED,
) -> tuple[float, float]:
"""Scale (vx, vy) magnitude down when close to the nearest sheep.
``sheep_positions`` accepts either a ``{name: (x, y)}`` dict
(matching what the trackers emit) or an iterable of ``(x, y)``
tuples. Empty input → action returned unchanged.
The intent direction is preserved; only magnitude is reduced. With
``slow_dist=2.5`` and ``min_scale=0.3``, an action that started at
norm 1 is multiplied by 0.3 right next to a sheep, by 0.65 at 1 m
away, and by 1.0 once the nearest sheep is ≥ 2.5 m off.
"""
if not sheep_positions:
return vx, vy
if hasattr(sheep_positions, "values"):
positions = sheep_positions.values()
else:
positions = sheep_positions
nearest = float("inf")
for sx, sy in positions:
d = math.hypot(sx - dog_xy[0], sy - dog_xy[1])
if d < nearest:
nearest = d
if nearest >= slow_dist or nearest == float("inf"):
return vx, vy
scale = min_scale + (1.0 - min_scale) * (nearest / slow_dist)
return vx * scale, vy * scale
+98
View File
@@ -0,0 +1,98 @@
"""Sequential single-target shepherd dog algorithm.
Strömbom drives the flock's centre of mass; with N sheep and a narrow
3 m gate, this fails because the flock is wider than the gate and CoM
driving abandons stragglers. Real sheepdogs solve this differently:
they pick *one* sheep at a time, drive it through, return for the next.
This module implements that "pin-and-push" approach.
Algorithm (one step):
1. Active sheep = those still in the field (not yet penned).
2. Target = the active sheep currently closest to the pen entry.
3. Drive position = ``target + Δ · unit(target pen_entry)`` —
directly behind the target relative to the goal.
4. Output unit vector pointing the dog at the drive position.
Once the target crosses the gate it latches as penned and is removed
from the active set; the next-closest unpenned sheep becomes the
target. The algorithm naturally "queues" sheep through the gate.
Empirically (with our flocking dynamics) this scales linearly with
flock size and works up to at least n=10 within a 15 000-step budget.
"""
import math
from herding.world.geometry import GATE_Y, PEN_ENTRY, in_pen
DELTA_DRIVE = 1.5 # standoff behind the target sheep
APPROACH_GAIN = 1.0 # action magnitude scale (1 = full speed)
def _unit(x, y):
d = math.hypot(x, y)
if d < 1e-6:
return 0.0, 0.0
return x / d, y / d
def _is_active(x, y) -> bool:
return (not in_pen(x, y)) and y > GATE_Y
def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
"""Return ``(vx, vy, mode)`` where mode encodes the current target.
Compatible with the Strömbom call signature so it can be drop-in
swapped in the dog controller and the env's imitation reward.
"""
active = [(name, x, y) for name, (x, y) in sheep_positions.items()
if _is_active(x, y)]
if not active:
return 0.0, 0.0, "idle"
# Pick target = sheep closest to pen entry. Stable choice: as one
# sheep approaches and crosses the gate it stays the target until
# latched; then the next-closest takes over.
name, sx, sy = min(
active,
key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
)
# Drive position behind the target along the (target → pen) line.
ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
tx = sx + DELTA_DRIVE * ux
ty = sy + DELTA_DRIVE * uy
ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}"
def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
"""Debug variant returning ``(vx, vy, mode, debug_dict)``."""
active = [(name, x, y) for name, (x, y) in sheep_positions.items()
if _is_active(x, y)]
if not active:
return 0.0, 0.0, "idle", {
"n_active": 0, "target_name": "",
"target_x": 0.0, "target_y": 0.0,
"drive_x": dog_xy[0], "drive_y": dog_xy[1],
}
name, sx, sy = min(
active,
key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
)
ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
tx = sx + DELTA_DRIVE * ux
ty = sy + DELTA_DRIVE * uy
ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}", {
"n_active": len(active), "target_name": name,
"target_x": sx, "target_y": sy,
"drive_x": tx, "drive_y": ty,
}
+114
View File
@@ -0,0 +1,114 @@
"""Strömbom collect/drive heuristic for the shepherd dog.
Adapted from the original ``controllers/shepherd_dog/strombom.py`` and
updated for the external pen layout. Used as a baseline controller and
as the fallback when the RL policy isn't available.
Reference: Strömbom et al. 2014, "Solving the shepherding problem".
"""
import math
from herding.world.geometry import PEN_ENTRY, GATE_Y, in_pen
# Algorithm parameters. DELTA_DRIVE / DELTA_COLLECT were tightened from
# the original (4.0 / 2.5) because the new external pen sits ~26 m from
# typical sheep spawn locations — at the old 4 m standoff, the flee force
# (quadratic ramp, 3.7 at 4 m vs ~10 at 2 m) couldn't move sheep through
# the path inside the 3000-step episode budget.
#
# F_FACTOR was 2.0 in the original Strömbom paper; raised to 4.0 here so
# the dog stays in *drive* mode much longer. With our tighter cohesion
# (flocking_sim.py), partially-collected flocks consolidate naturally
# during a drive, and we don't waste 80% of the time budget on a slow
# "collect" pre-phase.
F_FACTOR = 4.0
DELTA_COLLECT = 1.5
DELTA_DRIVE = 2.0
def _unit(x, y):
d = math.hypot(x, y)
if d < 1e-6:
return 0.0, 0.0
return x / d, y / d
def _is_active(x, y) -> bool:
"""A sheep is "active" if it's still in the field — not in or below
the gate plane (we treat anything south of the gate as committed to
the pen and stop trying to herd it)."""
return (not in_pen(x, y)) and y > GATE_Y
def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
"""Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}.
``sheep_positions`` is a ``{name: (x, y)}`` mapping (matches the
Webots controller's representation).
"""
active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
if not active:
return 0.0, 0.0, "idle"
n = len(active)
com_x = sum(p[0] for p in active) / n
com_y = sum(p[1] for p in active) / n
dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
radius = max(dists)
if radius > F_FACTOR * math.sqrt(n):
# Collect: aim at a point behind the furthest sheep, opposite the CoM.
idx = max(range(n), key=lambda i: dists[i])
sx, sy = active[idx]
ux, uy = _unit(sx - com_x, sy - com_y)
tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
mode = "collect"
else:
# Drive: aim at a point behind the flock CoM relative to the goal.
ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
mode = "drive"
ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
return ax, ay, mode
def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
"""Variant of compute_action that also returns a small debug dict.
Kept for parity with the legacy controller's CSV logger.
"""
active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
if not active:
return 0.0, 0.0, "idle", {
"n_active": 0, "radius": 0.0, "threshold": 0.0,
"com_x": 0.0, "com_y": 0.0,
"target_x": dog_xy[0], "target_y": dog_xy[1],
}
n = len(active)
com_x = sum(p[0] for p in active) / n
com_y = sum(p[1] for p in active) / n
dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
radius = max(dists)
threshold = F_FACTOR * math.sqrt(n)
if radius > threshold:
idx = max(range(n), key=lambda i: dists[i])
sx, sy = active[idx]
ux, uy = _unit(sx - com_x, sy - com_y)
tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
mode = "collect"
else:
ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
mode = "drive"
ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
dbg = {
"n_active": n, "radius": radius, "threshold": threshold,
"com_x": com_x, "com_y": com_y,
"target_x": tx, "target_y": ty,
}
return ax, ay, mode, dbg