Checkpoint 7
This commit is contained in:
@@ -1,23 +1,19 @@
|
||||
"""Active-perception wrapper for the analytic shepherding teachers.
|
||||
"""Active-perception wrapper for the analytic shepherd teachers.
|
||||
|
||||
Under LiDAR (partial observability), the tracker starts empty — the
|
||||
dog hasn't seen any sheep yet. A naive Strömbom call returns
|
||||
``(0, 0, "idle")`` and the dog stops. The student then learns "do
|
||||
nothing when the tracker is empty," which is a fatal local optimum.
|
||||
Under partial-observability LiDAR perception the tracker starts empty
|
||||
— a naive analytic teacher returns ``(0, 0, "idle")`` and the dog
|
||||
stops. This wrapper interleaves the underlying teacher with two
|
||||
exploration behaviours:
|
||||
|
||||
This wrapper replaces the idle case with a **scan action**: a unit
|
||||
vector 90° CCW from the dog's current forward direction. Passed
|
||||
through ``velocity_to_wheels`` it produces a fast in-place rotation
|
||||
(``cos(err)`` clamp drives forward speed to ~0 because the target is
|
||||
orthogonal to the heading). The dog spins for the first
|
||||
``initial_scan_steps`` steps of every episode regardless of tracker
|
||||
state, and re-enters scan whenever the tracker goes empty mid-episode.
|
||||
* opening in-place rotation for the first ``INITIAL_SCAN_STEPS`` steps,
|
||||
guaranteeing the LiDAR sweeps a full circle before driving;
|
||||
* walk-to-centre when the tracker has been empty for at least
|
||||
``EMPTY_DEBOUNCE_STEPS`` consecutive frames (corners can sit
|
||||
beyond the 12 m LiDAR range).
|
||||
|
||||
Once enough sheep are tracked, control hands over to the underlying
|
||||
analytic teacher (Strömbom or Sequential), which now operates on a
|
||||
populated tracker dict. Both teacher and student see the same
|
||||
LiDAR-perceived view — there's no information asymmetry, so the
|
||||
student can in principle achieve the teacher's full performance.
|
||||
When the tracker has detections the base teacher's action is used,
|
||||
post-processed by ``modulate_speed_near_sheep`` so the dog doesn't
|
||||
charge the flock.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -27,26 +23,17 @@ import math
|
||||
from herding.control.modulation import modulate_speed_near_sheep
|
||||
|
||||
|
||||
INITIAL_SCAN_STEPS = 80 # ≈1.3 s at dt=16 ms — full rotation at the +π turn target.
|
||||
EXPLORE_SPEED = 0.7 # m/s-ish unit (action norm) used when walking blind
|
||||
|
||||
# Debounce on tracker emptiness — a single empty frame between
|
||||
# detections is not enough reason to abandon the drive and start
|
||||
# scanning. Require this many consecutive empty frames first.
|
||||
EMPTY_DEBOUNCE_STEPS = 8
|
||||
INITIAL_SCAN_STEPS = 80 # ≈1.3 s — covers one full rotation
|
||||
EXPLORE_SPEED = 0.7 # action norm while walking blind
|
||||
EMPTY_DEBOUNCE_STEPS = 8 # consecutive empty frames before exploring
|
||||
|
||||
|
||||
class ActiveScanTeacher:
|
||||
"""Stateful wrapper. Construct one per episode; call ``reset()``
|
||||
between episodes if reusing the instance.
|
||||
"""Stateful wrapper. Construct one per episode (or call ``reset``).
|
||||
|
||||
Call signature::
|
||||
|
||||
vx, vy, mode = teacher(dog_xy, dog_heading, sheep_positions, pen_target)
|
||||
|
||||
Note the extra ``dog_heading`` arg — required to compute the
|
||||
rotation direction. The base teachers (Strömbom, Sequential)
|
||||
don't use heading; we strip it before passing them through.
|
||||
"""
|
||||
|
||||
def __init__(self, base_action_fn, initial_scan_steps: int = INITIAL_SCAN_STEPS):
|
||||
@@ -61,27 +48,17 @@ class ActiveScanTeacher:
|
||||
|
||||
@staticmethod
|
||||
def _scan_action(dog_heading: float) -> tuple[float, float]:
|
||||
# Target = current_heading + π. velocity_to_wheels gets err=π,
|
||||
# so turn = k_turn·π = 4π ≈ 12.6 rad/s wheel angular vel and
|
||||
# cos(err) clamps the forward speed to ~0. Maximum in-place
|
||||
# rotation under this controller; one full rotation in ~60 steps.
|
||||
# Target opposite to current heading; velocity_to_wheels'
|
||||
# cos(err) clamp drives forward speed to ~0 → in-place rotation.
|
||||
target = dog_heading + math.pi
|
||||
return math.cos(target), math.sin(target)
|
||||
|
||||
@staticmethod
|
||||
def _explore_action(dog_xy) -> tuple[float, float]:
|
||||
"""Walk back toward the field centre when nothing is in view.
|
||||
|
||||
At difficulty=1 sheep can spawn up to ~18 m from origin while
|
||||
the LiDAR has a 12 m range, so an in-place scan from a corner
|
||||
can return zero hits. Walking toward (0, 0) shrinks the
|
||||
max-distance-to-any-sheep and the scanner cone sweeps along
|
||||
the path, eventually picking sheep up.
|
||||
"""
|
||||
"""Walk toward (0, 0) while the LiDAR keeps sweeping."""
|
||||
dx, dy = -dog_xy[0], -dog_xy[1]
|
||||
d = math.hypot(dx, dy)
|
||||
if d < 0.5:
|
||||
# At the centre — return a zero action so the caller can substitute a scan.
|
||||
return 0.0, 0.0
|
||||
return EXPLORE_SPEED * dx / d, EXPLORE_SPEED * dy / d
|
||||
|
||||
@@ -89,22 +66,18 @@ class ActiveScanTeacher:
|
||||
self.step += 1
|
||||
n_visible = len(sheep_positions)
|
||||
|
||||
# Track empty-streak for the explore debounce.
|
||||
if n_visible == 0:
|
||||
self.empty_streak += 1
|
||||
else:
|
||||
self.empty_streak = 0
|
||||
|
||||
# Phase 1: opening rotation, regardless of tracker state.
|
||||
# Phase 1: opening rotation.
|
||||
if self.step <= self.initial_scan:
|
||||
vx, vy = self._scan_action(dog_heading)
|
||||
self.last_action = (vx, vy)
|
||||
return vx, vy, "scan_initial"
|
||||
|
||||
# Phase 2: tracker has been empty for a while — walk back to the
|
||||
# centre while the LiDAR keeps sweeping. The debounce prevents
|
||||
# this from firing every time the tracker briefly blinks to zero
|
||||
# (which causes the "dog starts going away from sheep" symptom).
|
||||
# Phase 2: walk-to-centre after a sustained empty tracker.
|
||||
if self.empty_streak >= EMPTY_DEBOUNCE_STEPS:
|
||||
ex, ey = self._explore_action(dog_xy)
|
||||
if ex == 0.0 and ey == 0.0:
|
||||
@@ -116,16 +89,13 @@ class ActiveScanTeacher:
|
||||
self.last_action = (vx, vy)
|
||||
return vx, vy, mode
|
||||
|
||||
# Phase 2b: tracker just blinked empty for <DEBOUNCE frames —
|
||||
# hold the previous action so the dog doesn't lurch.
|
||||
# Phase 2b: brief tracker blink — hold the previous action.
|
||||
if n_visible == 0:
|
||||
vx, vy = self.last_action
|
||||
return vx, vy, "hold"
|
||||
|
||||
# Phase 3: hand to the underlying analytic teacher, then apply
|
||||
# the shared near-sheep speed modulation (centralised in
|
||||
# herding.control so the BC student, Strömbom, Sequential and
|
||||
# the DAgger teacher all behave identically near sheep).
|
||||
# Phase 3: hand off to the underlying analytic teacher, then
|
||||
# apply the shared near-sheep speed modulation.
|
||||
vx, vy, mode = self.base(dog_xy, sheep_positions, pen_target)
|
||||
vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
|
||||
self.last_action = (vx, vy)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
"""Shared low-level control helpers used by every dog mode.
|
||||
"""Shared action post-processing.
|
||||
|
||||
Centralised here so the BC student, Strömbom, Sequential, and the DAgger
|
||||
teacher all apply identical post-processing to their action outputs.
|
||||
The downstream wheel-velocity layer (``herding.diffdrive``) is unchanged.
|
||||
Every dog mode routes its action through ``modulate_speed_near_sheep``
|
||||
so the magnitude is reduced near sheep — direction (intent) is
|
||||
preserved.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -10,12 +10,8 @@ from __future__ import annotations
|
||||
import math
|
||||
|
||||
|
||||
# Speed-modulation: scale action magnitude down when close to the
|
||||
# nearest sheep. Stops the dog from charging in at full speed and
|
||||
# scattering the flock. Action norm linearly ramps from MIN_SPEED at
|
||||
# distance 0 to 1.0 at SLOW_NEAR_SHEEP.
|
||||
SLOW_NEAR_SHEEP = 2.5
|
||||
MIN_SPEED = 0.30
|
||||
SLOW_NEAR_SHEEP = 2.5 # m — distance below which action norm is scaled down
|
||||
MIN_SPEED = 0.30 # scale factor applied to the action norm at zero distance
|
||||
|
||||
|
||||
def modulate_speed_near_sheep(
|
||||
@@ -25,16 +21,9 @@ def modulate_speed_near_sheep(
|
||||
slow_dist: float = SLOW_NEAR_SHEEP,
|
||||
min_scale: float = MIN_SPEED,
|
||||
) -> tuple[float, float]:
|
||||
"""Scale (vx, vy) magnitude down when close to the nearest sheep.
|
||||
|
||||
``sheep_positions`` accepts either a ``{name: (x, y)}`` dict
|
||||
(matching what the trackers emit) or an iterable of ``(x, y)``
|
||||
tuples. Empty input → action returned unchanged.
|
||||
|
||||
The intent direction is preserved; only magnitude is reduced. With
|
||||
``slow_dist=2.5`` and ``min_scale=0.3``, an action that started at
|
||||
norm 1 is multiplied by 0.3 right next to a sheep, by 0.65 at 1 m
|
||||
away, and by 1.0 once the nearest sheep is ≥ 2.5 m off.
|
||||
"""Linearly ramp action magnitude from ``min_scale`` at distance 0
|
||||
to 1.0 at ``slow_dist``. ``sheep_positions`` may be a
|
||||
``{name: (x, y)}`` dict or an iterable of ``(x, y)`` tuples.
|
||||
"""
|
||||
if not sheep_positions:
|
||||
return vx, vy
|
||||
|
||||
@@ -1,25 +1,9 @@
|
||||
"""Sequential single-target shepherd dog algorithm.
|
||||
"""Sequential "pin-and-push" shepherd-dog controller.
|
||||
|
||||
Strömbom drives the flock's centre of mass; with N sheep and a narrow
|
||||
3 m gate, this fails because the flock is wider than the gate and CoM
|
||||
driving abandons stragglers. Real sheepdogs solve this differently:
|
||||
they pick *one* sheep at a time, drive it through, return for the next.
|
||||
|
||||
This module implements that "pin-and-push" approach.
|
||||
|
||||
Algorithm (one step):
|
||||
1. Active sheep = those still in the field (not yet penned).
|
||||
2. Target = the active sheep currently closest to the pen entry.
|
||||
3. Drive position = ``target + Δ · unit(target − pen_entry)`` —
|
||||
directly behind the target relative to the goal.
|
||||
4. Output unit vector pointing the dog at the drive position.
|
||||
|
||||
Once the target crosses the gate it latches as penned and is removed
|
||||
from the active set; the next-closest unpenned sheep becomes the
|
||||
target. The algorithm naturally "queues" sheep through the gate.
|
||||
|
||||
Empirically (with our flocking dynamics) this scales linearly with
|
||||
flock size and works up to at least n=10 within a 15 000-step budget.
|
||||
Single-target alternative to Strömbom: each step, target the sheep
|
||||
closest to the pen, park behind it, drive it through; once it latches
|
||||
penned the next-closest sheep becomes the target. Naturally queues
|
||||
the flock through a narrow gate.
|
||||
"""
|
||||
|
||||
import math
|
||||
@@ -43,25 +27,17 @@ def _is_active(x, y) -> bool:
|
||||
|
||||
|
||||
def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
|
||||
"""Return ``(vx, vy, mode)`` where mode encodes the current target.
|
||||
|
||||
Compatible with the Strömbom call signature so it can be drop-in
|
||||
swapped in the dog controller and the env's imitation reward.
|
||||
"""
|
||||
"""Return ``(vx, vy, mode)`` — same call signature as Strömbom."""
|
||||
active = [(name, x, y) for name, (x, y) in sheep_positions.items()
|
||||
if _is_active(x, y)]
|
||||
if not active:
|
||||
return 0.0, 0.0, "idle"
|
||||
|
||||
# Pick target = sheep closest to pen entry. Stable choice: as one
|
||||
# sheep approaches and crosses the gate it stays the target until
|
||||
# latched; then the next-closest takes over.
|
||||
name, sx, sy = min(
|
||||
active,
|
||||
key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
|
||||
)
|
||||
|
||||
# Drive position behind the target along the (target → pen) line.
|
||||
ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
|
||||
tx = sx + DELTA_DRIVE * ux
|
||||
ty = sy + DELTA_DRIVE * uy
|
||||
@@ -71,7 +47,7 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
|
||||
|
||||
|
||||
def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
|
||||
"""Debug variant returning ``(vx, vy, mode, debug_dict)``."""
|
||||
"""``compute_action`` plus a debug dict (target, drive point)."""
|
||||
active = [(name, x, y) for name, (x, y) in sheep_positions.items()
|
||||
if _is_active(x, y)]
|
||||
if not active:
|
||||
|
||||
+14
-33
@@ -1,30 +1,20 @@
|
||||
"""Strömbom collect/drive heuristic for the shepherd dog.
|
||||
"""Strömbom (2014) collect/drive heuristic for the shepherd dog.
|
||||
|
||||
Adapted from the original ``controllers/shepherd_dog/strombom.py`` and
|
||||
updated for the external pen layout. Used as a baseline controller and
|
||||
as the fallback when the RL policy isn't available.
|
||||
When the flock is scattered (max radius > F_FACTOR · √n) the dog moves
|
||||
to a point behind the furthest sheep and pushes it back toward the
|
||||
flock CoM. Otherwise it drives, parking behind the CoM relative to
|
||||
the pen target. Returns a unit-vector intent ``(vx, vy, mode)``.
|
||||
|
||||
Reference: Strömbom et al. 2014, "Solving the shepherding problem".
|
||||
Reference: Strömbom et al. 2014, "Solving the shepherding problem."
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
from herding.world.geometry import PEN_ENTRY, GATE_Y, in_pen
|
||||
|
||||
# Algorithm parameters. DELTA_DRIVE / DELTA_COLLECT were tightened from
|
||||
# the original (4.0 / 2.5) because the new external pen sits ~26 m from
|
||||
# typical sheep spawn locations — at the old 4 m standoff, the flee force
|
||||
# (quadratic ramp, 3.7 at 4 m vs ~10 at 2 m) couldn't move sheep through
|
||||
# the path inside the 3000-step episode budget.
|
||||
#
|
||||
# F_FACTOR was 2.0 in the original Strömbom paper; raised to 4.0 here so
|
||||
# the dog stays in *drive* mode much longer. With our tighter cohesion
|
||||
# (flocking_sim.py), partially-collected flocks consolidate naturally
|
||||
# during a drive, and we don't waste 80% of the time budget on a slow
|
||||
# "collect" pre-phase.
|
||||
F_FACTOR = 4.0
|
||||
DELTA_COLLECT = 1.5
|
||||
DELTA_DRIVE = 2.0
|
||||
F_FACTOR = 4.0 # collect/drive threshold scaled by √n
|
||||
DELTA_COLLECT = 1.5 # collect-position offset behind the furthest sheep
|
||||
DELTA_DRIVE = 2.0 # drive-position offset behind the flock CoM
|
||||
|
||||
|
||||
def _unit(x, y):
|
||||
@@ -35,18 +25,12 @@ def _unit(x, y):
|
||||
|
||||
|
||||
def _is_active(x, y) -> bool:
|
||||
"""A sheep is "active" if it's still in the field — not in or below
|
||||
the gate plane (we treat anything south of the gate as committed to
|
||||
the pen and stop trying to herd it)."""
|
||||
"""A sheep still in the field counts; one south of the gate doesn't."""
|
||||
return (not in_pen(x, y)) and y > GATE_Y
|
||||
|
||||
|
||||
def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
|
||||
"""Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}.
|
||||
|
||||
``sheep_positions`` is a ``{name: (x, y)}`` mapping (matches the
|
||||
Webots controller's representation).
|
||||
"""
|
||||
"""Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}."""
|
||||
active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
|
||||
if not active:
|
||||
return 0.0, 0.0, "idle"
|
||||
@@ -58,14 +42,14 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
|
||||
radius = max(dists)
|
||||
|
||||
if radius > F_FACTOR * math.sqrt(n):
|
||||
# Collect: aim at a point behind the furthest sheep, opposite the CoM.
|
||||
# Collect: aim behind the furthest sheep, opposite the CoM.
|
||||
idx = max(range(n), key=lambda i: dists[i])
|
||||
sx, sy = active[idx]
|
||||
ux, uy = _unit(sx - com_x, sy - com_y)
|
||||
tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
|
||||
mode = "collect"
|
||||
else:
|
||||
# Drive: aim at a point behind the flock CoM relative to the goal.
|
||||
# Drive: aim behind the CoM, opposite the pen.
|
||||
ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
|
||||
tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
|
||||
mode = "drive"
|
||||
@@ -75,10 +59,7 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
|
||||
|
||||
|
||||
def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
|
||||
"""Variant of compute_action that also returns a small debug dict.
|
||||
|
||||
Kept for parity with the legacy controller's CSV logger.
|
||||
"""
|
||||
"""``compute_action`` plus a small debug dict (CoM, target, radius)."""
|
||||
active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
|
||||
if not active:
|
||||
return 0.0, 0.0, "idle", {
|
||||
|
||||
@@ -1,26 +1,21 @@
|
||||
"""Cluster a 2D LiDAR scan into world-frame sheep position estimates.
|
||||
|
||||
Pipeline:
|
||||
ranges (N,) ─► hit mask ─► world-frame points
|
||||
│
|
||||
▼
|
||||
adjacency clustering (gap > GAP_THRESHOLD
|
||||
starts a new cluster, walking rays in
|
||||
angular order)
|
||||
│
|
||||
▼
|
||||
centroid + span filter
|
||||
│
|
||||
▼
|
||||
field/pen-corridor filter
|
||||
│
|
||||
▼
|
||||
list of (x, y) detections
|
||||
|
||||
The clusterer is intentionally simple — for ≤10 sheep there is rarely
|
||||
any real ambiguity, and proper DBSCAN would only matter if rays from
|
||||
two adjacent sheep merged. The downstream tracker handles association
|
||||
across frames.
|
||||
ranges (N,) → hit mask → world-frame points
|
||||
│
|
||||
▼
|
||||
adjacency clustering (gap > GAP_THRESHOLD
|
||||
starts a new cluster, walking rays in
|
||||
angular order)
|
||||
│
|
||||
▼
|
||||
centroid + span + region + structure filters
|
||||
│
|
||||
▼
|
||||
list of (x, y) detections
|
||||
|
||||
The downstream tracker handles association across frames.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -35,23 +30,19 @@ from herding.perception.lidar_sim import (
|
||||
)
|
||||
|
||||
|
||||
GAP_THRESHOLD = 0.6 # m — adjacent ray-points farther apart start new cluster
|
||||
MAX_CLUSTER_SPAN = 1.5 # m — clusters wider than this are likely walls/structures
|
||||
GAP_THRESHOLD = 0.6 # m — adjacent ray-points farther apart start a new cluster
|
||||
MAX_CLUSTER_SPAN = 1.5 # m — wider clusters are walls / structures
|
||||
RANGE_HIT_EPS = 0.05 # m — hit if range < max_range - eps
|
||||
WALL_REJECT = 0.5 # m — drop detections this close to a known wall line
|
||||
|
||||
# Known sheep-sized static features. Detections within STATIC_REJECT
|
||||
# of any of these are discarded — these aren't sheep. Mid-pillars on
|
||||
# the field walls are NOT in this list because they're embedded in the
|
||||
# wall (the wall's span filter handles them); listing them here would
|
||||
# only reject real sheep that happened to be near the wall.
|
||||
# Sheep-sized static features (gate posts, corner pillars). A cluster
|
||||
# centred within STATIC_REJECT of any of these is never a sheep.
|
||||
_STATIC_FEATURES = (
|
||||
# Gate posts (sheep-sized boxes flanking the south-wall opening)
|
||||
( 10.0, -15.0), ( 13.0, -15.0),
|
||||
# Field corner pillars
|
||||
( 15.0, 15.0), ( 15.0, -15.0), (-15.0, 15.0), (-15.0, -15.0),
|
||||
( 10.0, -15.0), ( 13.0, -15.0), # gate posts
|
||||
( 15.0, 15.0), ( 15.0, -15.0),
|
||||
(-15.0, 15.0), (-15.0, -15.0), # field corners
|
||||
)
|
||||
STATIC_REJECT = 0.8 # m — detection within this of a static feature → drop
|
||||
STATIC_REJECT = 0.8
|
||||
|
||||
|
||||
def detections_from_scan(
|
||||
@@ -71,6 +62,8 @@ def detections_from_scan(
|
||||
px = dog_x + ranges * np.cos(world_a)
|
||||
py = dog_y + ranges * np.sin(world_a)
|
||||
|
||||
# Walk rays in angular order; a large jump between consecutive
|
||||
# world-frame hit points closes the current cluster.
|
||||
clusters: list[list[tuple[float, float]]] = []
|
||||
current: list[tuple[float, float]] = []
|
||||
prev: tuple[float, float] | None = None
|
||||
@@ -98,41 +91,30 @@ def detections_from_scan(
|
||||
span = math.hypot(max(xs) - min(xs), max(ys) - min(ys))
|
||||
if span > MAX_CLUSTER_SPAN:
|
||||
continue
|
||||
# Surface-to-centre correction: rays hit the front of the sheep,
|
||||
# so the cluster centroid is biased toward the dog by SHEEP_RADIUS.
|
||||
# Push it outward along the dog→cluster direction.
|
||||
# Rays hit the front edge of the sheep; offset outward by
|
||||
# SHEEP_RADIUS along the dog→cluster direction to estimate the
|
||||
# centre.
|
||||
dx, dy = cx - dog_x, cy - dog_y
|
||||
d = math.hypot(dx, dy)
|
||||
if d > 1e-3:
|
||||
cx += SHEEP_RADIUS * dx / d
|
||||
cy += SHEEP_RADIUS * dy / d
|
||||
# Keep detections inside the field OR in the gate corridor /
|
||||
# external pen — penned sheep are still worth tracking so the
|
||||
# tracker can latch them as "penned" rather than spawn fresh
|
||||
# tracks each scan.
|
||||
# Accept detections inside the field, plus a narrow strip
|
||||
# immediately south of the gate to catch sheep mid-crossing
|
||||
# (so they get marked penned via is_penned_position before the
|
||||
# track goes stale). Detections deeper into the pen are
|
||||
# dropped entirely — Webots's pen posts and rails would
|
||||
# otherwise produce a torrent of phantom penned tracks that
|
||||
# the tracker can't keep up with.
|
||||
# Region filter: in-field clusters, plus a narrow strip south of
|
||||
# the gate so sheep mid-crossing get latched penned. Detections
|
||||
# deeper into the pen are dropped — pen posts and rails would
|
||||
# otherwise generate phantom penned tracks.
|
||||
in_main = (FIELD_X[0] - 0.2 < cx < FIELD_X[1] + 0.2 and
|
||||
FIELD_Y[0] - 0.2 < cy < FIELD_Y[1] + 0.2)
|
||||
in_gate_strip = (PEN_X[0] - 0.2 < cx < PEN_X[1] + 0.2 and
|
||||
GATE_Y - 1.0 < cy < GATE_Y + 0.2)
|
||||
if not (in_main or in_gate_strip):
|
||||
continue
|
||||
# Known-static-feature filter: gate posts and corner pillars
|
||||
# show up as sheep-sized clusters but are never sheep.
|
||||
# Known sheep-sized static features.
|
||||
if any(math.hypot(cx - fx, cy - fy) < STATIC_REJECT
|
||||
for fx, fy in _STATIC_FEATURES):
|
||||
continue
|
||||
# Wall-proximity filter: at oblique scan angles, walls produce
|
||||
# multiple short clusters because adjacent ray returns are
|
||||
# spaced just above GAP_THRESHOLD. Sheep can't get within ~0.3 m
|
||||
# of a wall (the env clips them to FIELD_INSIDE), so anything
|
||||
# right at the wall line is structure noise.
|
||||
# Wall-proximity filter — sheep can't get this close to a wall,
|
||||
# so detections right at the wall line are structure noise.
|
||||
near_field_wall = (
|
||||
cx > FIELD_X[1] - WALL_REJECT or cx < FIELD_X[0] + WALL_REJECT or
|
||||
cy > FIELD_Y[1] - WALL_REJECT or
|
||||
|
||||
@@ -1,16 +1,12 @@
|
||||
"""Fast 2D LiDAR simulator for the Gymnasium env.
|
||||
|
||||
Raycasts against:
|
||||
* **Sheep** — discs of radius ``SHEEP_RADIUS``.
|
||||
* **Static world geometry** — axis-aligned wall segments and gate
|
||||
posts taken from ``worlds/field.wbt``. Without these, demos
|
||||
collected in-env would never include the false-positive clusters
|
||||
Webots produces from the stone walls and gate-post boxes, and the
|
||||
BC student trained on those demos collapses on deployment.
|
||||
Raycasts against sheep (discs) and static world geometry (axis-aligned
|
||||
walls + gate posts) so the env reproduces the false-positive cluster
|
||||
distribution Webots produces from real 3D geometry.
|
||||
|
||||
Returns a range array matching the Webots Lidar device on the dog
|
||||
(see ``protos/ShepherdDog.proto``: 180 rays, 140° FOV centred on
|
||||
forward, 12 m max range, 5 mm noise).
|
||||
Returns a range array matching the Webots Lidar device:
|
||||
180 rays, 140° FOV centred on forward, 12 m max range, 5 mm noise.
|
||||
See ``protos/ShepherdDog.proto``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -26,19 +22,13 @@ LIDAR_FOV = 2.44 # rad ≈ 140°
|
||||
LIDAR_MAX_RANGE = 12.0
|
||||
LIDAR_NOISE = 0.005 # m, gaussian std
|
||||
|
||||
# Sheep modelled as a vertical cylinder; this is the horizontal-section
|
||||
# radius the LiDAR plane intersects. Tuned to the proto sheep (~0.45 m
|
||||
# body length). The exact value is not load-bearing — the perception
|
||||
# clusterer is range-tolerant.
|
||||
# Sheep cross-section radius in the LiDAR plane (sheep approximated as a vertical cylinder).
|
||||
SHEEP_RADIUS = 0.30
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Static world geometry — must match worlds/field.wbt
|
||||
# ---------------------------------------------------------------------------
|
||||
# --- Static world geometry — mirrors worlds/field.wbt ---
|
||||
|
||||
# Vertical walls: (x, y_min, y_max). Field east/west walls and the two
|
||||
# pen side walls are visible through the open gate.
|
||||
# Vertical walls: (x, y_min, y_max).
|
||||
_VERTICAL_WALLS = (
|
||||
( 15.0, -15.0, 15.0), # field east
|
||||
(-15.0, -15.0, 15.0), # field west
|
||||
@@ -46,8 +36,7 @@ _VERTICAL_WALLS = (
|
||||
( 13.0, -22.0, -15.0), # pen east
|
||||
)
|
||||
|
||||
# Horizontal walls: (y, x_min, x_max). South wall is split by the 3 m
|
||||
# gate at x ∈ [10, 13]; the pen south wall closes the back of the pen.
|
||||
# Horizontal walls: (y, x_min, x_max). South wall has a 3 m gap at the gate.
|
||||
_HORIZONTAL_WALLS = (
|
||||
( 15.0, -15.0, 15.0), # field north
|
||||
(-15.0, -15.0, 10.0), # field south-west of gate
|
||||
@@ -55,31 +44,23 @@ _HORIZONTAL_WALLS = (
|
||||
(-22.0, 10.0, 13.0), # pen south
|
||||
)
|
||||
|
||||
# Gate posts and field corner pillars treated as vertical cylinders at
|
||||
# LiDAR height. Radius 0.25 m comes from the 0.44 × 0.44 m boxes in the
|
||||
# wbt — close enough to a circular cross-section for this purpose.
|
||||
# Gate posts + field corner pillars, treated as discs at LiDAR height.
|
||||
_POSTS_XY = np.array([
|
||||
( 10.0, -15.0), # west gate post
|
||||
( 13.0, -15.0), # east gate post
|
||||
( 15.0, 15.0), # NE field corner
|
||||
( 15.0, -15.0), # SE field corner
|
||||
(-15.0, 15.0), # NW field corner
|
||||
(-15.0, -15.0), # SW field corner
|
||||
( 10.0, -15.0), ( 13.0, -15.0),
|
||||
( 15.0, 15.0), ( 15.0, -15.0),
|
||||
(-15.0, 15.0), (-15.0, -15.0),
|
||||
], dtype=np.float64)
|
||||
POST_RADIUS = 0.25
|
||||
|
||||
|
||||
def ray_angles(n: int = LIDAR_N_RAYS, fov: float = LIDAR_FOV) -> np.ndarray:
|
||||
"""Local-frame ray angles, sweeping from +fov/2 to -fov/2.
|
||||
"""Local-frame ray angles, CCW from forward, sweeping +fov/2 → -fov/2.
|
||||
|
||||
Convention: angle is measured CCW from the dog's forward axis. Ray 0
|
||||
points to the dog's left, last ray to the right. Webots' default
|
||||
Lidar sweep matches this.
|
||||
Matches Webots' default Lidar sweep direction.
|
||||
"""
|
||||
return np.linspace(fov / 2.0, -fov / 2.0, n, dtype=np.float64)
|
||||
|
||||
|
||||
# Cached so we don't rebuild every step.
|
||||
_ANGLES = ray_angles()
|
||||
_COS = np.cos(_ANGLES)
|
||||
_SIN = np.sin(_ANGLES)
|
||||
@@ -88,13 +69,7 @@ _SIN = np.sin(_ANGLES)
|
||||
def _raycast_static(
|
||||
ox: float, oy: float, cos_w: np.ndarray, sin_w: np.ndarray,
|
||||
) -> np.ndarray:
|
||||
"""Per-ray distance to nearest wall or post hit (∞ if none).
|
||||
|
||||
Walls are axis-aligned line segments; for each ray we compute t at
|
||||
which it crosses the wall's constant-coord plane and check the
|
||||
other coord lies in the segment. Posts are circles; same disc
|
||||
intersection as for sheep.
|
||||
"""
|
||||
"""Per-ray distance to the nearest wall or post hit (∞ if none)."""
|
||||
n_rays = cos_w.shape[0]
|
||||
best = np.full(n_rays, np.inf, dtype=np.float64)
|
||||
|
||||
@@ -144,10 +119,7 @@ def simulate_scan(
|
||||
) -> np.ndarray:
|
||||
"""Return a (N,) float32 range array. No-hit entries equal ``max_range``.
|
||||
|
||||
``sheep_xy`` is the list of (x, y) world positions of every sheep in
|
||||
the scene (penned and active). Static world geometry (walls and
|
||||
posts) is also raycast so demos contain the same false-positive
|
||||
clusters Webots produces.
|
||||
``sheep_xy`` is every sheep (penned or active) in the scene.
|
||||
"""
|
||||
n_rays = _ANGLES.shape[0]
|
||||
|
||||
@@ -172,8 +144,7 @@ def simulate_scan(
|
||||
nearest = candidate.min(axis=0)
|
||||
np.minimum(best, nearest, out=best)
|
||||
|
||||
# Clip to LIDAR_MAX_RANGE; entries that never got a hit stay at inf
|
||||
# → clipped down to max_range like the real Webots device.
|
||||
# Entries with no hit stay at inf → clipped to max_range, matching Webots.
|
||||
ranges = np.minimum(best, max_range).astype(np.float32)
|
||||
return _add_noise(ranges, noise, rng, max_range)
|
||||
|
||||
|
||||
@@ -1,31 +1,25 @@
|
||||
"""Observation builder for the shepherd dog policy.
|
||||
"""Observation builder for the shepherd-dog policy.
|
||||
|
||||
Order-invariant 32-D feature vector — the policy generalises across
|
||||
flock sizes 1..MAX_SHEEP because individual sheep coordinates never
|
||||
appear in the observation by index, only summary statistics, a polar
|
||||
histogram, and two "named" sheep (closest-to-pen and rearmost-from-pen).
|
||||
|
||||
The two named sheep matter for the sequential-driving teacher: it
|
||||
targets the closest-to-pen sheep specifically, so the policy needs
|
||||
that channel to mimic the teacher.
|
||||
Order-invariant 32-D feature vector. Sheep never appear by index in
|
||||
the observation, only via summary statistics, a polar histogram, and
|
||||
two "named" channels (closest-to-pen, rearmost-from-pen) — so the
|
||||
policy generalises across flock sizes 1..MAX_SHEEP.
|
||||
|
||||
Layout (all components normalised so values stay roughly in [-1, 1]):
|
||||
|
||||
idx field
|
||||
idx field
|
||||
----- ----------------------------------------------------------
|
||||
0..3 dog pose: x/15, y/15, cos(heading), sin(heading)
|
||||
0..3 dog pose: x/15, y/15, cos(h), sin(h)
|
||||
4..5 active-sheep CoM x/15, y/15
|
||||
6..8 flock dispersion: max-radius/15, std_x/15, std_y/15
|
||||
9..11 vector dog→CoM: dx/30, dy/30, dist/30
|
||||
12..14 vector dog→pen-entry: dx/30, dy/30, dist/30
|
||||
15..16 vector furthest-sheep→CoM: dx/15, dy/15
|
||||
6..8 flock dispersion: max_radius/15, std_x/15, std_y/15
|
||||
9..11 dog → CoM: dx/30, dy/30, dist/30
|
||||
12..14 dog → pen entry: dx/30, dy/30, dist/30
|
||||
15..16 furthest sheep → CoM: dx/15, dy/15
|
||||
17..18 min sheep-to-wall, min dog-to-wall (both /15)
|
||||
19 active-sheep count / MAX_SHEEP
|
||||
20..27 8-bin polar histogram of active sheep around the dog,
|
||||
rotation-aware (binned in dog-relative frame), normalised
|
||||
so the bins sum to 1.
|
||||
28..29 vector dog→closest-to-pen sheep: dx/15, dy/15
|
||||
30..31 vector dog→rearmost (furthest-from-pen) sheep: dx/15, dy/15
|
||||
19 active sheep count / MAX_SHEEP
|
||||
20..27 8-bin polar histogram of active sheep in the dog's body frame
|
||||
28..29 dog → closest-to-pen sheep: dx/15, dy/15
|
||||
30..31 dog → rearmost (furthest-from-pen) sheep: dx/15, dy/15
|
||||
"""
|
||||
|
||||
import math
|
||||
@@ -68,7 +62,6 @@ def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
|
||||
obs[14] = math.hypot(pdx0, pdy0) / 30.0
|
||||
|
||||
if n == 0:
|
||||
# All sheep penned — terminal observation.
|
||||
obs[19] = 0.0
|
||||
return obs
|
||||
|
||||
@@ -110,7 +103,7 @@ def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
|
||||
obs[18] = float(min_dog_wall) / 15.0
|
||||
obs[19] = n / n_max
|
||||
|
||||
# 8-bin polar histogram in the dog's body frame.
|
||||
# Polar histogram in the dog's body frame.
|
||||
rel_dx = arr[:, 0] - dog_x
|
||||
rel_dy = arr[:, 1] - dog_y
|
||||
angles = np.arctan2(rel_dy, rel_dx) - dog_heading
|
||||
@@ -121,11 +114,9 @@ def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
|
||||
hist /= max(1, n)
|
||||
obs[20:28] = hist
|
||||
|
||||
# Closest-to-pen sheep (the sequential teacher's target) and rearmost
|
||||
# (furthest-from-pen, the natural "next target" once the closest is
|
||||
# penned). Both expressed as offset from dog. These two channels make
|
||||
# BC tractable — without them the obs doesn't uniquely identify which
|
||||
# sheep the teacher is steering toward.
|
||||
# Closest-to-pen and rearmost (furthest-from-pen) sheep. Without
|
||||
# these named channels the obs cannot uniquely identify which sheep
|
||||
# the teacher is steering toward, and BC fails to mimic it.
|
||||
pen_dists = np.hypot(arr[:, 0] - PEN_ENTRY[0], arr[:, 1] - PEN_ENTRY[1])
|
||||
closest_idx = int(np.argmin(pen_dists))
|
||||
rearmost_idx = int(np.argmax(pen_dists))
|
||||
@@ -1,25 +1,14 @@
|
||||
"""Multi-target tracker for LiDAR-detected sheep.
|
||||
|
||||
Greedy nearest-neighbour data association (with a distance gate) across
|
||||
frames, plus a memory of last-seen positions for tracks that fall out
|
||||
of the dog's FOV. Output is a ``{name: (x, y)}`` dict shaped exactly
|
||||
like the receiver-based ``sheep_positions`` used previously by the
|
||||
Webots controller and by the env, so Strömbom and Sequential can
|
||||
consume it unchanged.
|
||||
Greedy nearest-neighbour data association across frames, with a wider
|
||||
re-acquisition gate for stale tracks (sheep flee during occlusion and
|
||||
reappear off-position), plus memory of last-seen positions for sheep
|
||||
out of FOV. Output is ``{name: (x, y)}`` — Strömbom / Sequential
|
||||
consume it directly.
|
||||
|
||||
Penned-detection heuristic
|
||||
--------------------------
|
||||
Two ways a track is marked penned:
|
||||
1. Its current estimated position is south of the gate plane and
|
||||
within the gate column (the ``is_penned_position`` test the env
|
||||
already uses on ground truth).
|
||||
2. It hasn't been observed for ``STALE_STEPS`` and its last-seen
|
||||
position was inside the gate-approach band — the dog's LiDAR can
|
||||
only see ~2 m into the pen through the open gate, so a sheep
|
||||
that disappeared near the entry has almost certainly entered.
|
||||
|
||||
Tracks marked penned are excluded from ``get_positions()`` (which is
|
||||
what Strömbom consumes), matching the prior receiver-based behaviour.
|
||||
A track is marked penned once its estimated position crosses the gate
|
||||
plane south (``is_penned_position``). Penned tracks are excluded from
|
||||
``get_positions`` and kept indefinitely.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -29,26 +18,22 @@ import math
|
||||
from herding.world.geometry import MAX_SHEEP, in_pen, is_penned_position
|
||||
|
||||
|
||||
GATE_M = 2.5 # m — primary NN gate (recent tracks)
|
||||
REACQUIRE_GATE_M = 4.5 # m — wider gate for re-acquiring stale tracks (sheep moved during occlusion)
|
||||
REACQUIRE_MIN_AGE = 20 # steps — only rebind via the wide gate if the track has been stale for this long
|
||||
PENNED_GATE_M = 4.0 # m — wide gate for matching against already-penned tracks; the pen is small (3×7 m) so duplicates are easy without it
|
||||
FORGET_STEPS = 200 # ~3.2 s — delete stale active tracks; tighter than 5 s to limit phantoms but long enough to bridge typical FOV gaps
|
||||
MAX_ACTIVE_TRACKS = MAX_SHEEP # hard cap to the worst-case real flock size
|
||||
# Penned tracks are never forgotten: sheep don't leave the pen, and
|
||||
# losing the track makes the counter oscillate as the same sheep gets
|
||||
# re-detected and counted multiple times.
|
||||
GATE_M = 2.5 # m — primary NN gate (recently observed tracks)
|
||||
REACQUIRE_GATE_M = 4.5 # m — wider gate for re-binding stale tracks
|
||||
REACQUIRE_MIN_AGE = 20 # steps — track must be this stale to use the wider gate
|
||||
PENNED_GATE_M = 4.0 # m — gate for matching detections to existing penned tracks
|
||||
FORGET_STEPS = 200 # ~3.2 s — delete stale active tracks (penned ones kept forever)
|
||||
MAX_ACTIVE_TRACKS = MAX_SHEEP
|
||||
|
||||
|
||||
class SheepTracker:
|
||||
"""Online tracker with NN association and a forgetful memory.
|
||||
"""Online tracker with NN association and forgetful memory.
|
||||
|
||||
Each track stores ``(x, y, last_seen_step, penned)``.
|
||||
"""
|
||||
|
||||
def __init__(self, gate: float = GATE_M):
|
||||
self.gate = gate
|
||||
# tid → (x, y, last_seen_step, penned)
|
||||
self._tracks: dict[int, tuple[float, float, int, bool]] = {}
|
||||
self._next_id = 0
|
||||
self.step = 0
|
||||
@@ -58,9 +43,6 @@ class SheepTracker:
|
||||
self._next_id = 0
|
||||
self.step = 0
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Update
|
||||
# ------------------------------------------------------------------
|
||||
def update(self, detections: list[tuple[float, float]]) -> dict[str, tuple[float, float]]:
|
||||
"""Fold a new set of detections in and return active positions."""
|
||||
self.step += 1
|
||||
@@ -68,9 +50,9 @@ class SheepTracker:
|
||||
det_used: set[int] = set()
|
||||
updated_tids: set[int] = set()
|
||||
|
||||
# Pass 1: match against ACTIVE tracks first (oldest-seen-first so
|
||||
# a re-emerging long-lost sheep grabs its old ID before a fresh
|
||||
# neighbour does).
|
||||
# Pass 1 — match active tracks within the primary gate. Oldest-
|
||||
# seen tracks bind first so a re-emerging long-lost sheep keeps
|
||||
# its old ID rather than losing its detection to a fresher track.
|
||||
active_tids = [tid for tid, t in self._tracks.items() if not t[3]]
|
||||
active_tids.sort(key=lambda tid: self._tracks[tid][2])
|
||||
for tid in active_tids:
|
||||
@@ -89,12 +71,10 @@ class SheepTracker:
|
||||
det_used.add(best_j)
|
||||
updated_tids.add(tid)
|
||||
|
||||
# Pass 1b: re-acquisition with a wider gate for tracks that have
|
||||
# been stale for ≥ REACQUIRE_MIN_AGE steps. Sheep flee at
|
||||
# ~0.6 m/s; over a 1–2 s occlusion (dog rotating or driving)
|
||||
# they move enough that a fresh detection lies outside the
|
||||
# primary GATE_M but is still clearly the same sheep. Without
|
||||
# this, phantom tracks accumulate and corrupt the CoM.
|
||||
# Pass 1b — re-acquisition. Sheep flee at ~0.6 m/s, so over a
|
||||
# 1–2 s occlusion the same sheep may reappear outside the primary
|
||||
# gate. Allow rebinding within a wider gate for stale-enough
|
||||
# tracks; otherwise phantom tracks accumulate and corrupt CoM.
|
||||
for tid in active_tids:
|
||||
if tid in updated_tids:
|
||||
continue
|
||||
@@ -115,10 +95,7 @@ class SheepTracker:
|
||||
det_used.add(best_j)
|
||||
updated_tids.add(tid)
|
||||
|
||||
# Pass 2: match remaining detections against PENNED tracks with
|
||||
# a tighter gate. Without this, every frame near the gate spawns
|
||||
# a fresh penned track for the same sheep, which under a long
|
||||
# Webots run leads to thousands of phantom penned tracks.
|
||||
# Pass 2 — match remaining detections to penned tracks.
|
||||
penned_tids = [tid for tid, t in self._tracks.items() if t[3]]
|
||||
for tid in penned_tids:
|
||||
tx, ty, _, _ = self._tracks[tid]
|
||||
@@ -135,9 +112,8 @@ class SheepTracker:
|
||||
self._tracks[tid] = (dx, dy, self.step, True)
|
||||
det_used.add(best_j)
|
||||
|
||||
# Unmatched detections → new tracks. A detection that is already
|
||||
# inside the pen is born "penned" so we don't accumulate active
|
||||
# tracks for sheep that arrived in the pen during occlusion.
|
||||
# Spawn new tracks for unmatched detections. Born "penned" if
|
||||
# the detection already sits inside the pen geometry.
|
||||
for j, (dx, dy) in enumerate(detections):
|
||||
if j in det_used:
|
||||
continue
|
||||
@@ -145,44 +121,32 @@ class SheepTracker:
|
||||
self._tracks[self._next_id] = (dx, dy, self.step, penned)
|
||||
self._next_id += 1
|
||||
|
||||
# Promote active tracks to penned ONLY by geometric position
|
||||
# (sheep is in the pen column south of the gate). The previous
|
||||
# "stale + near gate" heuristic was firing on ordinary occlusion
|
||||
# near the gate and creating phantom penned tracks.
|
||||
# Promote active tracks whose current estimate crosses the gate.
|
||||
for tid, (tx, ty, last, penned) in list(self._tracks.items()):
|
||||
if penned:
|
||||
continue
|
||||
if is_penned_position(tx, ty):
|
||||
self._tracks[tid] = (tx, ty, last, True)
|
||||
|
||||
# Forget stale ACTIVE tracks after FORGET_STEPS. Penned tracks
|
||||
# are kept indefinitely — sheep can't escape the pen, so once a
|
||||
# track is marked penned, that sheep is permanently penned.
|
||||
# Forget stale active tracks; penned tracks live forever.
|
||||
for tid, (tx, ty, last, penned) in list(self._tracks.items()):
|
||||
if penned:
|
||||
continue
|
||||
if (self.step - last) > FORGET_STEPS:
|
||||
del self._tracks[tid]
|
||||
|
||||
# Hard cap on the active set. If we somehow have more than
|
||||
# MAX_ACTIVE_TRACKS active tracks, drop the oldest-seen ones
|
||||
# first — they are most likely false positives from world
|
||||
# geometry (walls, gate posts) the env's raycaster doesn't
|
||||
# model, and a bloated active set wrecks the downstream CoM.
|
||||
# Hard cap on the active set — drop the oldest-seen overflow.
|
||||
active = [(tid, last) for tid, (_, _, last, p) in self._tracks.items()
|
||||
if not p]
|
||||
if len(active) > MAX_ACTIVE_TRACKS:
|
||||
active.sort(key=lambda kv: kv[1]) # oldest-seen first
|
||||
active.sort(key=lambda kv: kv[1])
|
||||
for tid, _ in active[: len(active) - MAX_ACTIVE_TRACKS]:
|
||||
del self._tracks[tid]
|
||||
|
||||
return self.get_positions()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Outputs
|
||||
# ------------------------------------------------------------------
|
||||
def get_positions(self) -> dict[str, tuple[float, float]]:
|
||||
"""Active (not-yet-penned) tracks. Same shape as receiver dict."""
|
||||
"""Active (not-penned) tracks as a ``{name: (x, y)}`` dict."""
|
||||
return {f"t{tid}": (x, y)
|
||||
for tid, (x, y, _, penned) in self._tracks.items()
|
||||
if not penned}
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
"""Differential-drive kinematics matching the Webots robot specs.
|
||||
"""Differential-drive kinematics, shared by the env and Webots controllers.
|
||||
|
||||
The Webots controllers and the training env both use these helpers so the
|
||||
sim and the real (Webots) physics agree to first order. They do not model
|
||||
slip, wheel acceleration limits, or contact forces — Webots does that for
|
||||
us at inference time. The training env has to be close enough that a
|
||||
policy trained against this kinematic model still works when handed off
|
||||
to ODE physics.
|
||||
First-order rigid-body model — no slip, wheel-accel limits, or contact
|
||||
forces. Webots' ODE physics handles those at inference; the env stays
|
||||
close enough to first order that a policy trained here transfers.
|
||||
"""
|
||||
|
||||
import math
|
||||
@@ -34,10 +31,9 @@ def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt):
|
||||
|
||||
def velocity_to_wheels(vx, vy, h, max_linear, wheel_radius, max_wheel_omega,
|
||||
k_turn=4.0):
|
||||
"""Convert a desired (vx, vy) intent in [-1, 1]^2 to wheel speeds.
|
||||
"""Convert a desired (vx, vy) intent in [-1, 1]² to wheel speeds.
|
||||
|
||||
Mirrors ``drive_action`` in controllers/shepherd_dog/shepherd_dog.py:
|
||||
forward speed scales by ``cos(err)`` (clamped to ±90°), and a P
|
||||
Forward speed scales by ``cos(err)`` (clamped to ±90°); a P
|
||||
controller on heading error contributes the wheel-rate differential.
|
||||
"""
|
||||
speed_ms = math.hypot(vx, vy) * max_linear
|
||||
@@ -56,12 +52,7 @@ def velocity_to_wheels(vx, vy, h, max_linear, wheel_radius, max_wheel_omega,
|
||||
|
||||
def heading_speed_to_wheels(heading, speed_motor, h, max_wheel_omega,
|
||||
k_turn=4.0):
|
||||
"""Sheep variant: speed already expressed in motor (wheel rad/s) units.
|
||||
|
||||
Matches the existing sheep controller (``controllers/sheep/sheep.py``)
|
||||
where ``speed = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))`` and
|
||||
these constants are wheel angular velocities, not linear m/s.
|
||||
"""
|
||||
"""Sheep variant: speed in wheel rad/s, target as a heading angle."""
|
||||
err = math.atan2(math.sin(heading - h), math.cos(heading - h))
|
||||
fwd = max(0.0, math.cos(err)) * speed_motor
|
||||
turn = k_turn * err
|
||||
|
||||
@@ -1,24 +1,19 @@
|
||||
"""Sheep flocking dynamics — Strömbom 2014 / Reynolds 1987 hybrid.
|
||||
"""Sheep flocking dynamics — Strömbom 2014 / Reynolds 1987.
|
||||
|
||||
This is the per-sheep behavioural step used both by the Webots sheep
|
||||
controller (scalar, one sheep at a time) and by the training environment
|
||||
(loop over sheep).
|
||||
|
||||
Model
|
||||
-----
|
||||
The force stack each step (summed → heading + speed):
|
||||
Per-sheep behavioural step used by both the Webots sheep controller
|
||||
and the training environment. Each step a force stack is summed:
|
||||
|
||||
flee — quadratic ramp away from dog within FLEE_DIST
|
||||
(Strömbom 2014 §2.1, term ρa)
|
||||
(Strömbom 2014, term ρa)
|
||||
cohesion — drift toward local centre of mass of peers within
|
||||
COHESION_DIST (Strömbom 2014 §2.1, term c).
|
||||
Weight is **higher when fleeing** — modelling the
|
||||
"safety in numbers" / predator-confusion effect
|
||||
Strömbom 2014 describes as fear-induced cohesion.
|
||||
COHESION_DIST (Strömbom 2014, term c). Weight is
|
||||
higher while fleeing — fear-induced cohesion.
|
||||
separation — short-range inverse-distance repulsion from peers
|
||||
(Strömbom 2014 §2.1, term α; Reynolds 1987)
|
||||
wander — small persistent drift for natural idle motion
|
||||
(Strömbom 2014 §2.1, noise term ε)
|
||||
(Strömbom 2014 term α; Reynolds 1987)
|
||||
wander — small persistent drift (Strömbom 2014 noise term ε)
|
||||
|
||||
Walls, the south-wall gate column, and in-pen containment are
|
||||
environment-specific additions for the fenced Webots field.
|
||||
|
||||
References
|
||||
----------
|
||||
@@ -26,26 +21,6 @@ References
|
||||
for herding autonomous, interacting agents." J R Soc Interface 11.
|
||||
- Reynolds (1987). "Flocks, herds and schools: A distributed
|
||||
behavioural model." SIGGRAPH '87.
|
||||
|
||||
Environment-specific adaptations
|
||||
--------------------------------
|
||||
The original Strömbom model assumes an open field. Our scenario adds:
|
||||
|
||||
* Field walls — soft repulsion within ``WALL_MARGIN`` plus a hard
|
||||
escape band when inside ``WALL_HARD_MARGIN``. Necessary because the
|
||||
Webots field is fenced (30 m square enclosure).
|
||||
* Gate column — the south wall has a 3 m gap at x ∈ [10, 13]; sheep
|
||||
pass through it freely (no wall force inside the column).
|
||||
* Penned containment — once a sheep crosses the gate plane south
|
||||
(``geometry.is_penned_position``), the caller flags ``penned=True``
|
||||
and we switch to in-pen wall-bounce + jitter. Sheep do not exit the
|
||||
pen on their own. This is a hard sim constraint, not a behavioural
|
||||
claim about real sheep.
|
||||
|
||||
Parameter tuning (cohesion weight 3× while fleeing) was chosen so the
|
||||
flock survives passage through the 3 m gate without fragmenting — this
|
||||
is a defensible engineering adaptation of Strömbom's qualitative
|
||||
"fear-induced cohesion" to our gate width.
|
||||
"""
|
||||
|
||||
import math
|
||||
@@ -57,9 +32,7 @@ from herding.world.geometry import (
|
||||
GATE_X,
|
||||
)
|
||||
|
||||
# --- Speed and force constants ---
|
||||
# All speeds here are in wheel rad/s (motor units), matching the existing
|
||||
# sheep controller. Conversion to m/s = speed * SHEEP_WHEEL_RADIUS.
|
||||
# Speeds are in wheel rad/s (motor units); m/s = speed * SHEEP_WHEEL_RADIUS.
|
||||
MAX_SPEED = 22.0
|
||||
FLEE_SPEED = 20.0
|
||||
WANDER_SPEED = 3.0
|
||||
@@ -70,7 +43,7 @@ WALL_HARD_GAIN = 50.0
|
||||
|
||||
FLEE_DIST = 7.0
|
||||
SEPARATION_DIST = 2.5
|
||||
COHESION_DIST = 12.0 # was 8.0 — wider engagement so far-flung sheep are pulled in
|
||||
COHESION_DIST = 12.0
|
||||
|
||||
PEN_MARGIN = 0.8
|
||||
|
||||
@@ -85,21 +58,17 @@ def _peers_iter(peers):
|
||||
def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
|
||||
"""Return ``(heading, speed, new_wander_angle)`` for one sheep step.
|
||||
|
||||
``speed`` is in wheel rad/s (motor units), bounded by ``[WANDER_SPEED,
|
||||
FLEE_SPEED]``. ``heading`` is the world-frame target heading the sheep
|
||||
should aim for (atan2 convention).
|
||||
|
||||
``rng`` is an optional ``random.Random``-compatible object used for
|
||||
the wander-jitter. If ``None``, falls back to Python's global module
|
||||
(matches Webots controller usage). Pass an env-owned RNG to make
|
||||
rollouts deterministic given a seed.
|
||||
``speed`` is in wheel rad/s, bounded by ``[WANDER_SPEED, FLEE_SPEED]``.
|
||||
``heading`` is the world-frame target heading (atan2 convention).
|
||||
``rng`` is an optional ``random.Random`` used for wander jitter; if
|
||||
``None``, the global ``random`` module is used.
|
||||
"""
|
||||
fx, fy = 0.0, 0.0
|
||||
peer_list = _peers_iter(peers)
|
||||
rnd = rng if rng is not None else random
|
||||
|
||||
if penned:
|
||||
# --- Pen containment: bounce off the four pen walls ---
|
||||
# Pen containment: bounce off all four pen walls.
|
||||
pm = PEN_MARGIN
|
||||
if x < PEN_X[0] + pm:
|
||||
fx += ((PEN_X[0] + pm - x) / pm) * 15.0
|
||||
@@ -110,7 +79,7 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
|
||||
if y > PEN_Y[1] - pm:
|
||||
fy -= ((y - (PEN_Y[1] - pm)) / pm) * 15.0
|
||||
|
||||
# Mild peer separation — penned sheep crowd the corner otherwise.
|
||||
# Mild peer separation so penned sheep don't crowd one corner.
|
||||
for px, py in peer_list:
|
||||
dx, dy = px - x, py - y
|
||||
d = math.hypot(dx, dy)
|
||||
@@ -125,7 +94,7 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
|
||||
fy += math.sin(wander_angle) * 0.5
|
||||
|
||||
else:
|
||||
# --- Free-roaming sheep in the field ---
|
||||
# Free-roaming sheep in the field.
|
||||
fleeing = False
|
||||
if dog_xy is not None:
|
||||
ddx = dog_xy[0] - x
|
||||
@@ -138,11 +107,9 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
|
||||
fx -= (ddx / dist) * s
|
||||
fy -= (ddy / dist) * s
|
||||
|
||||
# Cohesion — drift toward flock CoM (peers within COHESION_DIST).
|
||||
# Cohesion is *stronger* under flee than at rest (the
|
||||
# predator-confusion / safety-in-numbers effect — sheep huddle when
|
||||
# threatened). This is what makes shepherding work: the flock stays
|
||||
# as one unit through the narrow gate instead of fragmenting.
|
||||
# Cohesion: drift toward the local CoM of peers within
|
||||
# COHESION_DIST. Stronger while fleeing — fear-induced
|
||||
# cohesion keeps the flock together through the gate.
|
||||
cx, cy, cn = 0.0, 0.0, 0
|
||||
for px, py in peer_list:
|
||||
d = math.hypot(px - x, py - y)
|
||||
@@ -151,12 +118,6 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
|
||||
cy += py
|
||||
cn += 1
|
||||
if cn > 0:
|
||||
# Cohesion needs to dominate flee at close range so the flock
|
||||
# stays glued together when squeezing through the narrow gate.
|
||||
# Flee at 2 m has magnitude ~10; cohesion of w=3.0 with the
|
||||
# peer-CoM 4 m away contributes ~12, so the flock prefers
|
||||
# bunching to dispersing under pressure. This is what makes
|
||||
# canonical Strömbom drive work in our 3 m gate.
|
||||
w = 3.0 if fleeing else 1.0
|
||||
fx += (cx / cn - x) * w
|
||||
fy += (cy / cn - y) * w
|
||||
@@ -170,8 +131,7 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
|
||||
fx -= (ddx / d) * push * 2.5
|
||||
fy -= (ddy / d) * push * 2.5
|
||||
|
||||
# Wall soft repulsion. The south wall is absent inside the gate
|
||||
# column so sheep can be driven through it by the dog.
|
||||
# Wall soft repulsion (south wall absent inside the gate column).
|
||||
if x < FIELD_X[0] + WALL_MARGIN:
|
||||
fx += ((FIELD_X[0] + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
|
||||
if x > FIELD_X[1] - WALL_MARGIN:
|
||||
@@ -187,7 +147,7 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
|
||||
fx += math.cos(wander_angle) * 0.5
|
||||
fy += math.sin(wander_angle) * 0.5
|
||||
|
||||
# --- Hard escape band — overrides everything when very close to a wall ---
|
||||
# Hard escape band — overrides everything else near a wall.
|
||||
m, g = WALL_HARD_MARGIN, WALL_HARD_GAIN
|
||||
if x - FIELD_X[0] < m:
|
||||
fx = max(fx, g * (1.0 - (x - FIELD_X[0]) / m))
|
||||
@@ -195,7 +155,6 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
|
||||
fx = min(fx, -g * (1.0 - (FIELD_X[1] - x) / m))
|
||||
if FIELD_Y[1] - y < m:
|
||||
fy = min(fy, -g * (1.0 - (FIELD_Y[1] - y) / m))
|
||||
# South wall hard escape only when not in the gate column and not penned.
|
||||
if (not penned) and (y - FIELD_Y[0] < m) and not (GATE_X[0] <= x <= GATE_X[1]):
|
||||
fy = max(fy, g * (1.0 - (y - FIELD_Y[0]) / m))
|
||||
|
||||
|
||||
+14
-35
@@ -1,23 +1,15 @@
|
||||
"""World geometry and robot specs.
|
||||
|
||||
All coordinates are in meters. (0, 0) is the centre of the field, +x is
|
||||
east, +y is north. Z is up but unused here. These constants must match
|
||||
``worlds/field.wbt`` and the proto files; if the world changes, change
|
||||
this file and only this file.
|
||||
|
||||
Pen layout (post-refactor)
|
||||
--------------------------
|
||||
The pen is *external* to the field, accessed through a 3 m gate cut into
|
||||
the south stone wall at y = -15. Sheep entering through the gate end up
|
||||
in a fenced rectangle south of the field; the dog stays in the field
|
||||
(soft-limited above DOG_SOUTH_LIMIT during training and inference).
|
||||
Coordinates are metres; (0, 0) is the field centre, +x east, +y north.
|
||||
These constants mirror ``worlds/field.wbt`` and the proto files — if
|
||||
the world changes, this file is the single point of update.
|
||||
|
||||
field +y north
|
||||
+-----------+
|
||||
| |
|
||||
| |
|
||||
| ...... |
|
||||
+---||||----+ y = -15 (south wall, gate at x ∈ [10, 13])
|
||||
+---||||----+ y = -15 (south wall, 3 m gate at x ∈ [10, 13])
|
||||
||||
|
||||
|pen| y ∈ [-22, -15]
|
||||
+---+
|
||||
@@ -25,46 +17,38 @@ in a fenced rectangle south of the field; the dog stays in the field
|
||||
|
||||
import math
|
||||
|
||||
# --- Field (square, stone-walled) ---
|
||||
# Field (square, stone-walled)
|
||||
FIELD_X = (-15.0, 15.0)
|
||||
FIELD_Y = (-15.0, 15.0)
|
||||
|
||||
# Conservative inside bounds — sheep/dog should not graze the wall.
|
||||
FIELD_INSIDE_MARGIN = 0.5
|
||||
|
||||
# --- Pen (external, south of the field) ---
|
||||
# Pen (external, south of the field)
|
||||
PEN_X = (10.0, 13.0)
|
||||
PEN_Y = (-22.0, -15.0)
|
||||
PEN_CENTER = (0.5 * (PEN_X[0] + PEN_X[1]), 0.5 * (PEN_Y[0] + PEN_Y[1]))
|
||||
# The point the dog drives the flock toward: the gate centre on the field side.
|
||||
PEN_ENTRY = (0.5 * (PEN_X[0] + PEN_X[1]), -15.0)
|
||||
|
||||
# --- Gate (the hole in the south stone wall) ---
|
||||
# Gate (hole in the south wall)
|
||||
GATE_X = PEN_X
|
||||
GATE_Y = -15.0
|
||||
|
||||
# --- Robot specs (must match proto files) ---
|
||||
# Dog (controllers/shepherd_dog/, protos/ShepherdDog.proto)
|
||||
# Dog spec — protos/ShepherdDog.proto
|
||||
DOG_WHEEL_RADIUS = 0.038 # m
|
||||
DOG_WHEEL_BASE = 0.28 # m, axle-to-axle
|
||||
DOG_MAX_WHEEL_OMEGA = 70.0 # rad/s
|
||||
DOG_MAX_LINEAR = DOG_WHEEL_RADIUS * DOG_MAX_WHEEL_OMEGA # ~2.66 m/s
|
||||
DOG_MAX_LINEAR = DOG_WHEEL_RADIUS * DOG_MAX_WHEEL_OMEGA # ≈ 2.66 m/s
|
||||
|
||||
# Sheep (controllers/sheep/, protos/Sheep.proto)
|
||||
# Sheep spec — protos/Sheep.proto
|
||||
SHEEP_WHEEL_RADIUS = 0.031 # m
|
||||
SHEEP_WHEEL_BASE = 0.20 # m
|
||||
SHEEP_MAX_WHEEL_OMEGA = 25.0 # rad/s
|
||||
SHEEP_MAX_LINEAR = SHEEP_WHEEL_RADIUS * SHEEP_MAX_WHEEL_OMEGA # ~0.78 m/s
|
||||
SHEEP_MAX_LINEAR = SHEEP_WHEEL_RADIUS * SHEEP_MAX_WHEEL_OMEGA # ≈ 0.78 m/s
|
||||
|
||||
# --- Webots step ---
|
||||
WEBOTS_DT = 0.016 # seconds, matches WorldInfo.basicTimeStep = 16 in field.wbt
|
||||
WEBOTS_DT = 0.016 # seconds (matches WorldInfo.basicTimeStep)
|
||||
|
||||
# --- Dog "virtual south wall" (training keeps dog out of the pen) ---
|
||||
# At inference the controller also clips to this so a slightly miscalibrated
|
||||
# policy doesn't accidentally drive into the pen and trap the sheep.
|
||||
# Virtual south wall — env and controller both keep the dog north of this.
|
||||
DOG_SOUTH_LIMIT = -14.5
|
||||
|
||||
# --- Maximum supported flock size ---
|
||||
MAX_SHEEP = 10
|
||||
|
||||
|
||||
@@ -85,12 +69,7 @@ def in_gate_corridor(x: float, y: float, margin: float = 0.0) -> bool:
|
||||
|
||||
|
||||
def is_penned_position(x: float, y: float, latch_margin: float = 0.2) -> bool:
|
||||
"""A sheep latches to "penned" once it crosses the gate plane south.
|
||||
|
||||
True iff x is inside the gate column (with a small margin) AND
|
||||
y has dipped below the gate line. Once latched, the sheep is held by
|
||||
in-pen forces and will not exit on its own.
|
||||
"""
|
||||
"""True iff x is in the gate column (± latch_margin) and y <= GATE_Y."""
|
||||
return (PEN_X[0] - latch_margin <= x <= PEN_X[1] + latch_margin
|
||||
and y <= GATE_Y)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user