Checkpoint 7

This commit is contained in:
Johnny Fernandes
2026-05-11 12:21:51 +01:00
parent fce0e0c786
commit a01a5c9cef
34 changed files with 1266 additions and 1038 deletions
+25 -55
View File
@@ -1,23 +1,19 @@
"""Active-perception wrapper for the analytic shepherding teachers.
"""Active-perception wrapper for the analytic shepherd teachers.
Under LiDAR (partial observability), the tracker starts empty — the
dog hasn't seen any sheep yet. A naive Strömbom call returns
``(0, 0, "idle")`` and the dog stops. The student then learns "do
nothing when the tracker is empty," which is a fatal local optimum.
Under partial-observability LiDAR perception the tracker starts empty
— a naive analytic teacher returns ``(0, 0, "idle")`` and the dog
stops. This wrapper interleaves the underlying teacher with two
exploration behaviours:
This wrapper replaces the idle case with a **scan action**: a unit
vector 90° CCW from the dog's current forward direction. Passed
through ``velocity_to_wheels`` it produces a fast in-place rotation
(``cos(err)`` clamp drives forward speed to ~0 because the target is
orthogonal to the heading). The dog spins for the first
``initial_scan_steps`` steps of every episode regardless of tracker
state, and re-enters scan whenever the tracker goes empty mid-episode.
* opening in-place rotation for the first ``INITIAL_SCAN_STEPS``,
guaranteeing the LiDAR sweeps a full circle before driving;
* walk-to-centre when the tracker has been empty for at least
``EMPTY_DEBOUNCE_STEPS`` consecutive frames (when the dog scans from
a corner, sheep can sit beyond the 12 m LiDAR range).
Once enough sheep are tracked, control hands over to the underlying
analytic teacher (Strömbom or Sequential), which now operates on a
populated tracker dict. Both teacher and student see the same
LiDAR-perceived view — there's no information asymmetry, so the
student can in principle achieve the teacher's full performance.
When the tracker has detections the base teacher's action is used,
post-processed by ``modulate_speed_near_sheep`` so the dog doesn't
charge the flock.
"""
from __future__ import annotations
@@ -27,26 +23,17 @@ import math
from herding.control.modulation import modulate_speed_near_sheep
INITIAL_SCAN_STEPS = 80 # ≈1.3 s at dt=16 ms — full rotation at the +π turn target.
EXPLORE_SPEED = 0.7 # m/s-ish unit (action norm) used when walking blind
# Debounce on tracker emptiness — a single empty frame between
# detections is not enough reason to abandon the drive and start
# scanning. Require this many consecutive empty frames first.
EMPTY_DEBOUNCE_STEPS = 8
INITIAL_SCAN_STEPS = 80 # ≈1.3 s — covers one full rotation
EXPLORE_SPEED = 0.7 # action norm while walking blind
EMPTY_DEBOUNCE_STEPS = 8 # consecutive empty frames before exploring
class ActiveScanTeacher:
"""Stateful wrapper. Construct one per episode; call ``reset()``
between episodes if reusing the instance.
"""Stateful wrapper. Construct one per episode (or call ``reset``).
Call signature::
vx, vy, mode = teacher(dog_xy, dog_heading, sheep_positions, pen_target)
Note the extra ``dog_heading`` arg — required to compute the
rotation direction. The base teachers (Strömbom, Sequential)
don't use heading; we strip it before passing them through.
"""
def __init__(self, base_action_fn, initial_scan_steps: int = INITIAL_SCAN_STEPS):
@@ -61,27 +48,17 @@ class ActiveScanTeacher:
@staticmethod
def _scan_action(dog_heading: float) -> tuple[float, float]:
# Target = current_heading + π. velocity_to_wheels gets err=π,
# so turn = k_turn·π = 4π ≈ 12.6 rad/s wheel angular vel and
# cos(err) clamps the forward speed to ~0. Maximum in-place
# rotation under this controller; one full rotation in ~60 steps.
# Target opposite to current heading; velocity_to_wheels'
# cos(err) clamp drives forward speed to ~0 → in-place rotation.
target = dog_heading + math.pi
return math.cos(target), math.sin(target)
@staticmethod
def _explore_action(dog_xy) -> tuple[float, float]:
"""Walk back toward the field centre when nothing is in view.
At difficulty=1 sheep can spawn up to ~18 m from origin while
the LiDAR has a 12 m range, so an in-place scan from a corner
can return zero hits. Walking toward (0, 0) shrinks the
max-distance-to-any-sheep and the scanner cone sweeps along
the path, eventually picking sheep up.
"""
"""Walk toward (0, 0) while the LiDAR keeps sweeping."""
dx, dy = -dog_xy[0], -dog_xy[1]
d = math.hypot(dx, dy)
if d < 0.5:
# At the centre — fall through to a scan instead.
return 0.0, 0.0
return EXPLORE_SPEED * dx / d, EXPLORE_SPEED * dy / d
@@ -89,22 +66,18 @@ class ActiveScanTeacher:
self.step += 1
n_visible = len(sheep_positions)
# Track empty-streak for the explore debounce.
if n_visible == 0:
self.empty_streak += 1
else:
self.empty_streak = 0
# Phase 1: opening rotation, regardless of tracker state.
# Phase 1: opening rotation.
if self.step <= self.initial_scan:
vx, vy = self._scan_action(dog_heading)
self.last_action = (vx, vy)
return vx, vy, "scan_initial"
# Phase 2: tracker has been empty for a while — walk back to the
# centre while the LiDAR keeps sweeping. The debounce prevents
# this from firing every time the tracker briefly blinks to zero
# (which causes the "dog starts going away from sheep" symptom).
# Phase 2: walk-to-centre after a sustained empty tracker.
if self.empty_streak >= EMPTY_DEBOUNCE_STEPS:
ex, ey = self._explore_action(dog_xy)
if ex == 0.0 and ey == 0.0:
@@ -116,16 +89,13 @@ class ActiveScanTeacher:
self.last_action = (vx, vy)
return vx, vy, mode
# Phase 2b: tracker just blinked empty for <DEBOUNCE frames —
# hold the previous action so the dog doesn't lurch.
# Phase 2b: brief tracker blink — hold the previous action.
if n_visible == 0:
vx, vy = self.last_action
return vx, vy, "hold"
# Phase 3: hand to the underlying analytic teacher, then apply
# the shared near-sheep speed modulation (centralised in
# herding.control so the BC student, Strömbom, Sequential and
# the DAgger teacher all behave identically near sheep).
# Phase 3: hand off to the underlying analytic teacher, then
# apply the shared near-sheep speed modulation.
vx, vy, mode = self.base(dog_xy, sheep_positions, pen_target)
vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
self.last_action = (vx, vy)
+9 -20
View File
@@ -1,8 +1,8 @@
"""Shared low-level control helpers used by every dog mode.
"""Shared action post-processing.
Centralised here so the BC student, Strömbom, Sequential, and the DAgger
teacher all apply identical post-processing to their action outputs.
The downstream wheel-velocity layer (``herding.diffdrive``) is unchanged.
Every dog mode routes its action through ``modulate_speed_near_sheep``
so the magnitude is reduced near sheep — direction (intent) is
preserved.
"""
from __future__ import annotations
@@ -10,12 +10,8 @@ from __future__ import annotations
import math
# Speed-modulation: scale action magnitude down when close to the
# nearest sheep. Stops the dog from charging in at full speed and
# scattering the flock. Action norm linearly ramps from MIN_SPEED at
# distance 0 to 1.0 at SLOW_NEAR_SHEEP.
SLOW_NEAR_SHEEP = 2.5
MIN_SPEED = 0.30
SLOW_NEAR_SHEEP = 2.5 # m — distance below which action norm is scaled down
MIN_SPEED = 0.30 # action norm at zero distance
def modulate_speed_near_sheep(
@@ -25,16 +21,9 @@ def modulate_speed_near_sheep(
slow_dist: float = SLOW_NEAR_SHEEP,
min_scale: float = MIN_SPEED,
) -> tuple[float, float]:
"""Scale (vx, vy) magnitude down when close to the nearest sheep.
``sheep_positions`` accepts either a ``{name: (x, y)}`` dict
(matching what the trackers emit) or an iterable of ``(x, y)``
tuples. Empty input → action returned unchanged.
The intent direction is preserved; only magnitude is reduced. With
``slow_dist=2.5`` and ``min_scale=0.3``, an action that started at
norm 1 is multiplied by 0.3 right next to a sheep, by 0.65 at 1 m
away, and by 1.0 once the nearest sheep is ≥ 2.5 m off.
"""Linearly ramp action magnitude from ``min_scale`` at distance 0
to 1.0 at ``slow_dist``. ``sheep_positions`` may be a
``{name: (x, y)}`` dict or an iterable of ``(x, y)`` tuples.
"""
if not sheep_positions:
return vx, vy
+7 -31
View File
@@ -1,25 +1,9 @@
"""Sequential single-target shepherd dog algorithm.
"""Sequential "pin-and-push" shepherd-dog controller.
Strömbom drives the flock's centre of mass; with N sheep and a narrow
3 m gate, this fails because the flock is wider than the gate and CoM
driving abandons stragglers. Real sheepdogs solve this differently:
they pick *one* sheep at a time, drive it through, return for the next.
This module implements that "pin-and-push" approach.
Algorithm (one step):
1. Active sheep = those still in the field (not yet penned).
2. Target = the active sheep currently closest to the pen entry.
3. Drive position = ``target + Δ · unit(target - pen_entry)`` —
directly behind the target relative to the goal.
4. Output unit vector pointing the dog at the drive position.
Once the target crosses the gate it latches as penned and is removed
from the active set; the next-closest unpenned sheep becomes the
target. The algorithm naturally "queues" sheep through the gate.
Empirically (with our flocking dynamics) this scales linearly with
flock size and works up to at least n=10 within a 15 000-step budget.
Single-target alternative to Strömbom: each step, target the sheep
closest to the pen, park behind it, drive it through; once it latches
penned the next-closest sheep becomes the target. Naturally queues
the flock through a narrow gate.
"""
import math
@@ -43,25 +27,17 @@ def _is_active(x, y) -> bool:
def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
"""Return ``(vx, vy, mode)`` where mode encodes the current target.
Compatible with the Strömbom call signature so it can be drop-in
swapped in the dog controller and the env's imitation reward.
"""
"""Return ``(vx, vy, mode)`` — same call signature as Strömbom."""
active = [(name, x, y) for name, (x, y) in sheep_positions.items()
if _is_active(x, y)]
if not active:
return 0.0, 0.0, "idle"
# Pick target = sheep closest to pen entry. Stable choice: as one
# sheep approaches and crosses the gate it stays the target until
# latched; then the next-closest takes over.
name, sx, sy = min(
active,
key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
)
# Drive position behind the target along the (target → pen) line.
ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
tx = sx + DELTA_DRIVE * ux
ty = sy + DELTA_DRIVE * uy
@@ -71,7 +47,7 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
"""Debug variant returning ``(vx, vy, mode, debug_dict)``."""
"""``compute_action`` plus a debug dict (target, drive point)."""
active = [(name, x, y) for name, (x, y) in sheep_positions.items()
if _is_active(x, y)]
if not active:
+14 -33
View File
@@ -1,30 +1,20 @@
"""Strömbom collect/drive heuristic for the shepherd dog.
"""Strömbom (2014) collect/drive heuristic for the shepherd dog.
Adapted from the original ``controllers/shepherd_dog/strombom.py`` and
updated for the external pen layout. Used as a baseline controller and
as the fallback when the RL policy isn't available.
When the flock is scattered (max radius > F_FACTOR · √n) the dog moves
to a point behind the furthest sheep and pushes it back toward the
flock CoM. Otherwise it drives, parking behind the CoM relative to
the pen target. Returns a unit-vector intent ``(vx, vy, mode)``.
Reference: Strömbom et al. 2014, "Solving the shepherding problem".
Reference: Strömbom et al. 2014, "Solving the shepherding problem."
"""
import math
from herding.world.geometry import PEN_ENTRY, GATE_Y, in_pen
# Algorithm parameters. DELTA_DRIVE / DELTA_COLLECT were tightened from
# the original (4.0 / 2.5) because the new external pen sits ~26 m from
# typical sheep spawn locations — at the old 4 m standoff, the flee force
# (quadratic ramp, 3.7 at 4 m vs ~10 at 2 m) couldn't move sheep through
# the path inside the 3000-step episode budget.
#
# F_FACTOR was 2.0 in the original Strömbom paper; raised to 4.0 here so
# the dog stays in *drive* mode much longer. With our tighter cohesion
# (flocking_sim.py), partially-collected flocks consolidate naturally
# during a drive, and we don't waste 80% of the time budget on a slow
# "collect" pre-phase.
F_FACTOR = 4.0
DELTA_COLLECT = 1.5
DELTA_DRIVE = 2.0
F_FACTOR = 4.0 # collect/drive threshold scaled by √n
DELTA_COLLECT = 1.5 # drive-position offset behind the furthest sheep
DELTA_DRIVE = 2.0 # drive-position offset behind the flock CoM
def _unit(x, y):
@@ -35,18 +25,12 @@ def _unit(x, y):
def _is_active(x, y) -> bool:
"""A sheep is "active" if it's still in the field — not in or below
the gate plane (we treat anything south of the gate as committed to
the pen and stop trying to herd it)."""
"""A sheep still in the field counts; one south of the gate doesn't."""
return (not in_pen(x, y)) and y > GATE_Y
def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
"""Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}.
``sheep_positions`` is a ``{name: (x, y)}`` mapping (matches the
Webots controller's representation).
"""
"""Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}."""
active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
if not active:
return 0.0, 0.0, "idle"
@@ -58,14 +42,14 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
radius = max(dists)
if radius > F_FACTOR * math.sqrt(n):
# Collect: aim at a point behind the furthest sheep, opposite the CoM.
# Collect: aim behind the furthest sheep, opposite the CoM.
idx = max(range(n), key=lambda i: dists[i])
sx, sy = active[idx]
ux, uy = _unit(sx - com_x, sy - com_y)
tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
mode = "collect"
else:
# Drive: aim at a point behind the flock CoM relative to the goal.
# Drive: aim behind the CoM, opposite the pen.
ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
mode = "drive"
@@ -75,10 +59,7 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
"""Variant of compute_action that also returns a small debug dict.
Kept for parity with the legacy controller's CSV logger.
"""
"""``compute_action`` plus a small debug dict (CoM, target, radius)."""
active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
if not active:
return 0.0, 0.0, "idle", {
+34 -52
View File
@@ -1,26 +1,21 @@
"""Cluster a 2D LiDAR scan into world-frame sheep position estimates.
Pipeline:
ranges (N,) ─► hit mask ─► world-frame points
      │
      ▼
adjacency clustering (gap > GAP_THRESHOLD
starts a new cluster, walking rays in
angular order)
      │
      ▼
centroid + span filter
      │
      ▼
field/pen-corridor filter
      │
      ▼
list of (x, y) detections
The clusterer is intentionally simple — for ≤10 sheep there is rarely
any real ambiguity, and proper DBSCAN would only matter if rays from
two adjacent sheep merged. The downstream tracker handles association
across frames.
ranges (N,) → hit mask → world-frame points
      │
      ▼
adjacency clustering (gap > GAP_THRESHOLD
starts a new cluster, walking rays in
angular order)
      │
      ▼
centroid + span + region + structure filters
      │
      ▼
list of (x, y) detections
The downstream tracker handles association across frames.
"""
from __future__ import annotations
@@ -35,23 +30,19 @@ from herding.perception.lidar_sim import (
)
GAP_THRESHOLD = 0.6 # m — adjacent ray-points farther apart start new cluster
MAX_CLUSTER_SPAN = 1.5 # m — clusters wider than this are likely walls/structures
GAP_THRESHOLD = 0.6 # m — adjacent ray-points farther apart start a new cluster
MAX_CLUSTER_SPAN = 1.5 # m — wider clusters are walls / structures
RANGE_HIT_EPS = 0.05 # m — hit if range < max_range - eps
WALL_REJECT = 0.5 # m — drop detections this close to a known wall line
# Known sheep-sized static features. Detections within STATIC_REJECT
# of any of these are discarded — these aren't sheep. Mid-pillars on
# the field walls are NOT in this list because they're embedded in the
# wall (the wall's span filter handles them); listing them here would
# only reject real sheep that happened to be near the wall.
# Sheep-sized static features (gate posts, corner pillars). A cluster
# centred within STATIC_REJECT of any of these is never a sheep.
_STATIC_FEATURES = (
# Gate posts (sheep-sized boxes flanking the south-wall opening)
( 10.0, -15.0), ( 13.0, -15.0),
# Field corner pillars
( 15.0, 15.0), ( 15.0, -15.0), (-15.0, 15.0), (-15.0, -15.0),
( 10.0, -15.0), ( 13.0, -15.0), # gate posts
( 15.0, 15.0), ( 15.0, -15.0),
(-15.0, 15.0), (-15.0, -15.0), # field corners
)
STATIC_REJECT = 0.8 # m — detection within this of a static feature → drop
STATIC_REJECT = 0.8
def detections_from_scan(
@@ -71,6 +62,8 @@ def detections_from_scan(
px = dog_x + ranges * np.cos(world_a)
py = dog_y + ranges * np.sin(world_a)
# Walk rays in angular order; a large jump between consecutive
# world-frame hit points closes the current cluster.
clusters: list[list[tuple[float, float]]] = []
current: list[tuple[float, float]] = []
prev: tuple[float, float] | None = None
@@ -98,41 +91,30 @@ def detections_from_scan(
span = math.hypot(max(xs) - min(xs), max(ys) - min(ys))
if span > MAX_CLUSTER_SPAN:
continue
# Surface-to-centre correction: rays hit the front of the sheep,
# so the cluster centroid is biased toward the dog by SHEEP_RADIUS.
# Push it outward along the dog→cluster direction.
# Rays hit the front edge of the sheep; offset outward by
# SHEEP_RADIUS along the dog→cluster direction to estimate the
# centre.
dx, dy = cx - dog_x, cy - dog_y
d = math.hypot(dx, dy)
if d > 1e-3:
cx += SHEEP_RADIUS * dx / d
cy += SHEEP_RADIUS * dy / d
# Keep detections inside the field OR in the gate corridor /
# external pen — penned sheep are still worth tracking so the
# tracker can latch them as "penned" rather than spawn fresh
# tracks each scan.
# Accept detections inside the field, plus a narrow strip
# immediately south of the gate to catch sheep mid-crossing
# (so they get marked penned via is_penned_position before the
# track goes stale). Detections deeper into the pen are
# dropped entirely — Webots's pen posts and rails would
# otherwise produce a torrent of phantom penned tracks that
# the tracker can't keep up with.
# Region filter: in-field clusters, plus a narrow strip south of
# the gate so sheep mid-crossing get latched penned. Detections
# deeper into the pen are dropped — pen posts and rails would
# otherwise generate phantom penned tracks.
in_main = (FIELD_X[0] - 0.2 < cx < FIELD_X[1] + 0.2 and
FIELD_Y[0] - 0.2 < cy < FIELD_Y[1] + 0.2)
in_gate_strip = (PEN_X[0] - 0.2 < cx < PEN_X[1] + 0.2 and
GATE_Y - 1.0 < cy < GATE_Y + 0.2)
if not (in_main or in_gate_strip):
continue
# Known-static-feature filter: gate posts and corner pillars
# show up as sheep-sized clusters but are never sheep.
# Known sheep-sized static features.
if any(math.hypot(cx - fx, cy - fy) < STATIC_REJECT
for fx, fy in _STATIC_FEATURES):
continue
# Wall-proximity filter: at oblique scan angles, walls produce
# multiple short clusters because adjacent ray returns are
# spaced just above GAP_THRESHOLD. Sheep can't get within ~0.3 m
# of a wall (the env clips them to FIELD_INSIDE), so anything
# right at the wall line is structure noise.
# Wall-proximity filter — sheep can't get this close to a wall,
# so detections right at the wall line are structure noise.
near_field_wall = (
cx > FIELD_X[1] - WALL_REJECT or cx < FIELD_X[0] + WALL_REJECT or
cy > FIELD_Y[1] - WALL_REJECT or
+19 -48
View File
@@ -1,16 +1,12 @@
"""Fast 2D LiDAR simulator for the Gymnasium env.
Raycasts against:
* **Sheep** — discs of radius ``SHEEP_RADIUS``.
* **Static world geometry** — axis-aligned wall segments and gate
posts taken from ``worlds/field.wbt``. Without these, demos
collected in-env would never include the false-positive clusters
Webots produces from the stone walls and gate-post boxes, and the
BC student trained on those demos collapses on deployment.
Raycasts against sheep (discs) and static world geometry (axis-aligned
walls + gate posts) so the env reproduces the false-positive cluster
distribution Webots produces from real 3D geometry.
Returns a range array matching the Webots Lidar device on the dog
(see ``protos/ShepherdDog.proto``: 180 rays, 140° FOV centred on
forward, 12 m max range, 5 mm noise).
Returns a range array matching the Webots Lidar device:
180 rays, 140° FOV centred on forward, 12 m max range, 5 mm noise.
See ``protos/ShepherdDog.proto``.
"""
from __future__ import annotations
@@ -26,19 +22,13 @@ LIDAR_FOV = 2.44 # rad ≈ 140°
LIDAR_MAX_RANGE = 12.0
LIDAR_NOISE = 0.005 # m, gaussian std
# Sheep modelled as a vertical cylinder; this is the horizontal-section
# radius the LiDAR plane intersects. Tuned to the proto sheep (~0.45 m
# body length). The exact value is not load-bearing — the perception
# clusterer is range-tolerant.
# Sheep radius in the horizontal LiDAR plane (sheep approximated as a vertical cylinder).
SHEEP_RADIUS = 0.30
# ---------------------------------------------------------------------------
# Static world geometry — must match worlds/field.wbt
# ---------------------------------------------------------------------------
# --- Static world geometry — mirrors worlds/field.wbt ---
# Vertical walls: (x, y_min, y_max). Field east/west walls and the two
# pen side walls are visible through the open gate.
# Vertical walls: (x, y_min, y_max).
_VERTICAL_WALLS = (
( 15.0, -15.0, 15.0), # field east
(-15.0, -15.0, 15.0), # field west
@@ -46,8 +36,7 @@ _VERTICAL_WALLS = (
( 13.0, -22.0, -15.0), # pen east
)
# Horizontal walls: (y, x_min, x_max). South wall is split by the 3 m
# gate at x ∈ [10, 13]; the pen south wall closes the back of the pen.
# Horizontal walls: (y, x_min, x_max). South wall has a 3 m gap at the gate.
_HORIZONTAL_WALLS = (
( 15.0, -15.0, 15.0), # field north
(-15.0, -15.0, 10.0), # field south-west of gate
@@ -55,31 +44,23 @@ _HORIZONTAL_WALLS = (
(-22.0, 10.0, 13.0), # pen south
)
# Gate posts and field corner pillars treated as vertical cylinders at
# LiDAR height. Radius 0.25 m comes from the 0.44 × 0.44 m boxes in the
# wbt — close enough to a circular cross-section for this purpose.
# Gate posts + field corner pillars, treated as discs at LiDAR height.
_POSTS_XY = np.array([
( 10.0, -15.0), # west gate post
( 13.0, -15.0), # east gate post
( 15.0, 15.0), # NE field corner
( 15.0, -15.0), # SE field corner
(-15.0, 15.0), # NW field corner
(-15.0, -15.0), # SW field corner
( 10.0, -15.0), ( 13.0, -15.0),
( 15.0, 15.0), ( 15.0, -15.0),
(-15.0, 15.0), (-15.0, -15.0),
], dtype=np.float64)
POST_RADIUS = 0.25
def ray_angles(n: int = LIDAR_N_RAYS, fov: float = LIDAR_FOV) -> np.ndarray:
"""Local-frame ray angles, sweeping from +fov/2 to -fov/2.
"""Local-frame ray angles, CCW from forward, sweeping +fov/2 → -fov/2.
Convention: angle is measured CCW from the dog's forward axis. Ray 0
points to the dog's left, last ray to the right. Webots' default
Lidar sweep matches this.
Matches Webots' default Lidar sweep direction.
"""
return np.linspace(fov / 2.0, -fov / 2.0, n, dtype=np.float64)
# Cached so we don't rebuild every step.
_ANGLES = ray_angles()
_COS = np.cos(_ANGLES)
_SIN = np.sin(_ANGLES)
@@ -88,13 +69,7 @@ _SIN = np.sin(_ANGLES)
def _raycast_static(
ox: float, oy: float, cos_w: np.ndarray, sin_w: np.ndarray,
) -> np.ndarray:
"""Per-ray distance to nearest wall or post hit (∞ if none).
Walls are axis-aligned line segments; for each ray we compute t at
which it crosses the wall's constant-coord plane and check the
other coord lies in the segment. Posts are circles; same disc
intersection as for sheep.
"""
"""Per-ray distance to the nearest wall or post hit (∞ if none)."""
n_rays = cos_w.shape[0]
best = np.full(n_rays, np.inf, dtype=np.float64)
@@ -144,10 +119,7 @@ def simulate_scan(
) -> np.ndarray:
"""Return a (N,) float32 range array. No-hit entries equal ``max_range``.
``sheep_xy`` is the list of (x, y) world positions of every sheep in
the scene (penned and active). Static world geometry (walls and
posts) is also raycast so demos contain the same false-positive
clusters Webots produces.
``sheep_xy`` is every sheep (penned or active) in the scene.
"""
n_rays = _ANGLES.shape[0]
@@ -172,8 +144,7 @@ def simulate_scan(
nearest = candidate.min(axis=0)
np.minimum(best, nearest, out=best)
# Clip to LIDAR_MAX_RANGE; entries that never got a hit stay at inf
# → clipped down to max_range like the real Webots device.
# Entries with no hit stay at inf → clipped to max_range, matching Webots.
ranges = np.minimum(best, max_range).astype(np.float32)
return _add_noise(ranges, noise, rng, max_range)
+19 -28
View File
@@ -1,31 +1,25 @@
"""Observation builder for the shepherd dog policy.
"""Observation builder for the shepherd-dog policy.
Order-invariant 32-D feature vector — the policy generalises across
flock sizes 1..MAX_SHEEP because individual sheep coordinates never
appear in the observation by index, only summary statistics, a polar
histogram, and two "named" sheep (closest-to-pen and rearmost-from-pen).
The two named sheep matter for the sequential-driving teacher: it
targets the closest-to-pen sheep specifically, so the policy needs
that channel to mimic the teacher.
Order-invariant 32-D feature vector. Sheep never appear by index in
the observation, only via summary statistics, a polar histogram, and
two "named" channels (closest-to-pen, rearmost-from-pen) so the
policy generalises across flock sizes 1..MAX_SHEEP.
Layout (all components normalised so values stay roughly in [-1, 1]):
idx field
idx field
----- ----------------------------------------------------------
0..3 dog pose: x/15, y/15, cos(heading), sin(heading)
0..3 dog pose: x/15, y/15, cos(h), sin(h)
4..5 active-sheep CoM x/15, y/15
6..8 flock dispersion: max-radius/15, std_x/15, std_y/15
9..11 vector dog→CoM: dx/30, dy/30, dist/30
12..14 vector dog→pen-entry: dx/30, dy/30, dist/30
15..16 vector furthest-sheep→CoM: dx/15, dy/15
6..8 flock dispersion: max_radius/15, std_x/15, std_y/15
9..11 dog → CoM: dx/30, dy/30, dist/30
12..14 dog → pen entry: dx/30, dy/30, dist/30
15..16 furthest sheep → CoM: dx/15, dy/15
17..18 min sheep-to-wall, min dog-to-wall (both /15)
19 active-sheep count / MAX_SHEEP
20..27 8-bin polar histogram of active sheep around the dog,
rotation-aware (binned in dog-relative frame), normalised
so the bins sum to 1.
28..29 vector dog→closest-to-pen sheep: dx/15, dy/15
30..31 vector dog→rearmost (furthest-from-pen) sheep: dx/15, dy/15
19 active sheep count / MAX_SHEEP
20..27 8-bin polar histogram of active sheep in the dog's body frame
28..29 dog → closest-to-pen sheep: dx/15, dy/15
30..31 dog → rearmost (furthest-from-pen) sheep: dx/15, dy/15
"""
import math
@@ -68,7 +62,6 @@ def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
obs[14] = math.hypot(pdx0, pdy0) / 30.0
if n == 0:
# All sheep penned — terminal observation.
obs[19] = 0.0
return obs
@@ -110,7 +103,7 @@ def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
obs[18] = float(min_dog_wall) / 15.0
obs[19] = n / n_max
# 8-bin polar histogram in the dog's body frame.
# Polar histogram in the dog's body frame.
rel_dx = arr[:, 0] - dog_x
rel_dy = arr[:, 1] - dog_y
angles = np.arctan2(rel_dy, rel_dx) - dog_heading
@@ -121,11 +114,9 @@ def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
hist /= max(1, n)
obs[20:28] = hist
# Closest-to-pen sheep (the sequential teacher's target) and rearmost
# (furthest-from-pen, the natural "next target" once the closest is
# penned). Both expressed as offset from dog. These two channels make
# BC tractable — without them the obs doesn't uniquely identify which
# sheep the teacher is steering toward.
# Closest-to-pen and rearmost (furthest-from-pen) sheep. Without
# these named channels the obs cannot uniquely identify which sheep
# the teacher is steering toward, and BC fails to mimic it.
pen_dists = np.hypot(arr[:, 0] - PEN_ENTRY[0], arr[:, 1] - PEN_ENTRY[1])
closest_idx = int(np.argmin(pen_dists))
rearmost_idx = int(np.argmax(pen_dists))
+30 -66
View File
@@ -1,25 +1,14 @@
"""Multi-target tracker for LiDAR-detected sheep.
Greedy nearest-neighbour data association (with a distance gate) across
frames, plus a memory of last-seen positions for tracks that fall out
of the dog's FOV. Output is a ``{name: (x, y)}`` dict shaped exactly
like the receiver-based ``sheep_positions`` used previously by the
Webots controller and by the env, so Strömbom and Sequential can
consume it unchanged.
Greedy nearest-neighbour data association across frames, with a wider
re-acquisition gate for stale tracks (sheep flee during occlusion and
reappear off-position), plus memory of last-seen positions for sheep
out of FOV. Output is ``{name: (x, y)}`` — Strömbom / Sequential
consume it directly.
Penned-detection heuristic
--------------------------
Two ways a track is marked penned:
1. Its current estimated position is south of the gate plane and
within the gate column (the ``is_penned_position`` test the env
already uses on ground truth).
2. It hasn't been observed for ``STALE_STEPS`` and its last-seen
position was inside the gate-approach band — the dog's LiDAR can
only see ~2 m into the pen through the open gate, so a sheep
that disappeared near the entry has almost certainly entered.
Tracks marked penned are excluded from ``get_positions()`` (which is
what Strömbom consumes), matching the prior receiver-based behaviour.
A track is marked penned once its estimated position crosses the gate
plane south (``is_penned_position``). Penned tracks are excluded from
``get_positions`` and kept indefinitely.
"""
from __future__ import annotations
@@ -29,26 +18,22 @@ import math
from herding.world.geometry import MAX_SHEEP, in_pen, is_penned_position
GATE_M = 2.5 # m — primary NN gate (recent tracks)
REACQUIRE_GATE_M = 4.5 # m — wider gate for re-acquiring stale tracks (sheep moved during occlusion)
REACQUIRE_MIN_AGE = 20 # steps — only rebind via the wide gate if the track has been stale for this long
PENNED_GATE_M = 4.0 # m — wide gate for matching against already-penned tracks; the pen is small (3×7 m) so duplicates are easy without it
FORGET_STEPS = 200 # ~3.2 s — delete stale active tracks; tighter than 5 s to limit phantoms but long enough to bridge typical FOV gaps
MAX_ACTIVE_TRACKS = MAX_SHEEP # hard cap to the worst-case real flock size
# Penned tracks are never forgotten: sheep don't leave the pen, and
# losing the track makes the counter oscillate as the same sheep gets
# re-detected and counted multiple times.
GATE_M = 2.5 # m — primary NN gate (recently observed tracks)
REACQUIRE_GATE_M = 4.5 # m — wider gate for re-binding stale tracks
REACQUIRE_MIN_AGE = 20 # steps — track must be this stale to use the wider gate
PENNED_GATE_M = 4.0 # m — gate for matching detections to existing penned tracks
FORGET_STEPS = 200 # ~3.2 s — delete stale active tracks (penned ones kept forever)
MAX_ACTIVE_TRACKS = MAX_SHEEP
class SheepTracker:
"""Online tracker with NN association and a forgetful memory.
"""Online tracker with NN association and forgetful memory.
Each track stores ``(x, y, last_seen_step, penned)``.
"""
def __init__(self, gate: float = GATE_M):
self.gate = gate
# tid → (x, y, last_seen_step, penned)
self._tracks: dict[int, tuple[float, float, int, bool]] = {}
self._next_id = 0
self.step = 0
@@ -58,9 +43,6 @@ class SheepTracker:
self._next_id = 0
self.step = 0
# ------------------------------------------------------------------
# Update
# ------------------------------------------------------------------
def update(self, detections: list[tuple[float, float]]) -> dict[str, tuple[float, float]]:
"""Fold a new set of detections in and return active positions."""
self.step += 1
@@ -68,9 +50,9 @@ class SheepTracker:
det_used: set[int] = set()
updated_tids: set[int] = set()
# Pass 1: match against ACTIVE tracks first (oldest-seen-first so
# a re-emerging long-lost sheep grabs its old ID before a fresh
# neighbour does).
# Pass 1 — match active tracks within the primary gate. Oldest-
# seen tracks bind first so a re-emerging long-lost sheep keeps
# its old ID instead of being grabbed by a fresh neighbour.
active_tids = [tid for tid, t in self._tracks.items() if not t[3]]
active_tids.sort(key=lambda tid: self._tracks[tid][2])
for tid in active_tids:
@@ -89,12 +71,10 @@ class SheepTracker:
det_used.add(best_j)
updated_tids.add(tid)
# Pass 1b: re-acquisition with a wider gate for tracks that have
# been stale for ≥ REACQUIRE_MIN_AGE steps. Sheep flee at
# ~0.6 m/s; over a 1–2 s occlusion (dog rotating or driving)
# they move enough that a fresh detection lies outside the
# primary GATE_M but is still clearly the same sheep. Without
# this, phantom tracks accumulate and corrupt the CoM.
# Pass 1b — re-acquisition. Sheep flee at ~0.6 m/s, so over a
# 1–2 s occlusion the same sheep may reappear outside the primary
# gate. Allow rebinding within a wider gate for stale-enough
# tracks; otherwise phantom tracks accumulate and corrupt CoM.
for tid in active_tids:
if tid in updated_tids:
continue
@@ -115,10 +95,7 @@ class SheepTracker:
det_used.add(best_j)
updated_tids.add(tid)
# Pass 2: match remaining detections against PENNED tracks with
# a wider gate (PENNED_GATE_M). Without this, every frame near the gate spawns
# a fresh penned track for the same sheep, which under a long
# Webots run leads to thousands of phantom penned tracks.
# Pass 2 — match remaining detections to penned tracks.
penned_tids = [tid for tid, t in self._tracks.items() if t[3]]
for tid in penned_tids:
tx, ty, _, _ = self._tracks[tid]
@@ -135,9 +112,8 @@ class SheepTracker:
self._tracks[tid] = (dx, dy, self.step, True)
det_used.add(best_j)
# Unmatched detections → new tracks. A detection that is already
# inside the pen is born "penned" so we don't accumulate active
# tracks for sheep that arrived in the pen during occlusion.
# Spawn new tracks for unmatched detections. Born "penned" if
# the detection already sits inside the pen geometry.
for j, (dx, dy) in enumerate(detections):
if j in det_used:
continue
@@ -145,44 +121,32 @@ class SheepTracker:
self._tracks[self._next_id] = (dx, dy, self.step, penned)
self._next_id += 1
# Promote active tracks to penned ONLY by geometric position
# (sheep is in the pen column south of the gate). The previous
# "stale + near gate" heuristic was firing on ordinary occlusion
# near the gate and creating phantom penned tracks.
# Promote active tracks whose current estimate crosses the gate.
for tid, (tx, ty, last, penned) in list(self._tracks.items()):
if penned:
continue
if is_penned_position(tx, ty):
self._tracks[tid] = (tx, ty, last, True)
# Forget stale ACTIVE tracks after FORGET_STEPS. Penned tracks
# are kept indefinitely — sheep can't escape the pen, so once a
# track is marked penned, that sheep is permanently penned.
# Forget stale active tracks; penned tracks live forever.
for tid, (tx, ty, last, penned) in list(self._tracks.items()):
if penned:
continue
if (self.step - last) > FORGET_STEPS:
del self._tracks[tid]
# Hard cap on the active set. If we somehow have more than
# MAX_ACTIVE_TRACKS active tracks, drop the oldest-seen ones
# first — they are most likely false positives from world
# geometry (walls, gate posts) the env's raycaster doesn't
# model, and a bloated active set wrecks the downstream CoM.
# Hard cap on the active set — drop the oldest-seen overflow.
active = [(tid, last) for tid, (_, _, last, p) in self._tracks.items()
if not p]
if len(active) > MAX_ACTIVE_TRACKS:
active.sort(key=lambda kv: kv[1]) # oldest-seen first
active.sort(key=lambda kv: kv[1])
for tid, _ in active[: len(active) - MAX_ACTIVE_TRACKS]:
del self._tracks[tid]
return self.get_positions()
# ------------------------------------------------------------------
# Outputs
# ------------------------------------------------------------------
def get_positions(self) -> dict[str, tuple[float, float]]:
"""Active (not-yet-penned) tracks. Same shape as receiver dict."""
"""Active (not-penned) tracks as a ``{name: (x, y)}`` dict."""
return {f"t{tid}": (x, y)
for tid, (x, y, _, penned) in self._tracks.items()
if not penned}
+7 -16
View File
@@ -1,11 +1,8 @@
"""Differential-drive kinematics matching the Webots robot specs.
"""Differential-drive kinematics, shared by the env and Webots controllers.
The Webots controllers and the training env both use these helpers so the
sim and the real (Webots) physics agree to first order. They do not model
slip, wheel acceleration limits, or contact forces — Webots does that for
us at inference time. The training env has to be close enough that a
policy trained against this kinematic model still works when handed off
to ODE physics.
First-order rigid-body model — no slip, wheel-accel limits, or contact
forces. Webots' ODE physics handles those at inference; the env stays
close enough to first order that a policy trained here transfers.
"""
import math
@@ -34,10 +31,9 @@ def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt):
def velocity_to_wheels(vx, vy, h, max_linear, wheel_radius, max_wheel_omega,
k_turn=4.0):
"""Convert a desired (vx, vy) intent in [-1, 1]^2 to wheel speeds.
"""Convert a desired (vx, vy) intent in [-1, 1]² to wheel speeds.
Mirrors ``drive_action`` in controllers/shepherd_dog/shepherd_dog.py:
forward speed scales by ``cos(err)`` (clamped to ±90°), and a P
Forward speed scales by ``cos(err)`` (clamped to ±90°); a P
controller on heading error contributes the wheel-rate differential.
"""
speed_ms = math.hypot(vx, vy) * max_linear
@@ -56,12 +52,7 @@ def velocity_to_wheels(vx, vy, h, max_linear, wheel_radius, max_wheel_omega,
def heading_speed_to_wheels(heading, speed_motor, h, max_wheel_omega,
k_turn=4.0):
"""Sheep variant: speed already expressed in motor (wheel rad/s) units.
Matches the existing sheep controller (``controllers/sheep/sheep.py``)
where ``speed = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))`` and
these constants are wheel angular velocities, not linear m/s.
"""
"""Sheep variant: speed in wheel rad/s, target as a heading angle."""
err = math.atan2(math.sin(heading - h), math.cos(heading - h))
fwd = max(0.0, math.cos(err)) * speed_motor
turn = k_turn * err
+25 -66
View File
@@ -1,24 +1,19 @@
"""Sheep flocking dynamics — Strömbom 2014 / Reynolds 1987 hybrid.
"""Sheep flocking dynamics — Strömbom 2014 / Reynolds 1987.
This is the per-sheep behavioural step used both by the Webots sheep
controller (scalar, one sheep at a time) and by the training environment
(loop over sheep).
Model
-----
The force stack each step (summed → heading + speed):
Per-sheep behavioural step used by both the Webots sheep controller
and the training environment. Each step a force stack is summed:
flee — quadratic ramp away from dog within FLEE_DIST
(Strömbom 2014 §2.1, term ρa)
(Strömbom 2014, term ρa)
cohesion — drift toward local centre of mass of peers within
COHESION_DIST (Strömbom 2014 §2.1, term c).
Weight is **higher when fleeing** — modelling the
"safety in numbers" / predator-confusion effect
Strömbom 2014 describes as fear-induced cohesion.
COHESION_DIST (Strömbom 2014, term c). Weight is
higher while fleeing — fear-induced cohesion.
separation — short-range inverse-distance repulsion from peers
(Strömbom 2014 §2.1, term α; Reynolds 1987)
wander — small persistent drift for natural idle motion
(Strömbom 2014 §2.1, noise term ε)
(Strömbom 2014, term α; Reynolds 1987)
wander — small persistent drift (Strömbom 2014, noise term ε)
Walls, the south-wall gate column, and in-pen containment are
environment-specific additions for the fenced Webots field.
References
----------
@@ -26,26 +21,6 @@ References
for herding autonomous, interacting agents." J R Soc Interface 11.
- Reynolds (1987). "Flocks, herds and schools: A distributed
behavioural model." SIGGRAPH '87.
Environment-specific adaptations
--------------------------------
The original Strömbom model assumes an open field. Our scenario adds:
* Field walls — soft repulsion within ``WALL_MARGIN`` plus a hard
escape band when inside ``WALL_HARD_MARGIN``. Necessary because the
Webots field is fenced (30 m square enclosure).
* Gate column — the south wall has a 3 m gap at x ∈ [10, 13]; sheep
pass through it freely (no wall force inside the column).
* Penned containment — once a sheep crosses the gate plane south
(``geometry.is_penned_position``), the caller flags ``penned=True``
and we switch to in-pen wall-bounce + jitter. Sheep do not exit the
pen on their own. This is a hard sim constraint, not a behavioural
claim about real sheep.
Parameter tuning (cohesion weight 3× while fleeing) was chosen so the
flock survives passage through the 3 m gate without fragmenting — this
is a defensible engineering adaptation of Strömbom's qualitative
"fear-induced cohesion" to our gate width.
"""
import math
@@ -57,9 +32,7 @@ from herding.world.geometry import (
GATE_X,
)
# --- Speed and force constants ---
# All speeds here are in wheel rad/s (motor units), matching the existing
# sheep controller. Conversion to m/s = speed * SHEEP_WHEEL_RADIUS.
# Speeds are in wheel rad/s (motor units); m/s = speed * SHEEP_WHEEL_RADIUS.
MAX_SPEED = 22.0
FLEE_SPEED = 20.0
WANDER_SPEED = 3.0
@@ -70,7 +43,7 @@ WALL_HARD_GAIN = 50.0
FLEE_DIST = 7.0
SEPARATION_DIST = 2.5
COHESION_DIST = 12.0 # was 8.0 — wider engagement so far-flung sheep are pulled in
COHESION_DIST = 12.0
PEN_MARGIN = 0.8
@@ -85,21 +58,17 @@ def _peers_iter(peers):
def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
"""Return ``(heading, speed, new_wander_angle)`` for one sheep step.
``speed`` is in wheel rad/s (motor units), bounded by ``[WANDER_SPEED,
FLEE_SPEED]``. ``heading`` is the world-frame target heading the sheep
should aim for (atan2 convention).
``rng`` is an optional ``random.Random``-compatible object used for
the wander-jitter. If ``None``, falls back to Python's global module
(matches Webots controller usage). Pass an env-owned RNG to make
rollouts deterministic given a seed.
``speed`` is in wheel rad/s, bounded by ``[WANDER_SPEED, FLEE_SPEED]``.
``heading`` is the world-frame target heading (atan2 convention).
``rng`` is an optional ``random.Random`` used for wander jitter; if
``None``, the global ``random`` module is used instead.
"""
fx, fy = 0.0, 0.0
peer_list = _peers_iter(peers)
rnd = rng if rng is not None else random
if penned:
# --- Pen containment: bounce off the four pen walls ---
# Pen containment: bounce off all four pen walls.
pm = PEN_MARGIN
if x < PEN_X[0] + pm:
fx += ((PEN_X[0] + pm - x) / pm) * 15.0
@@ -110,7 +79,7 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
if y > PEN_Y[1] - pm:
fy -= ((y - (PEN_Y[1] - pm)) / pm) * 15.0
# Mild peer separation — penned sheep crowd the corner otherwise.
# Mild peer separation so penned sheep don't crowd one corner.
for px, py in peer_list:
dx, dy = px - x, py - y
d = math.hypot(dx, dy)
@@ -125,7 +94,7 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
fy += math.sin(wander_angle) * 0.5
else:
# --- Free-roaming sheep in the field ---
# Free-roaming sheep in the field.
fleeing = False
if dog_xy is not None:
ddx = dog_xy[0] - x
@@ -138,11 +107,9 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
fx -= (ddx / dist) * s
fy -= (ddy / dist) * s
# Cohesion drift toward flock CoM (peers within COHESION_DIST).
# Cohesion is *stronger* under flee than at rest (the
# predator-confusion / safety-in-numbers effect — sheep huddle when
# threatened). This is what makes shepherding work: the flock stays
# as one unit through the narrow gate instead of fragmenting.
# Cohesion: drift toward the local CoM of peers within
# COHESION_DIST. Stronger while fleeing — fear-induced
# cohesion keeps the flock together through the gate.
cx, cy, cn = 0.0, 0.0, 0
for px, py in peer_list:
d = math.hypot(px - x, py - y)
@@ -151,12 +118,6 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
cy += py
cn += 1
if cn > 0:
# Cohesion needs to dominate flee at close range so the flock
# stays glued together when squeezing through the narrow gate.
# Flee at 2 m has magnitude ~10; cohesion of w=3.0 with the
# peer-CoM 4 m away contributes ~12, so the flock prefers
# bunching to dispersing under pressure. This is what makes
# canonical Strömbom drive work in our 3 m gate.
w = 3.0 if fleeing else 1.0
fx += (cx / cn - x) * w
fy += (cy / cn - y) * w
@@ -170,8 +131,7 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
fx -= (ddx / d) * push * 2.5
fy -= (ddy / d) * push * 2.5
# Wall soft repulsion. The south wall is absent inside the gate
# column so sheep can be driven through it by the dog.
# Wall soft repulsion (south wall absent inside the gate column).
if x < FIELD_X[0] + WALL_MARGIN:
fx += ((FIELD_X[0] + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
if x > FIELD_X[1] - WALL_MARGIN:
@@ -187,7 +147,7 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
fx += math.cos(wander_angle) * 0.5
fy += math.sin(wander_angle) * 0.5
# --- Hard escape band — overrides everything when very close to a wall ---
# Hard escape band — overrides everything else near a wall.
m, g = WALL_HARD_MARGIN, WALL_HARD_GAIN
if x - FIELD_X[0] < m:
fx = max(fx, g * (1.0 - (x - FIELD_X[0]) / m))
@@ -195,7 +155,6 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
fx = min(fx, -g * (1.0 - (FIELD_X[1] - x) / m))
if FIELD_Y[1] - y < m:
fy = min(fy, -g * (1.0 - (FIELD_Y[1] - y) / m))
# South wall hard escape only when not in the gate column and not penned.
if (not penned) and (y - FIELD_Y[0] < m) and not (GATE_X[0] <= x <= GATE_X[1]):
fy = max(fy, g * (1.0 - (y - FIELD_Y[0]) / m))
+14 -35
View File
@@ -1,23 +1,15 @@
"""World geometry and robot specs.
All coordinates are in meters. (0, 0) is the centre of the field, +x is
east, +y is north. Z is up but unused here. These constants must match
``worlds/field.wbt`` and the proto files; if the world changes, change
this file and only this file.
Pen layout (post-refactor)
--------------------------
The pen is *external* to the field, accessed through a 3 m gate cut into
the south stone wall at y = -15. Sheep entering through the gate end up
in a fenced rectangle south of the field; the dog stays in the field
(soft-limited above DOG_SOUTH_LIMIT during training and inference).
Coordinates are metres; (0, 0) is the field centre, +x east, +y north.
These constants mirror ``worlds/field.wbt`` and the proto files — if
the world changes, this file is the single point of update.
field +y north
+-----------+
| |
| |
| ...... |
+---||||----+ y = -15 (south wall, gate at x ∈ [10, 13])
+---||||----+ y = -15 (south wall, 3 m gate at x ∈ [10, 13])
||||
|pen| y ∈ [-22, -15]
+---+
@@ -25,46 +17,38 @@ in a fenced rectangle south of the field; the dog stays in the field
import math
# --- Field (square, stone-walled) ---
# Field (square, stone-walled)
FIELD_X = (-15.0, 15.0)
FIELD_Y = (-15.0, 15.0)
# Conservative inside bounds — sheep/dog should not graze the wall.
FIELD_INSIDE_MARGIN = 0.5
# --- Pen (external, south of the field) ---
# Pen (external, south of the field)
PEN_X = (10.0, 13.0)
PEN_Y = (-22.0, -15.0)
PEN_CENTER = (0.5 * (PEN_X[0] + PEN_X[1]), 0.5 * (PEN_Y[0] + PEN_Y[1]))
# The point the dog drives the flock toward: the gate centre on the field side.
PEN_ENTRY = (0.5 * (PEN_X[0] + PEN_X[1]), -15.0)
# --- Gate (the hole in the south stone wall) ---
# Gate (hole in the south wall)
GATE_X = PEN_X
GATE_Y = -15.0
# --- Robot specs (must match proto files) ---
# Dog (controllers/shepherd_dog/, protos/ShepherdDog.proto)
# Dog spec — protos/ShepherdDog.proto
DOG_WHEEL_RADIUS = 0.038 # m
DOG_WHEEL_BASE = 0.28 # m, axle-to-axle
DOG_MAX_WHEEL_OMEGA = 70.0 # rad/s
DOG_MAX_LINEAR = DOG_WHEEL_RADIUS * DOG_MAX_WHEEL_OMEGA # ~2.66 m/s
DOG_MAX_LINEAR = DOG_WHEEL_RADIUS * DOG_MAX_WHEEL_OMEGA # 2.66 m/s
# Sheep (controllers/sheep/, protos/Sheep.proto)
# Sheep spec — protos/Sheep.proto
SHEEP_WHEEL_RADIUS = 0.031 # m
SHEEP_WHEEL_BASE = 0.20 # m
SHEEP_MAX_WHEEL_OMEGA = 25.0 # rad/s
SHEEP_MAX_LINEAR = SHEEP_WHEEL_RADIUS * SHEEP_MAX_WHEEL_OMEGA # ~0.78 m/s
SHEEP_MAX_LINEAR = SHEEP_WHEEL_RADIUS * SHEEP_MAX_WHEEL_OMEGA # 0.78 m/s
# --- Webots step ---
WEBOTS_DT = 0.016 # seconds, matches WorldInfo.basicTimeStep = 16 in field.wbt
WEBOTS_DT = 0.016 # seconds (matches WorldInfo.basicTimeStep)
# --- Dog "virtual south wall" (training keeps dog out of the pen) ---
# At inference the controller also clips to this so a slightly miscalibrated
# policy doesn't accidentally drive into the pen and trap the sheep.
# Virtual south wall — env and controller both keep the dog north of this.
DOG_SOUTH_LIMIT = -14.5
# --- Maximum supported flock size ---
MAX_SHEEP = 10
@@ -85,12 +69,7 @@ def in_gate_corridor(x: float, y: float, margin: float = 0.0) -> bool:
def is_penned_position(x: float, y: float, latch_margin: float = 0.2) -> bool:
"""A sheep latches to "penned" once it crosses the gate plane south.
True iff x is inside the gate column (with a small margin) AND
y has dipped below the gate line. Once latched, the sheep is held by
in-pen forces and will not exit on its own.
"""
"""True iff (x, y) is in the gate column and south of the gate line."""
return (PEN_X[0] - latch_margin <= x <= PEN_X[1] + latch_margin
and y <= GATE_Y)