Checkpoint 7

2026-05-11 12:21:51 +01:00
parent fce0e0c786
commit a01a5c9cef
34 changed files with 1266 additions and 1038 deletions
@@ -1,23 +1,19 @@
-"""Active-perception wrapper for the analytic shepherding teachers.
+"""Active-perception wrapper for the analytic shepherd teachers.

-Under LiDAR (partial observability), the tracker starts empty — the
-dog hasn't seen any sheep yet. A naive Strömbom call returns
-``(0, 0, "idle")`` and the dog stops. The student then learns "do
-nothing when the tracker is empty," which is a fatal local optimum.
+Under partial-observability LiDAR perception the tracker starts empty
+— a naive analytic teacher returns ``(0, 0, "idle")`` and the dog
+stops. This wrapper interleaves the underlying teacher with two
+exploration behaviours:

-This wrapper replaces the idle case with a **scan action**: a unit
-vector 90° CCW from the dog's current forward direction. Passed
-through ``velocity_to_wheels`` it produces a fast in-place rotation
-(``cos(err)`` clamp drives forward speed to ~0 because the target is
-orthogonal to the heading). The dog spins for the first
-``initial_scan_steps`` steps of every episode regardless of tracker
-state, and re-enters scan whenever the tracker goes empty mid-episode.
+* opening in-place rotation for the first ``INITIAL_SCAN_STEPS``
+  steps, guaranteeing the LiDAR sweeps a full circle before driving;
+* walk-to-centre when the tracker has been empty for at least
+  ``EMPTY_DEBOUNCE_STEPS`` consecutive frames (corners can sit
+  beyond the 12 m LiDAR range).

-Once enough sheep are tracked, control hands over to the underlying
-analytic teacher (Strömbom or Sequential), which now operates on a
-populated tracker dict. Both teacher and student see the same
-LiDAR-perceived view — there's no information asymmetry, so the
-student can in principle achieve the teacher's full performance.
+When the tracker has detections the base teacher's action is used,
+post-processed by ``modulate_speed_near_sheep`` so the dog doesn't
+charge the flock.
 """

 from __future__ import annotations
@@ -27,26 +23,17 @@ import math
 from herding.control.modulation import modulate_speed_near_sheep


-INITIAL_SCAN_STEPS = 80    # ≈1.3 s at dt=16 ms — full rotation at the +π turn target.
-EXPLORE_SPEED = 0.7        # m/s-ish unit (action norm) used when walking blind
-
-# Debounce on tracker emptiness — a single empty frame between
-# detections is not enough reason to abandon the drive and start
-# scanning. Require this many consecutive empty frames first.
-EMPTY_DEBOUNCE_STEPS = 8
+INITIAL_SCAN_STEPS = 80         # ≈1.3 s — covers one full rotation
+EXPLORE_SPEED = 0.7             # action norm while walking blind
+EMPTY_DEBOUNCE_STEPS = 8        # consecutive empty frames before exploring


 class ActiveScanTeacher:
-    """Stateful wrapper. Construct one per episode; call ``reset()``
-    between episodes if reusing the instance.
+    """Stateful wrapper. Construct one per episode (or call ``reset``).

    Call signature::

        vx, vy, mode = teacher(dog_xy, dog_heading, sheep_positions, pen_target)
-
-    Note the extra ``dog_heading`` arg — required to compute the
-    rotation direction. The base teachers (Strömbom, Sequential)
-    don't use heading; we strip it before passing them through.
    """

    def __init__(self, base_action_fn, initial_scan_steps: int = INITIAL_SCAN_STEPS):
@@ -61,27 +48,17 @@ class ActiveScanTeacher:

    @staticmethod
    def _scan_action(dog_heading: float) -> tuple[float, float]:
-        # Target = current_heading + π. velocity_to_wheels gets err=π,
-        # so turn = k_turn·π = 4π ≈ 12.6 rad/s wheel angular vel and
-        # cos(err) clamps the forward speed to ~0. Maximum in-place
-        # rotation under this controller; one full rotation in ~60 steps.
+        # Target opposite to current heading; velocity_to_wheels'
+        # cos(err) clamp drives forward speed to ~0 → in-place rotation.
        target = dog_heading + math.pi
        return math.cos(target), math.sin(target)

    @staticmethod
    def _explore_action(dog_xy) -> tuple[float, float]:
-        """Walk back toward the field centre when nothing is in view.
-
-        At difficulty=1 sheep can spawn up to ~18 m from origin while
-        the LiDAR has a 12 m range, so an in-place scan from a corner
-        can return zero hits. Walking toward (0, 0) shrinks the
-        max-distance-to-any-sheep and the scanner cone sweeps along
-        the path, eventually picking sheep up.
-        """
+        """Walk toward (0, 0) while the LiDAR keeps sweeping."""
        dx, dy = -dog_xy[0], -dog_xy[1]
        d = math.hypot(dx, dy)
        if d < 0.5:
-            # At the centre — fall through to a scan instead.
            return 0.0, 0.0
        return EXPLORE_SPEED * dx / d, EXPLORE_SPEED * dy / d

@@ -89,22 +66,18 @@ class ActiveScanTeacher:
        self.step += 1
        n_visible = len(sheep_positions)

-        # Track empty-streak for the explore debounce.
        if n_visible == 0:
            self.empty_streak += 1
        else:
            self.empty_streak = 0

-        # Phase 1: opening rotation, regardless of tracker state.
+        # Phase 1: opening rotation.
        if self.step <= self.initial_scan:
            vx, vy = self._scan_action(dog_heading)
            self.last_action = (vx, vy)
            return vx, vy, "scan_initial"

-        # Phase 2: tracker has been empty for a while — walk back to the
-        # centre while the LiDAR keeps sweeping. The debounce prevents
-        # this from firing every time the tracker briefly blinks to zero
-        # (which causes the "dog starts going away from sheep" symptom).
+        # Phase 2: walk-to-centre after a sustained empty tracker.
        if self.empty_streak >= EMPTY_DEBOUNCE_STEPS:
            ex, ey = self._explore_action(dog_xy)
            if ex == 0.0 and ey == 0.0:
@@ -116,16 +89,13 @@ class ActiveScanTeacher:
            self.last_action = (vx, vy)
            return vx, vy, mode

-        # Phase 2b: tracker just blinked empty for <DEBOUNCE frames —
-        # hold the previous action so the dog doesn't lurch.
+        # Phase 2b: brief tracker blink — hold the previous action.
        if n_visible == 0:
            vx, vy = self.last_action
            return vx, vy, "hold"

-        # Phase 3: hand to the underlying analytic teacher, then apply
-        # the shared near-sheep speed modulation (centralised in
-        # herding.control so the BC student, Strömbom, Sequential and
-        # the DAgger teacher all behave identically near sheep).
+        # Phase 3: hand off to the underlying analytic teacher, then
+        # apply the shared near-sheep speed modulation.
        vx, vy, mode = self.base(dog_xy, sheep_positions, pen_target)
        vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
        self.last_action = (vx, vy)
@@ -1,8 +1,8 @@
-"""Shared low-level control helpers used by every dog mode.
+"""Shared action post-processing.

-Centralised here so the BC student, Strömbom, Sequential, and the DAgger
-teacher all apply identical post-processing to their action outputs.
-The downstream wheel-velocity layer (``herding.diffdrive``) is unchanged.
+Every dog mode routes its action through ``modulate_speed_near_sheep``
+so the magnitude is reduced near sheep — direction (intent) is
+preserved.
 """

 from __future__ import annotations
@@ -10,12 +10,8 @@ from __future__ import annotations
 import math


-# Speed-modulation: scale action magnitude down when close to the
-# nearest sheep. Stops the dog from charging in at full speed and
-# scattering the flock. Action norm linearly ramps from MIN_SPEED at
-# distance 0 to 1.0 at SLOW_NEAR_SHEEP.
-SLOW_NEAR_SHEEP = 2.5
-MIN_SPEED = 0.30
+SLOW_NEAR_SHEEP = 2.5  # m — distance below which action norm is scaled down
+MIN_SPEED = 0.30       # action norm at zero distance


 def modulate_speed_near_sheep(
@@ -25,16 +21,9 @@ def modulate_speed_near_sheep(
    slow_dist: float = SLOW_NEAR_SHEEP,
    min_scale: float = MIN_SPEED,
 ) -> tuple[float, float]:
-    """Scale (vx, vy) magnitude down when close to the nearest sheep.
-
-    ``sheep_positions`` accepts either a ``{name: (x, y)}`` dict
-    (matching what the trackers emit) or an iterable of ``(x, y)``
-    tuples. Empty input → action returned unchanged.
-
-    The intent direction is preserved; only magnitude is reduced. With
-    ``slow_dist=2.5`` and ``min_scale=0.3``, an action that started at
-    norm 1 is multiplied by 0.3 right next to a sheep, by 0.65 at 1 m
-    away, and by 1.0 once the nearest sheep is ≥ 2.5 m off.
+    """Linearly ramp action magnitude from ``min_scale`` at distance 0
+    to 1.0 at ``slow_dist``. ``sheep_positions`` may be a
+    ``{name: (x, y)}`` dict or an iterable of ``(x, y)`` tuples.
    """
    if not sheep_positions:
        return vx, vy
@@ -1,25 +1,9 @@
-"""Sequential single-target shepherd dog algorithm.
+"""Sequential "pin-and-push" shepherd-dog controller.

-Strömbom drives the flock's centre of mass; with N sheep and a narrow
-3 m gate, this fails because the flock is wider than the gate and CoM
-driving abandons stragglers. Real sheepdogs solve this differently:
-they pick *one* sheep at a time, drive it through, return for the next.
-
-This module implements that "pin-and-push" approach.
-
-Algorithm (one step):
-1. Active sheep = those still in the field (not yet penned).
-2. Target = the active sheep currently closest to the pen entry.
-3. Drive position = ``target + Δ · unit(target − pen_entry)`` —
-   directly behind the target relative to the goal.
-4. Output unit vector pointing the dog at the drive position.
-
-Once the target crosses the gate it latches as penned and is removed
-from the active set; the next-closest unpenned sheep becomes the
-target. The algorithm naturally "queues" sheep through the gate.
-
-Empirically (with our flocking dynamics) this scales linearly with
-flock size and works up to at least n=10 within a 15 000-step budget.
+Single-target alternative to Strömbom: each step, target the sheep
+closest to the pen, park behind it, drive it through; once it latches
+penned the next-closest sheep becomes the target. Naturally queues
+the flock through a narrow gate.
 """

 import math
@@ -43,25 +27,17 @@ def _is_active(x, y) -> bool:


 def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
-    """Return ``(vx, vy, mode)`` where mode encodes the current target.
-
-    Compatible with the Strömbom call signature so it can be drop-in
-    swapped in the dog controller and the env's imitation reward.
-    """
+    """Return ``(vx, vy, mode)`` — same call signature as Strömbom."""
    active = [(name, x, y) for name, (x, y) in sheep_positions.items()
              if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle"

-    # Pick target = sheep closest to pen entry. Stable choice: as one
-    # sheep approaches and crosses the gate it stays the target until
-    # latched; then the next-closest takes over.
    name, sx, sy = min(
        active,
        key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
    )

-    # Drive position behind the target along the (target → pen) line.
    ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
    tx = sx + DELTA_DRIVE * ux
    ty = sy + DELTA_DRIVE * uy
@@ -71,7 +47,7 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):


 def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
-    """Debug variant returning ``(vx, vy, mode, debug_dict)``."""
+    """``compute_action`` plus a debug dict (target, drive point)."""
    active = [(name, x, y) for name, (x, y) in sheep_positions.items()
              if _is_active(x, y)]
    if not active:
@@ -1,30 +1,20 @@
-"""Strömbom collect/drive heuristic for the shepherd dog.
+"""Strömbom (2014) collect/drive heuristic for the shepherd dog.

-Adapted from the original ``controllers/shepherd_dog/strombom.py`` and
-updated for the external pen layout. Used as a baseline controller and
-as the fallback when the RL policy isn't available.
+When the flock is scattered (max radius > F_FACTOR · √n) the dog moves
+to a point behind the furthest sheep and pushes it back toward the
+flock CoM. Otherwise it drives, parking behind the CoM relative to
+the pen target. Returns a unit-vector intent ``(vx, vy, mode)``.

-Reference: Strömbom et al. 2014, "Solving the shepherding problem".
+Reference: Strömbom et al. 2014, "Solving the shepherding problem."
 """

 import math

 from herding.world.geometry import PEN_ENTRY, GATE_Y, in_pen

-# Algorithm parameters. DELTA_DRIVE / DELTA_COLLECT were tightened from
-# the original (4.0 / 2.5) because the new external pen sits ~26 m from
-# typical sheep spawn locations — at the old 4 m standoff, the flee force
-# (quadratic ramp, 3.7 at 4 m vs ~10 at 2 m) couldn't move sheep through
-# the path inside the 3000-step episode budget.
-#
-# F_FACTOR was 2.0 in the original Strömbom paper; raised to 4.0 here so
-# the dog stays in *drive* mode much longer. With our tighter cohesion
-# (flocking_sim.py), partially-collected flocks consolidate naturally
-# during a drive, and we don't waste 80% of the time budget on a slow
-# "collect" pre-phase.
-F_FACTOR = 4.0
-DELTA_COLLECT = 1.5
-DELTA_DRIVE = 2.0
+F_FACTOR = 4.0       # collect/drive threshold scaled by √n
+DELTA_COLLECT = 1.5  # collect-position offset behind the furthest sheep
+DELTA_DRIVE = 2.0    # drive-position offset behind the flock CoM


 def _unit(x, y):
@@ -35,18 +25,12 @@ def _unit(x, y):


 def _is_active(x, y) -> bool:
-    """A sheep is "active" if it's still in the field — not in or below
-    the gate plane (we treat anything south of the gate as committed to
-    the pen and stop trying to herd it)."""
+    """A sheep still in the field counts; one south of the gate doesn't."""
    return (not in_pen(x, y)) and y > GATE_Y


 def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
-    """Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}.
-
-    ``sheep_positions`` is a ``{name: (x, y)}`` mapping (matches the
-    Webots controller's representation).
-    """
+    """Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}."""
    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle"
@@ -58,14 +42,14 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    radius = max(dists)

    if radius > F_FACTOR * math.sqrt(n):
-        # Collect: aim at a point behind the furthest sheep, opposite the CoM.
+        # Collect: aim behind the furthest sheep, opposite the CoM.
        idx = max(range(n), key=lambda i: dists[i])
        sx, sy = active[idx]
        ux, uy = _unit(sx - com_x, sy - com_y)
        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
        mode = "collect"
    else:
-        # Drive: aim at a point behind the flock CoM relative to the goal.
+        # Drive: aim behind the CoM, opposite the pen.
        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
        mode = "drive"
@@ -75,10 +59,7 @@ def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):


 def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
-    """Variant of compute_action that also returns a small debug dict.
-
-    Kept for parity with the legacy controller's CSV logger.
-    """
+    """``compute_action`` plus a small debug dict (CoM, target, radius)."""
    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle", {
@@ -1,26 +1,21 @@
 """Cluster a 2D LiDAR scan into world-frame sheep position estimates.

 Pipeline:
-    ranges (N,) ─► hit mask ─► world-frame points
-                                     │
-                                     ▼
-                              adjacency clustering (gap > GAP_THRESHOLD
-                              starts a new cluster, walking rays in
-                              angular order)
-                                     │
-                                     ▼
-                              centroid + span filter
-                                     │
-                                     ▼
-                              field/pen-corridor filter
-                                     │
-                                     ▼
-                              list of (x, y) detections

-The clusterer is intentionally simple — for ≤10 sheep there is rarely
-any real ambiguity, and proper DBSCAN would only matter if rays from
-two adjacent sheep merged. The downstream tracker handles association
-across frames.
+    ranges (N,) → hit mask → world-frame points
+                                │
+                                ▼
+                         adjacency clustering (gap > GAP_THRESHOLD
+                         starts a new cluster, walking rays in
+                         angular order)
+                                │
+                                ▼
+                         centroid + span + region + structure filters
+                                │
+                                ▼
+                         list of (x, y) detections
+
+The downstream tracker handles association across frames.
 """

 from __future__ import annotations
@@ -35,23 +30,19 @@ from herding.perception.lidar_sim import (
 )


-GAP_THRESHOLD = 0.6      # m — adjacent ray-points farther apart start new cluster
-MAX_CLUSTER_SPAN = 1.5   # m — clusters wider than this are likely walls/structures
+GAP_THRESHOLD = 0.6      # m — adjacent ray-points farther apart start a new cluster
+MAX_CLUSTER_SPAN = 1.5   # m — wider clusters are walls / structures
 RANGE_HIT_EPS = 0.05     # m — hit if range < max_range - eps
 WALL_REJECT = 0.5        # m — drop detections this close to a known wall line

-# Known sheep-sized static features. Detections within STATIC_REJECT
-# of any of these are discarded — these aren't sheep. Mid-pillars on
-# the field walls are NOT in this list because they're embedded in the
-# wall (the wall's span filter handles them); listing them here would
-# only reject real sheep that happened to be near the wall.
+# Sheep-sized static features (gate posts, corner pillars). A cluster
+# centred within STATIC_REJECT of any of these is never a sheep.
 _STATIC_FEATURES = (
-    # Gate posts (sheep-sized boxes flanking the south-wall opening)
-    ( 10.0, -15.0), ( 13.0, -15.0),
-    # Field corner pillars
-    ( 15.0,  15.0), ( 15.0, -15.0), (-15.0,  15.0), (-15.0, -15.0),
+    ( 10.0, -15.0), ( 13.0, -15.0),                   # gate posts
+    ( 15.0,  15.0), ( 15.0, -15.0),
+    (-15.0,  15.0), (-15.0, -15.0),                   # field corners
 )
-STATIC_REJECT = 0.8      # m — detection within this of a static feature → drop
+STATIC_REJECT = 0.8


 def detections_from_scan(
@@ -71,6 +62,8 @@ def detections_from_scan(
    px = dog_x + ranges * np.cos(world_a)
    py = dog_y + ranges * np.sin(world_a)

+    # Walk rays in angular order; a large jump between consecutive
+    # world-frame hit points closes the current cluster.
    clusters: list[list[tuple[float, float]]] = []
    current: list[tuple[float, float]] = []
    prev: tuple[float, float] | None = None
@@ -98,41 +91,30 @@ def detections_from_scan(
        span = math.hypot(max(xs) - min(xs), max(ys) - min(ys))
        if span > MAX_CLUSTER_SPAN:
            continue
-        # Surface-to-centre correction: rays hit the front of the sheep,
-        # so the cluster centroid is biased toward the dog by SHEEP_RADIUS.
-        # Push it outward along the dog→cluster direction.
+        # Rays hit the front edge of the sheep; offset outward by
+        # SHEEP_RADIUS along the dog→cluster direction to estimate the
+        # centre.
        dx, dy = cx - dog_x, cy - dog_y
        d = math.hypot(dx, dy)
        if d > 1e-3:
            cx += SHEEP_RADIUS * dx / d
            cy += SHEEP_RADIUS * dy / d
-        # Keep detections inside the field OR in the gate corridor /
-        # external pen — penned sheep are still worth tracking so the
-        # tracker can latch them as "penned" rather than spawn fresh
-        # tracks each scan.
-        # Accept detections inside the field, plus a narrow strip
-        # immediately south of the gate to catch sheep mid-crossing
-        # (so they get marked penned via is_penned_position before the
-        # track goes stale). Detections deeper into the pen are
-        # dropped entirely — Webots's pen posts and rails would
-        # otherwise produce a torrent of phantom penned tracks that
-        # the tracker can't keep up with.
+        # Region filter: in-field clusters, plus a narrow strip south of
+        # the gate so sheep mid-crossing get latched penned. Detections
+        # deeper into the pen are dropped — pen posts and rails would
+        # otherwise generate phantom penned tracks.
        in_main = (FIELD_X[0] - 0.2 < cx < FIELD_X[1] + 0.2 and
                   FIELD_Y[0] - 0.2 < cy < FIELD_Y[1] + 0.2)
        in_gate_strip = (PEN_X[0] - 0.2 < cx < PEN_X[1] + 0.2 and
                         GATE_Y - 1.0 < cy < GATE_Y + 0.2)
        if not (in_main or in_gate_strip):
            continue
-        # Known-static-feature filter: gate posts and corner pillars
-        # show up as sheep-sized clusters but are never sheep.
+        # Known sheep-sized static features.
        if any(math.hypot(cx - fx, cy - fy) < STATIC_REJECT
               for fx, fy in _STATIC_FEATURES):
            continue
-        # Wall-proximity filter: at oblique scan angles, walls produce
-        # multiple short clusters because adjacent ray returns are
-        # spaced just above GAP_THRESHOLD. Sheep can't get within ~0.3 m
-        # of a wall (the env clips them to FIELD_INSIDE), so anything
-        # right at the wall line is structure noise.
+        # Wall-proximity filter — sheep can't get this close to a wall,
+        # so detections right at the wall line are structure noise.
        near_field_wall = (
            cx > FIELD_X[1] - WALL_REJECT or cx < FIELD_X[0] + WALL_REJECT or
            cy > FIELD_Y[1] - WALL_REJECT or
@@ -1,16 +1,12 @@
 """Fast 2D LiDAR simulator for the Gymnasium env.

-Raycasts against:
-  * **Sheep** — discs of radius ``SHEEP_RADIUS``.
-  * **Static world geometry** — axis-aligned wall segments and gate
-    posts taken from ``worlds/field.wbt``. Without these, demos
-    collected in-env would never include the false-positive clusters
-    Webots produces from the stone walls and gate-post boxes, and the
-    BC student trained on those demos collapses on deployment.
+Raycasts against sheep (discs) and static world geometry (axis-aligned
+walls + gate posts) so the env reproduces the false-positive cluster
+distribution Webots produces from real 3D geometry.

-Returns a range array matching the Webots Lidar device on the dog
-(see ``protos/ShepherdDog.proto``: 180 rays, 140° FOV centred on
-forward, 12 m max range, 5 mm noise).
+Returns a range array matching the Webots Lidar device:
+180 rays, 140° FOV centred on forward, 12 m max range, 5 mm noise.
+See ``protos/ShepherdDog.proto``.
 """

 from __future__ import annotations
@@ -26,19 +22,13 @@ LIDAR_FOV = 2.44       # rad ≈ 140°
 LIDAR_MAX_RANGE = 12.0
 LIDAR_NOISE = 0.005    # m, gaussian std

-# Sheep modelled as a vertical cylinder; this is the horizontal-section
-# radius the LiDAR plane intersects. Tuned to the proto sheep (~0.45 m
-# body length). The exact value is not load-bearing — the perception
-# clusterer is range-tolerant.
+# Sheep cross-section radius in the LiDAR plane (vertical-cylinder approx).
 SHEEP_RADIUS = 0.30


-# ---------------------------------------------------------------------------
-# Static world geometry — must match worlds/field.wbt
-# ---------------------------------------------------------------------------
+# --- Static world geometry — mirrors worlds/field.wbt ---

-# Vertical walls: (x, y_min, y_max). Field east/west walls and the two
-# pen side walls are visible through the open gate.
+# Vertical walls: (x, y_min, y_max).
 _VERTICAL_WALLS = (
    ( 15.0, -15.0,  15.0),  # field east
    (-15.0, -15.0,  15.0),  # field west
@@ -46,8 +36,7 @@ _VERTICAL_WALLS = (
    ( 13.0, -22.0, -15.0),  # pen east
 )

-# Horizontal walls: (y, x_min, x_max). South wall is split by the 3 m
-# gate at x ∈ [10, 13]; the pen south wall closes the back of the pen.
+# Horizontal walls: (y, x_min, x_max). South wall has a 3 m gap at the gate.
 _HORIZONTAL_WALLS = (
    ( 15.0, -15.0,  15.0),  # field north
    (-15.0, -15.0,  10.0),  # field south-west of gate
@@ -55,31 +44,23 @@ _HORIZONTAL_WALLS = (
    (-22.0,  10.0,  13.0),  # pen south
 )

-# Gate posts and field corner pillars treated as vertical cylinders at
-# LiDAR height. Radius 0.25 m comes from the 0.44 × 0.44 m boxes in the
-# wbt — close enough to a circular cross-section for this purpose.
+# Gate posts + field corner pillars, treated as discs at LiDAR height.
 _POSTS_XY = np.array([
-    ( 10.0, -15.0),  # west gate post
-    ( 13.0, -15.0),  # east gate post
-    ( 15.0,  15.0),  # NE field corner
-    ( 15.0, -15.0),  # SE field corner
-    (-15.0,  15.0),  # NW field corner
-    (-15.0, -15.0),  # SW field corner
+    ( 10.0, -15.0), ( 13.0, -15.0),
+    ( 15.0,  15.0), ( 15.0, -15.0),
+    (-15.0,  15.0), (-15.0, -15.0),
 ], dtype=np.float64)
 POST_RADIUS = 0.25


 def ray_angles(n: int = LIDAR_N_RAYS, fov: float = LIDAR_FOV) -> np.ndarray:
-    """Local-frame ray angles, sweeping from +fov/2 to -fov/2.
+    """Local-frame ray angles, CCW from forward, sweeping +fov/2 → -fov/2.

-    Convention: angle is measured CCW from the dog's forward axis. Ray 0
-    points to the dog's left, last ray to the right. Webots' default
-    Lidar sweep matches this.
+    Matches Webots' default Lidar sweep direction.
    """
    return np.linspace(fov / 2.0, -fov / 2.0, n, dtype=np.float64)


-# Cached so we don't rebuild every step.
 _ANGLES = ray_angles()
 _COS = np.cos(_ANGLES)
 _SIN = np.sin(_ANGLES)
@@ -88,13 +69,7 @@ _SIN = np.sin(_ANGLES)
 def _raycast_static(
    ox: float, oy: float, cos_w: np.ndarray, sin_w: np.ndarray,
 ) -> np.ndarray:
-    """Per-ray distance to nearest wall or post hit (∞ if none).
-
-    Walls are axis-aligned line segments; for each ray we compute t at
-    which it crosses the wall's constant-coord plane and check the
-    other coord lies in the segment. Posts are circles; same disc
-    intersection as for sheep.
-    """
+    """Per-ray distance to the nearest wall or post hit (∞ if none)."""
    n_rays = cos_w.shape[0]
    best = np.full(n_rays, np.inf, dtype=np.float64)

@@ -144,10 +119,7 @@ def simulate_scan(
 ) -> np.ndarray:
    """Return a (N,) float32 range array. No-hit entries equal ``max_range``.

-    ``sheep_xy`` is the list of (x, y) world positions of every sheep in
-    the scene (penned and active). Static world geometry (walls and
-    posts) is also raycast so demos contain the same false-positive
-    clusters Webots produces.
+    ``sheep_xy`` is every sheep (penned or active) in the scene.
    """
    n_rays = _ANGLES.shape[0]

@@ -172,8 +144,7 @@ def simulate_scan(
        nearest = candidate.min(axis=0)
        np.minimum(best, nearest, out=best)

-    # Clip to LIDAR_MAX_RANGE; entries that never got a hit stay at inf
-    # → clipped down to max_range like the real Webots device.
+    # Entries with no hit stay at inf → clipped to max_range, matching Webots.
    ranges = np.minimum(best, max_range).astype(np.float32)
    return _add_noise(ranges, noise, rng, max_range)

@@ -1,31 +1,25 @@
-"""Observation builder for the shepherd dog policy.
+"""Observation builder for the shepherd-dog policy.

-Order-invariant 32-D feature vector — the policy generalises across
-flock sizes 1..MAX_SHEEP because individual sheep coordinates never
-appear in the observation by index, only summary statistics, a polar
-histogram, and two "named" sheep (closest-to-pen and rearmost-from-pen).
-
-The two named sheep matter for the sequential-driving teacher: it
-targets the closest-to-pen sheep specifically, so the policy needs
-that channel to mimic the teacher.
+Order-invariant 32-D feature vector. Sheep never appear by index in
+the observation, only via summary statistics, a polar histogram, and
+two "named" channels (closest-to-pen, rearmost-from-pen) — so the
+policy generalises across flock sizes 1..MAX_SHEEP.

 Layout (all components normalised so values stay roughly in [-1, 1]):

-    idx   field
+    idx    field
    -----  ----------------------------------------------------------
-     0..3  dog pose: x/15, y/15, cos(heading), sin(heading)
+     0..3  dog pose: x/15, y/15, cos(h), sin(h)
     4..5  active-sheep CoM x/15, y/15
-     6..8  flock dispersion: max-radius/15, std_x/15, std_y/15
-     9..11 vector dog→CoM: dx/30, dy/30, dist/30
-    12..14 vector dog→pen-entry: dx/30, dy/30, dist/30
-    15..16 vector furthest-sheep→CoM: dx/15, dy/15
+     6..8  flock dispersion: max_radius/15, std_x/15, std_y/15
+     9..11 dog → CoM: dx/30, dy/30, dist/30
+    12..14 dog → pen entry: dx/30, dy/30, dist/30
+    15..16 furthest sheep → CoM: dx/15, dy/15
    17..18 min sheep-to-wall, min dog-to-wall (both /15)
-       19  active-sheep count / MAX_SHEEP
-    20..27 8-bin polar histogram of active sheep around the dog,
-           rotation-aware (binned in dog-relative frame), normalised
-           so the bins sum to 1.
-    28..29 vector dog→closest-to-pen sheep: dx/15, dy/15
-    30..31 vector dog→rearmost (furthest-from-pen) sheep: dx/15, dy/15
+       19  active sheep count / MAX_SHEEP
+    20..27 8-bin polar histogram of active sheep in the dog's body frame
+    28..29 dog → closest-to-pen sheep: dx/15, dy/15
+    30..31 dog → rearmost (furthest-from-pen) sheep: dx/15, dy/15
 """

 import math
@@ -68,7 +62,6 @@ def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
    obs[14] = math.hypot(pdx0, pdy0) / 30.0

    if n == 0:
-        # All sheep penned — terminal observation.
        obs[19] = 0.0
        return obs

@@ -110,7 +103,7 @@ def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
    obs[18] = float(min_dog_wall) / 15.0
    obs[19] = n / n_max

-    # 8-bin polar histogram in the dog's body frame.
+    # Polar histogram in the dog's body frame.
    rel_dx = arr[:, 0] - dog_x
    rel_dy = arr[:, 1] - dog_y
    angles = np.arctan2(rel_dy, rel_dx) - dog_heading
@@ -121,11 +114,9 @@ def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
    hist /= max(1, n)
    obs[20:28] = hist

-    # Closest-to-pen sheep (the sequential teacher's target) and rearmost
-    # (furthest-from-pen, the natural "next target" once the closest is
-    # penned). Both expressed as offset from dog. These two channels make
-    # BC tractable — without them the obs doesn't uniquely identify which
-    # sheep the teacher is steering toward.
+    # Closest-to-pen and rearmost (furthest-from-pen) sheep. Without
+    # these named channels the obs cannot uniquely identify which sheep
+    # the teacher is steering toward, and BC fails to mimic it.
    pen_dists = np.hypot(arr[:, 0] - PEN_ENTRY[0], arr[:, 1] - PEN_ENTRY[1])
    closest_idx = int(np.argmin(pen_dists))
    rearmost_idx = int(np.argmax(pen_dists))
@@ -1,25 +1,14 @@
 """Multi-target tracker for LiDAR-detected sheep.

-Greedy nearest-neighbour data association (with a distance gate) across
-frames, plus a memory of last-seen positions for tracks that fall out
-of the dog's FOV. Output is a ``{name: (x, y)}`` dict shaped exactly
-like the receiver-based ``sheep_positions`` used previously by the
-Webots controller and by the env, so Strömbom and Sequential can
-consume it unchanged.
+Greedy nearest-neighbour data association across frames, with a wider
+re-acquisition gate for stale tracks (sheep flee during occlusion and
+reappear off-position), plus memory of last-seen positions for sheep
+out of FOV. Output is ``{name: (x, y)}`` — Strömbom / Sequential
+consume it directly.

-Penned-detection heuristic
--------------------------
-Two ways a track is marked penned:
-  1. Its current estimated position is south of the gate plane and
-     within the gate column (the ``is_penned_position`` test the env
-     already uses on ground truth).
-  2. It hasn't been observed for ``STALE_STEPS`` and its last-seen
-     position was inside the gate-approach band — the dog's LiDAR can
-     only see ~2 m into the pen through the open gate, so a sheep
-     that disappeared near the entry has almost certainly entered.
-
-Tracks marked penned are excluded from ``get_positions()`` (which is
-what Strömbom consumes), matching the prior receiver-based behaviour.
+A track is marked penned once its estimated position crosses the gate
+plane south (``is_penned_position``). Penned tracks are excluded from
+``get_positions`` and kept indefinitely.
 """

 from __future__ import annotations
@@ -29,26 +18,22 @@ import math
 from herding.world.geometry import MAX_SHEEP, in_pen, is_penned_position


-GATE_M = 2.5              # m — primary NN gate (recent tracks)
-REACQUIRE_GATE_M = 4.5    # m — wider gate for re-acquiring stale tracks (sheep moved during occlusion)
-REACQUIRE_MIN_AGE = 20    # steps — only rebind via the wide gate if the track has been stale for this long
-PENNED_GATE_M = 4.0       # m — wide gate for matching against already-penned tracks; the pen is small (3×7 m) so duplicates are easy without it
-FORGET_STEPS = 200        # ~3.2 s — delete stale active tracks; tighter than 5 s to limit phantoms but long enough to bridge typical FOV gaps
-MAX_ACTIVE_TRACKS = MAX_SHEEP  # hard cap to the worst-case real flock size
-# Penned tracks are never forgotten: sheep don't leave the pen, and
-# losing the track makes the counter oscillate as the same sheep gets
-# re-detected and counted multiple times.
+GATE_M = 2.5              # m — primary NN gate (recently observed tracks)
+REACQUIRE_GATE_M = 4.5    # m — wider gate for re-binding stale tracks
+REACQUIRE_MIN_AGE = 20    # steps — track must be this stale to use the wider gate
+PENNED_GATE_M = 4.0       # m — gate for matching detections to existing penned tracks
+FORGET_STEPS = 200        # ~3.2 s — delete stale active tracks (penned ones kept forever)
+MAX_ACTIVE_TRACKS = MAX_SHEEP


 class SheepTracker:
-    """Online tracker with NN association and a forgetful memory.
+    """Online tracker with NN association and forgetful memory.

    Each track stores ``(x, y, last_seen_step, penned)``.
    """

    def __init__(self, gate: float = GATE_M):
        self.gate = gate
-        # tid → (x, y, last_seen_step, penned)
        self._tracks: dict[int, tuple[float, float, int, bool]] = {}
        self._next_id = 0
        self.step = 0
@@ -58,9 +43,6 @@ class SheepTracker:
        self._next_id = 0
        self.step = 0

-    # ------------------------------------------------------------------
-    # Update
-    # ------------------------------------------------------------------
    def update(self, detections: list[tuple[float, float]]) -> dict[str, tuple[float, float]]:
        """Fold a new set of detections in and return active positions."""
        self.step += 1
@@ -68,9 +50,9 @@ class SheepTracker:
        det_used: set[int] = set()
        updated_tids: set[int] = set()

-        # Pass 1: match against ACTIVE tracks first (oldest-seen-first so
-        # a re-emerging long-lost sheep grabs its old ID before a fresh
-        # neighbour does).
+        # Pass 1 — match active tracks within the primary gate. Oldest-
+        # seen tracks bind first, so a long-lost sheep that re-emerges
+        # regains its old ID before a fresher track takes its detection.
        active_tids = [tid for tid, t in self._tracks.items() if not t[3]]
        active_tids.sort(key=lambda tid: self._tracks[tid][2])
        for tid in active_tids:
@@ -89,12 +71,10 @@ class SheepTracker:
                det_used.add(best_j)
                updated_tids.add(tid)

-        # Pass 1b: re-acquisition with a wider gate for tracks that have
-        # been stale for ≥ REACQUIRE_MIN_AGE steps. Sheep flee at
-        # ~0.6 m/s; over a 1–2 s occlusion (dog rotating or driving)
-        # they move enough that a fresh detection lies outside the
-        # primary GATE_M but is still clearly the same sheep. Without
-        # this, phantom tracks accumulate and corrupt the CoM.
+        # Pass 1b — re-acquisition. Sheep flee at ~0.6 m/s, so over a
+        # 1–2 s occlusion the same sheep may reappear outside the primary
+        # gate. Allow rebinding within a wider gate for stale-enough
+        # tracks; otherwise phantom tracks accumulate and corrupt CoM.
        for tid in active_tids:
            if tid in updated_tids:
                continue
@@ -115,10 +95,7 @@ class SheepTracker:
                det_used.add(best_j)
                updated_tids.add(tid)

-        # Pass 2: match remaining detections against PENNED tracks with
-        # a tighter gate. Without this, every frame near the gate spawns
-        # a fresh penned track for the same sheep, which under a long
-        # Webots run leads to thousands of phantom penned tracks.
+        # Pass 2 — match remaining detections to penned tracks.
        penned_tids = [tid for tid, t in self._tracks.items() if t[3]]
        for tid in penned_tids:
            tx, ty, _, _ = self._tracks[tid]
@@ -135,9 +112,8 @@ class SheepTracker:
                self._tracks[tid] = (dx, dy, self.step, True)
                det_used.add(best_j)

-        # Unmatched detections → new tracks. A detection that is already
-        # inside the pen is born "penned" so we don't accumulate active
-        # tracks for sheep that arrived in the pen during occlusion.
+        # Spawn new tracks for unmatched detections. Born "penned" if
+        # the detection already sits inside the pen geometry.
        for j, (dx, dy) in enumerate(detections):
            if j in det_used:
                continue
@@ -145,44 +121,32 @@ class SheepTracker:
            self._tracks[self._next_id] = (dx, dy, self.step, penned)
            self._next_id += 1

-        # Promote active tracks to penned ONLY by geometric position
-        # (sheep is in the pen column south of the gate). The previous
-        # "stale + near gate" heuristic was firing on ordinary occlusion
-        # near the gate and creating phantom penned tracks.
+        # Promote active tracks whose current estimate crosses the gate.
        for tid, (tx, ty, last, penned) in list(self._tracks.items()):
            if penned:
                continue
            if is_penned_position(tx, ty):
                self._tracks[tid] = (tx, ty, last, True)

-        # Forget stale ACTIVE tracks after FORGET_STEPS. Penned tracks
-        # are kept indefinitely — sheep can't escape the pen, so once a
-        # track is marked penned, that sheep is permanently penned.
+        # Forget stale active tracks; penned tracks live forever.
        for tid, (tx, ty, last, penned) in list(self._tracks.items()):
            if penned:
                continue
            if (self.step - last) > FORGET_STEPS:
                del self._tracks[tid]

-        # Hard cap on the active set. If we somehow have more than
-        # MAX_ACTIVE_TRACKS active tracks, drop the oldest-seen ones
-        # first — they are most likely false positives from world
-        # geometry (walls, gate posts) the env's raycaster doesn't
-        # model, and a bloated active set wrecks the downstream CoM.
+        # Hard cap on the active set — drop the oldest-seen overflow.
        active = [(tid, last) for tid, (_, _, last, p) in self._tracks.items()
                  if not p]
        if len(active) > MAX_ACTIVE_TRACKS:
-            active.sort(key=lambda kv: kv[1])  # oldest-seen first
+            active.sort(key=lambda kv: kv[1])
            for tid, _ in active[: len(active) - MAX_ACTIVE_TRACKS]:
                del self._tracks[tid]

        return self.get_positions()

-    # ------------------------------------------------------------------
-    # Outputs
-    # ------------------------------------------------------------------
    def get_positions(self) -> dict[str, tuple[float, float]]:
-        """Active (not-yet-penned) tracks. Same shape as receiver dict."""
+        """Active (not-penned) tracks as a ``{name: (x, y)}`` dict."""
        return {f"t{tid}": (x, y)
                for tid, (x, y, _, penned) in self._tracks.items()
                if not penned}
@@ -1,11 +1,8 @@
-"""Differential-drive kinematics matching the Webots robot specs.
+"""Differential-drive kinematics, shared by the env and Webots controllers.

-The Webots controllers and the training env both use these helpers so the
-sim and the real (Webots) physics agree to first order. They do not model
-slip, wheel acceleration limits, or contact forces — Webots does that for
-us at inference time. The training env has to be close enough that a
-policy trained against this kinematic model still works when handed off
-to ODE physics.
+First-order rigid-body model — no slip, wheel-accel limits, or contact
+forces. Webots' ODE physics handles those at inference; the env only
+needs to agree with it to first order for a trained policy to transfer.
 """

 import math
@@ -34,10 +31,9 @@ def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt):

 def velocity_to_wheels(vx, vy, h, max_linear, wheel_radius, max_wheel_omega,
                       k_turn=4.0):
-    """Convert a desired (vx, vy) intent in [-1, 1]^2 to wheel speeds.
+    """Convert a desired (vx, vy) intent in [-1, 1]² to wheel speeds.

-    Mirrors ``drive_action`` in controllers/shepherd_dog/shepherd_dog.py:
-    forward speed scales by ``cos(err)`` (clamped to ±90°), and a P
+    Forward speed scales by ``cos(err)`` (clamped to ±90°); a P
    controller on heading error contributes the wheel-rate differential.
    """
    speed_ms = math.hypot(vx, vy) * max_linear
@@ -56,12 +52,7 @@ def velocity_to_wheels(vx, vy, h, max_linear, wheel_radius, max_wheel_omega,

 def heading_speed_to_wheels(heading, speed_motor, h, max_wheel_omega,
                            k_turn=4.0):
-    """Sheep variant: speed already expressed in motor (wheel rad/s) units.
-
-    Matches the existing sheep controller (``controllers/sheep/sheep.py``)
-    where ``speed = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))`` and
-    these constants are wheel angular velocities, not linear m/s.
-    """
+    """Sheep variant: speed in wheel rad/s, target as a heading angle."""
    err = math.atan2(math.sin(heading - h), math.cos(heading - h))
    fwd = max(0.0, math.cos(err)) * speed_motor
    turn = k_turn * err
@@ -1,24 +1,19 @@
-"""Sheep flocking dynamics — Strömbom 2014 / Reynolds 1987 hybrid.
+"""Sheep flocking dynamics — Strömbom 2014 / Reynolds 1987.

-This is the per-sheep behavioural step used both by the Webots sheep
-controller (scalar, one sheep at a time) and by the training environment
-(loop over sheep).
-
-Model
-----
-The force stack each step (summed → heading + speed):
+Per-sheep behavioural step used by both the Webots sheep controller
+and the training environment. Each step a force stack is summed:

    flee       — quadratic ramp away from dog within FLEE_DIST
-                 (Strömbom 2014 §2.1, term ρa)
+                 (Strömbom 2014, term ρa)
    cohesion   — drift toward local centre of mass of peers within
-                 COHESION_DIST (Strömbom 2014 §2.1, term c).
-                 Weight is **higher when fleeing** — modelling the
-                 "safety in numbers" / predator-confusion effect
-                 Strömbom 2014 describes as fear-induced cohesion.
+                 COHESION_DIST (Strömbom 2014, term c). Weight is
+                 higher while fleeing — fear-induced cohesion.
    separation — short-range inverse-distance repulsion from peers
-                 (Strömbom 2014 §2.1, term α; Reynolds 1987)
-    wander     — small persistent drift for natural idle motion
-                 (Strömbom 2014 §2.1, noise term ε)
+                 (Strömbom 2014 term α; Reynolds 1987)
+    wander     — small persistent drift (Strömbom 2014 noise term ε)
+
+Walls, the south-wall gate column, and in-pen containment are
+environment-specific additions for the fenced Webots field.

 References
 ----------
@@ -26,26 +21,6 @@ References
  for herding autonomous, interacting agents." J R Soc Interface 11.
 - Reynolds (1987). "Flocks, herds and schools: A distributed
  behavioural model." SIGGRAPH '87.
-
-Environment-specific adaptations
--------------------------------
-The original Strömbom model assumes an open field. Our scenario adds:
-
-* Field walls — soft repulsion within ``WALL_MARGIN`` plus a hard
-  escape band when inside ``WALL_HARD_MARGIN``. Necessary because the
-  Webots field is fenced (30 m square enclosure).
-* Gate column — the south wall has a 3 m gap at x ∈ [10, 13]; sheep
-  pass through it freely (no wall force inside the column).
-* Penned containment — once a sheep crosses the gate plane south
-  (``geometry.is_penned_position``), the caller flags ``penned=True``
-  and we switch to in-pen wall-bounce + jitter. Sheep do not exit the
-  pen on their own. This is a hard sim constraint, not a behavioural
-  claim about real sheep.
-
-Parameter tuning (cohesion weight 3× while fleeing) was chosen so the
-flock survives passage through the 3 m gate without fragmenting — this
-is a defensible engineering adaptation of Strömbom's qualitative
-"fear-induced cohesion" to our gate width.
 """

 import math
@@ -57,9 +32,7 @@ from herding.world.geometry import (
    GATE_X,
 )

-# --- Speed and force constants ---
-# All speeds here are in wheel rad/s (motor units), matching the existing
-# sheep controller. Conversion to m/s = speed * SHEEP_WHEEL_RADIUS.
+# Speeds are in wheel rad/s (motor units); m/s = speed * SHEEP_WHEEL_RADIUS.
 MAX_SPEED = 22.0
 FLEE_SPEED = 20.0
 WANDER_SPEED = 3.0
@@ -70,7 +43,7 @@ WALL_HARD_GAIN = 50.0

 FLEE_DIST = 7.0
 SEPARATION_DIST = 2.5
-COHESION_DIST = 12.0    # was 8.0 — wider engagement so far-flung sheep are pulled in
+COHESION_DIST = 12.0

 PEN_MARGIN = 0.8

@@ -85,21 +58,17 @@ def _peers_iter(peers):
 def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
    """Return ``(heading, speed, new_wander_angle)`` for one sheep step.

-    ``speed`` is in wheel rad/s (motor units), bounded by ``[WANDER_SPEED,
-    FLEE_SPEED]``. ``heading`` is the world-frame target heading the sheep
-    should aim for (atan2 convention).
-
-    ``rng`` is an optional ``random.Random``-compatible object used for
-    the wander-jitter. If ``None``, falls back to Python's global module
-    (matches Webots controller usage). Pass an env-owned RNG to make
-    rollouts deterministic given a seed.
+    ``speed`` is in wheel rad/s, bounded by ``[WANDER_SPEED, FLEE_SPEED]``.
+    ``heading`` is the world-frame target heading (atan2 convention).
+    ``rng`` is an optional ``random.Random`` used for wander jitter; if
+    ``None``, Python's global ``random`` module is used instead.
    """
    fx, fy = 0.0, 0.0
    peer_list = _peers_iter(peers)
    rnd = rng if rng is not None else random

    if penned:
-        # --- Pen containment: bounce off the four pen walls ---
+        # Pen containment: bounce off all four pen walls.
        pm = PEN_MARGIN
        if x < PEN_X[0] + pm:
            fx += ((PEN_X[0] + pm - x) / pm) * 15.0
@@ -110,7 +79,7 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
        if y > PEN_Y[1] - pm:
            fy -= ((y - (PEN_Y[1] - pm)) / pm) * 15.0

-        # Mild peer separation — penned sheep crowd the corner otherwise.
+        # Mild peer separation so penned sheep don't crowd one corner.
        for px, py in peer_list:
            dx, dy = px - x, py - y
            d = math.hypot(dx, dy)
@@ -125,7 +94,7 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
        fy += math.sin(wander_angle) * 0.5

    else:
-        # --- Free-roaming sheep in the field ---
+        # Free-roaming sheep in the field.
        fleeing = False
        if dog_xy is not None:
            ddx = dog_xy[0] - x
@@ -138,11 +107,9 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
                fx -= (ddx / dist) * s
                fy -= (ddy / dist) * s

-        # Cohesion — drift toward flock CoM (peers within COHESION_DIST).
-        # Cohesion is *stronger* under flee than at rest (the
-        # predator-confusion / safety-in-numbers effect — sheep huddle when
-        # threatened). This is what makes shepherding work: the flock stays
-        # as one unit through the narrow gate instead of fragmenting.
+        # Cohesion: drift toward the local CoM of peers within
+        # COHESION_DIST. Stronger while fleeing — fear-induced
+        # cohesion keeps the flock together through the gate.
        cx, cy, cn = 0.0, 0.0, 0
        for px, py in peer_list:
            d = math.hypot(px - x, py - y)
@@ -151,12 +118,6 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
                cy += py
                cn += 1
        if cn > 0:
-            # Cohesion needs to dominate flee at close range so the flock
-            # stays glued together when squeezing through the narrow gate.
-            # Flee at 2 m has magnitude ~10; cohesion of w=3.0 with the
-            # peer-CoM 4 m away contributes ~12, so the flock prefers
-            # bunching to dispersing under pressure. This is what makes
-            # canonical Strömbom drive work in our 3 m gate.
            w = 3.0 if fleeing else 1.0
            fx += (cx / cn - x) * w
            fy += (cy / cn - y) * w
@@ -170,8 +131,7 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
                fx -= (ddx / d) * push * 2.5
                fy -= (ddy / d) * push * 2.5

-        # Wall soft repulsion. The south wall is absent inside the gate
-        # column so sheep can be driven through it by the dog.
+        # Wall soft repulsion (south wall absent inside the gate column).
        if x < FIELD_X[0] + WALL_MARGIN:
            fx += ((FIELD_X[0] + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
        if x > FIELD_X[1] - WALL_MARGIN:
@@ -187,7 +147,7 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
            fx += math.cos(wander_angle) * 0.5
            fy += math.sin(wander_angle) * 0.5

-    # --- Hard escape band — overrides everything when very close to a wall ---
+    # Hard escape band — overrides everything else near a wall.
    m, g = WALL_HARD_MARGIN, WALL_HARD_GAIN
    if x - FIELD_X[0] < m:
        fx = max(fx, g * (1.0 - (x - FIELD_X[0]) / m))
@@ -195,7 +155,6 @@ def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
        fx = min(fx, -g * (1.0 - (FIELD_X[1] - x) / m))
    if FIELD_Y[1] - y < m:
        fy = min(fy, -g * (1.0 - (FIELD_Y[1] - y) / m))
-    # South wall hard escape only when not in the gate column and not penned.
    if (not penned) and (y - FIELD_Y[0] < m) and not (GATE_X[0] <= x <= GATE_X[1]):
        fy = max(fy, g * (1.0 - (y - FIELD_Y[0]) / m))

@@ -1,23 +1,15 @@
 """World geometry and robot specs.

-All coordinates are in meters. (0, 0) is the centre of the field, +x is
-east, +y is north. Z is up but unused here. These constants must match
-``worlds/field.wbt`` and the proto files; if the world changes, change
-this file and only this file.
-
-Pen layout (post-refactor)
--------------------------
-The pen is *external* to the field, accessed through a 3 m gate cut into
-the south stone wall at y = -15. Sheep entering through the gate end up
-in a fenced rectangle south of the field; the dog stays in the field
-(soft-limited above DOG_SOUTH_LIMIT during training and inference).
+Coordinates are metres; (0, 0) is the field centre, +x east, +y north.
+These constants mirror ``worlds/field.wbt`` and the proto files — if
+the world changes, this file is the single point of update.

    field        +y north
    +-----------+
    |           |
    |           |
    |  ......   |
-    +---||||----+   y = -15  (south wall, gate at x ∈ [10, 13])
+    +---||||----+   y = -15  (south wall, 3 m gate at x ∈ [10, 13])
        ||||
        |pen|       y ∈ [-22, -15]
        +---+
@@ -25,46 +17,38 @@ in a fenced rectangle south of the field; the dog stays in the field

 import math

-# --- Field (square, stone-walled) ---
+# Field (square, stone-walled)
 FIELD_X = (-15.0, 15.0)
 FIELD_Y = (-15.0, 15.0)
-
-# Conservative inside bounds — sheep/dog should not graze the wall.
 FIELD_INSIDE_MARGIN = 0.5

-# --- Pen (external, south of the field) ---
+# Pen (external, south of the field)
 PEN_X = (10.0, 13.0)
 PEN_Y = (-22.0, -15.0)
 PEN_CENTER = (0.5 * (PEN_X[0] + PEN_X[1]), 0.5 * (PEN_Y[0] + PEN_Y[1]))
-# The point the dog drives the flock toward: the gate centre on the field side.
 PEN_ENTRY = (0.5 * (PEN_X[0] + PEN_X[1]), -15.0)

-# --- Gate (the hole in the south stone wall) ---
+# Gate (hole in the south wall)
 GATE_X = PEN_X
 GATE_Y = -15.0

-# --- Robot specs (must match proto files) ---
-# Dog (controllers/shepherd_dog/, protos/ShepherdDog.proto)
+# Dog spec — protos/ShepherdDog.proto
 DOG_WHEEL_RADIUS = 0.038         # m
 DOG_WHEEL_BASE = 0.28            # m, axle-to-axle
 DOG_MAX_WHEEL_OMEGA = 70.0       # rad/s
-DOG_MAX_LINEAR = DOG_WHEEL_RADIUS * DOG_MAX_WHEEL_OMEGA  # ~2.66 m/s
+DOG_MAX_LINEAR = DOG_WHEEL_RADIUS * DOG_MAX_WHEEL_OMEGA  # ≈ 2.66 m/s

-# Sheep (controllers/sheep/, protos/Sheep.proto)
+# Sheep spec — protos/Sheep.proto
 SHEEP_WHEEL_RADIUS = 0.031       # m
 SHEEP_WHEEL_BASE = 0.20          # m
 SHEEP_MAX_WHEEL_OMEGA = 25.0     # rad/s
-SHEEP_MAX_LINEAR = SHEEP_WHEEL_RADIUS * SHEEP_MAX_WHEEL_OMEGA  # ~0.78 m/s
+SHEEP_MAX_LINEAR = SHEEP_WHEEL_RADIUS * SHEEP_MAX_WHEEL_OMEGA  # ≈ 0.78 m/s

-# --- Webots step ---
-WEBOTS_DT = 0.016  # seconds, matches WorldInfo.basicTimeStep = 16 in field.wbt
+WEBOTS_DT = 0.016                # seconds (WorldInfo.basicTimeStep = 16 ms)

-# --- Dog "virtual south wall" (training keeps dog out of the pen) ---
-# At inference the controller also clips to this so a slightly miscalibrated
-# policy doesn't accidentally drive into the pen and trap the sheep.
+# Virtual south wall — env and controller both keep the dog north of this.
 DOG_SOUTH_LIMIT = -14.5

-# --- Maximum supported flock size ---
 MAX_SHEEP = 10


@@ -85,12 +69,7 @@ def in_gate_corridor(x: float, y: float, margin: float = 0.0) -> bool:


 def is_penned_position(x: float, y: float, latch_margin: float = 0.2) -> bool:
-    """A sheep latches to "penned" once it crosses the gate plane south.
-
-    True iff x is inside the gate column (with a small margin) AND
-    y has dipped below the gate line. Once latched, the sheep is held by
-    in-pen forces and will not exit on its own.
-    """
+    """True iff x is in the gate column (± latch_margin) and y <= GATE_Y."""
    return (PEN_X[0] - latch_margin <= x <= PEN_X[1] + latch_margin
            and y <= GATE_Y)