Checkpoint 2

2026-05-07 22:00:10 +01:00
parent 90aa3bbcb4
commit 1bb9415414
37 changed files with 3068 additions and 2912 deletions
@@ -1,233 +1,148 @@
-"""
-Sheep flocking controller (Webots, Reynolds boids variant).
+"""Sheep flocking controller (Webots).

 Each sheep broadcasts its GPS position every 3 steps on channel 1 and
-listens for the dog and peer sheep positions.  Peers are keyed by robot
-name so each neighbour has exactly one current entry in the dict.
+listens for the dog and peer sheep positions. The behavioural step is
+delegated to ``herding.flocking_sim.compute_heading_speed`` so the
+training environment and Webots run identical sheep dynamics.

-Force stack each step (summed then converted to a heading + speed):
-    flee       — away from dog, quadratic ramp, dominant when close
-    cohesion   — toward flock centre, halved while fleeing
-    separation — inverse-distance push, prevents physical overlap
-    walls      — linear repulsion from field boundary
-    wander     — small persistent drift for natural idle motion
-
-Pen behaviour: on first entry into the quarantine pen the sheep latches
-permanently — it turns pink (via the exposed woolColor PROTO field) and
-the normal force stack is replaced by pen-confinement forces only.
+Pen behaviour: a sheep latches to ``penned`` the first time it crosses
+the south-wall gate plane into the gate corridor. Once latched it turns
+pink (via the exposed ``woolColor`` PROTO field) and the force stack
+switches to in-pen containment.
 """

-import random
 import math
+import os
+import random
+import sys
+
+# --- Make the shared herding/ package importable from this controller dir ---
+_HERE = os.path.dirname(os.path.abspath(__file__))
+_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
+if _PROJECT_ROOT not in sys.path:
+    sys.path.insert(0, _PROJECT_ROOT)
+
 from controller import Supervisor

-# ---------------------------------------------------------------------------
-# Tuning constants
-# ---------------------------------------------------------------------------
+from herding.diffdrive import heading_speed_to_wheels
+from herding.flocking_sim import MAX_SPEED, compute_heading_speed
+from herding.geometry import (
+    SHEEP_MAX_WHEEL_OMEGA,
+    is_penned_position,
+)

-MAX_SPEED    = 22.0   # rad/s hard clamp on both motors
-FLEE_SPEED   = 20.0   # rad/s upper bound while panicking
-WANDER_SPEED =  3.0   # rad/s lower bound during calm wandering
-
-X_MIN, X_MAX = -14.5, 14.5   # stone wall inner edges (metres)
-Y_MIN, Y_MAX = -14.5, 14.5
-WALL_MARGIN  =  3.5           # avoidance starts this far from the wall
-
-FLEE_DIST       = 7.0   # dog within this radius triggers flee (metres)
-SEPARATION_DIST = 2.5   # inverse-distance push active inside this radius
-COHESION_DIST   = 8.0   # pull toward flock centre active inside this radius
-
-PEN_X_MIN, PEN_X_MAX = 10.0, 13.0   # quarantine pen extents (metres)
-PEN_Y_MIN, PEN_Y_MAX = -15.0, -8.0  # open entrance at y=-8, gate at y=-15
-PEN_MARGIN = 0.8                     # confinement force starts this far from pen wall

 # ---------------------------------------------------------------------------
 # Device setup
 # ---------------------------------------------------------------------------

-robot    = Supervisor()
+robot = Supervisor()
 timestep = int(robot.getBasicTimeStep())
-name     = robot.getName()
+name = robot.getName()
 self_node = robot.getSelf()

-left_motor  = robot.getDevice("left wheel motor")
+left_motor = robot.getDevice("left wheel motor")
 right_motor = robot.getDevice("right wheel motor")
 left_motor.setPosition(float("inf"))
 right_motor.setPosition(float("inf"))
 left_motor.setVelocity(0.0)
 right_motor.setVelocity(0.0)
+MOTOR_MAX = min(left_motor.getMaxVelocity(), SHEEP_MAX_WHEEL_OMEGA)

-gps      = robot.getDevice("gps");      gps.enable(timestep)
-compass  = robot.getDevice("compass");  compass.enable(timestep)
+gps = robot.getDevice("gps");           gps.enable(timestep)
+compass = robot.getDevice("compass");   compass.enable(timestep)
 receiver = robot.getDevice("receiver"); receiver.enable(timestep)
-emitter  = robot.getDevice("emitter")
+emitter = robot.getDevice("emitter")
+

 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------

-def norm_angle(a):
-    return math.atan2(math.sin(a), math.cos(a))
-
-
 def bearing():
    # Compass returns north direction in sensor frame; for this Z-up world
    # with north = +Y, atan2(n[0], n[1]) gives the standard math angle
-    # (0 = east, π/2 = north) matching atan2(fy, fx) used for heading.
+    # (0 = east, π/2 = north) matching atan2(fy, fx) used for headings.
    n = compass.getValues()
    return math.atan2(n[0], n[1])


-def drive(heading, speed):
-    err = norm_angle(heading - bearing())
-    # Scale forward component by cos(err): at 90° error fwd→0 so the robot
-    # spins in place to realign rather than driving sideways at full speed.
-    fwd = speed * max(0.0, math.cos(err))
-    k = 4.0
-    left_motor.setVelocity( max(-MAX_SPEED, min(MAX_SPEED, fwd - k * err)))
-    right_motor.setVelocity(max(-MAX_SPEED, min(MAX_SPEED, fwd + k * err)))
+def drive(heading, speed_motor):
+    left_w, right_w = heading_speed_to_wheels(
+        heading, min(speed_motor, MAX_SPEED), bearing(), MOTOR_MAX, k_turn=4.0
+    )
+    left_motor.setVelocity(left_w)
+    right_motor.setVelocity(right_w)


 def paint_pink():
    # woolColor is declared as a PROTO field with IS binding to the DEF WOOL
-    # PBRAppearance baseColor.  Changing it here propagates to every USE WOOL
-    # shape on the body.  Direct field access avoids PROTO-internal opacity.
+    # PBRAppearance baseColor; setting it propagates to every USE WOOL shape.
    self_node.getField("woolColor").setSFColor([1.0, 0.55, 0.72])

+
 # ---------------------------------------------------------------------------
 # State
 # ---------------------------------------------------------------------------

 wander_angle = random.uniform(-math.pi, math.pi)
-step   = 0
-dog_x  = None
-dog_y  = None
-peers  = {}   # name → (x, y), one entry per neighbour, cleared every 30 steps
+step_count = 0
+dog_x, dog_y = None, None
+peers = {}        # name → (x, y), one entry per neighbour, cleared every 30 steps
 penned = False

+# Stuck detection: differential-drive sheep can pin against a wall and need
+# a forced reverse-and-rotate to escape. If displacement < STUCK_DIST for
+# STUCK_STEPS consecutive steps, drive toward field centre.
+_prev_x, _prev_y = None, None
+_stuck_count = 0
+STUCK_STEPS = 20
+STUCK_DIST = 0.05
+
+
 # ---------------------------------------------------------------------------
 # Main loop
 # ---------------------------------------------------------------------------

 while robot.step(timestep) != -1:
-    step += 1
+    step_count += 1
    pos = gps.getValues()
    x, y = pos[0], pos[1]

-    # Pen entry: one-way latch, never unset
-    if not penned and PEN_X_MIN < x < PEN_X_MAX and PEN_Y_MIN < y < PEN_Y_MAX:
+    # Pen entry: one-way latch. Penned sheep get pink wool and switch behaviour.
+    if not penned and is_penned_position(x, y):
        penned = True
        paint_pink()

-    # Refresh peer table (clear before receiving so fresh data is never lost)
-    if step % 30 == 0:
+    # Refresh peer table — clear before receiving so fresh data is never lost.
+    if step_count % 30 == 0:
        peers.clear()
    while receiver.getQueueLength() > 0:
        msg = receiver.getString()
        receiver.nextPacket()
-        p = msg.split(":")
-        if p[0] == "dog" and len(p) >= 3:
-            dog_x, dog_y = float(p[1]), float(p[2])
-        elif p[0] == "sheep" and len(p) >= 4 and p[1] != name:
-            peers[p[1]] = (float(p[2]), float(p[3]))
+        parts = msg.split(":")
+        if parts[0] == "dog" and len(parts) >= 3:
+            dog_x, dog_y = float(parts[1]), float(parts[2])
+        elif parts[0] == "sheep" and len(parts) >= 4 and parts[1] != name:
+            peers[parts[1]] = (float(parts[2]), float(parts[3]))

-    fx, fy = 0.0, 0.0
+    dog_xy = (dog_x, dog_y) if dog_x is not None and dog_y is not None else None
+    heading, speed, wander_angle = compute_heading_speed(
+        x=x, y=y, penned=penned, dog_xy=dog_xy, peers=peers,
+        wander_angle=wander_angle,
+    )

-    # Repel unpenned sheep from the exterior of the pen's side walls so they
-    # don't get pinned by flee forces. Only fires when strictly outside the pen
-    # (x < PEN_X_MIN or x > PEN_X_MAX) at pen height (y in pen y-range).
-    # Entrance is open on the north (y > PEN_Y_MAX) — no force there.
-    PEN_EXT_MARGIN = 0.8
-    if not penned and PEN_Y_MIN < y < PEN_Y_MAX:
-        if PEN_X_MIN - PEN_EXT_MARGIN < x < PEN_X_MIN:
-            fx -= ((x - (PEN_X_MIN - PEN_EXT_MARGIN)) / PEN_EXT_MARGIN) * 6.0
-        if PEN_X_MAX < x < PEN_X_MAX + PEN_EXT_MARGIN:
-            fx += ((PEN_X_MAX + PEN_EXT_MARGIN - x) / PEN_EXT_MARGIN) * 6.0
+    # Stuck detection — safety net for differential-drive wall pinning.
+    if _prev_x is not None:
+        moved = math.hypot(x - _prev_x, y - _prev_y)
+        _stuck_count = _stuck_count + 1 if moved < STUCK_DIST else 0
+    if _stuck_count >= STUCK_STEPS:
+        heading = math.atan2(-y, -x)   # always points away from the boundary
+        speed = MAX_SPEED
+        _stuck_count = 0
+    _prev_x, _prev_y = x, y

-    if penned:
-        # Inside pen: wander freely, strong boundary forces prevent exit,
-        # separation still active to avoid collisions with other penned sheep.
-
-        pm = PEN_MARGIN
-        if x < PEN_X_MIN + pm: fx += ((PEN_X_MIN + pm - x) / pm) * 15.0
-        if x > PEN_X_MAX - pm: fx -= ((x - (PEN_X_MAX - pm)) / pm) * 15.0
-        if y < PEN_Y_MIN + pm: fy += ((PEN_Y_MIN + pm - y) / pm) * 15.0
-        if y > PEN_Y_MAX - pm: fy -= ((y - (PEN_Y_MAX - pm)) / pm) * 15.0
-
-        for px, py in peers.values():
-            dx, dy = px - x, py - y
-            d = math.hypot(dx, dy)
-            if 0.05 < d < SEPARATION_DIST:
-                push = (SEPARATION_DIST - d) / d
-                fx -= (dx / d) * push * 2.5
-                fy -= (dy / d) * push * 2.5
-
-        if random.random() < 0.02:
-            wander_angle += random.uniform(-0.6, 0.6)
-        fx += math.cos(wander_angle) * 0.5
-        fy += math.sin(wander_angle) * 0.5
-
-    else:
-        fleeing = False
-
-        # Flee — quadratic ramp so force grows rapidly as the dog closes in
-        if dog_x is not None:
-            dx   = dog_x - x
-            dy   = dog_y - y
-            dist = math.hypot(dx, dy)
-            if 0.01 < dist < FLEE_DIST:
-                fleeing = True
-                t = 1.0 - dist / FLEE_DIST
-                s = t * t * 20.0
-                fx -= (dx / dist) * s
-                fy -= (dy / dist) * s
-
-        # Cohesion — halved while fleeing to reduce mid-panic collisions
-        cx, cy, cn = 0.0, 0.0, 0
-        for px, py in peers.values():
-            d = math.hypot(px - x, py - y)
-            if 0.3 < d < COHESION_DIST:
-                cx += px; cy += py; cn += 1
-        if cn > 0:
-            w = 0.08 if fleeing else 0.15
-            fx += (cx / cn - x) * w
-            fy += (cy / cn - y) * w
-
-        # Separation — inverse-distance: huge when nearly overlapping, fades quickly
-        for px, py in peers.values():
-            dx, dy = px - x, py - y
-            d = math.hypot(dx, dy)
-            if 0.05 < d < SEPARATION_DIST:
-                push = (SEPARATION_DIST - d) / d
-                fx -= (dx / d) * push * 2.5
-                fy -= (dy / d) * push * 2.5
-
-        # Walls
-        if x < X_MIN + WALL_MARGIN: fx += ((X_MIN + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
-        if x > X_MAX - WALL_MARGIN: fx -= ((x - (X_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
-        if y < Y_MIN + WALL_MARGIN: fy += ((Y_MIN + WALL_MARGIN - y) / WALL_MARGIN) * 6.0
-        if y > Y_MAX - WALL_MARGIN: fy -= ((y - (Y_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
-
-        # Wander — suppressed while fleeing so drift cannot deflect the flee heading
-        if not fleeing:
-            if random.random() < 0.02:
-                wander_angle += random.uniform(-0.6, 0.6)
-            fx += math.cos(wander_angle) * 0.5
-            fy += math.sin(wander_angle) * 0.5
-
-    # Hard-stop clamp: within 0.5 m of a wall, zero any force component that
-    # would push further into it.  Prevents the flee force from pinning a sheep
-    # against the boundary when the dog approaches from outside.
-    HS = 0.5
-    if x < X_MIN + HS and fx < 0: fx = 0.0
-    if x > X_MAX - HS and fx > 0: fx = 0.0
-    if y < Y_MIN + HS and fy < 0: fy = 0.0
-    if y > Y_MAX - HS and fy > 0: fy = 0.0
-
-    heading = math.atan2(fy, fx)
-    mag     = math.hypot(fx, fy)
-    speed   = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))
    drive(heading, speed)

-    if step % 3 == 0:
+    if step_count % 3 == 0:
        emitter.send(f"sheep:{name}:{x:.4f}:{y:.4f}")
@@ -0,0 +1,78 @@
+"""Lazy loader for the SB3 PPO policy used by the dog controller.
+
+Importing stable-baselines3 inside the Webots Python interpreter is only
+needed when ``HERDING_MODE=rl``; the Strömbom mode runs without it. This
+loader keeps SB3 out of the import path until you actually ask for the RL
+policy, so users without SB3 installed can still run the Strömbom
+baseline.
+
+The policy + VecNormalize statistics are saved together by
+``training/train_ppo.py``:
+
+    runs/<name>/best/best_model.zip     # SB3 PPO checkpoint
+    runs/<name>/best/vecnormalize.pkl   # observation-normaliser stats
+
+Pass either the directory or the explicit zip path.
+"""
+
+import os
+from pathlib import Path
+
+
+class PolicyHandle:
+    """Wrap a loaded PPO policy + VecNormalize so the controller can call
+    ``predict(obs)`` without thinking about either."""
+
+    def __init__(self, model, vecnorm):
+        self.model = model
+        self.vecnorm = vecnorm
+
+    def predict(self, obs):
+        # VecNormalize expects a batched obs of shape (n_envs, obs_dim).
+        if self.vecnorm is not None:
+            import numpy as np
+            obs_b = np.asarray(obs, dtype=np.float32).reshape(1, -1)
+            obs_b = self.vecnorm.normalize_obs(obs_b)
+        else:
+            import numpy as np
+            obs_b = np.asarray(obs, dtype=np.float32).reshape(1, -1)
+        action, _ = self.model.predict(obs_b, deterministic=True)
+        return action[0]
+
+
+def load(model_path: str, vecnorm_path: str | None = None) -> PolicyHandle:
+    """Load a PPO model (and optional VecNormalize) from disk.
+
+    ``model_path`` may be the .zip checkpoint or a directory containing
+    ``best_model.zip`` (and optionally ``vecnormalize.pkl``).
+    """
+    p = Path(model_path)
+    if p.is_dir():
+        zip_candidates = [p / "best_model.zip", p / "final.zip", p / "policy.zip"]
+        zip_path = next((z for z in zip_candidates if z.exists()), None)
+        if zip_path is None:
+            raise FileNotFoundError(
+                f"No PPO zip found in {p} (looked for best_model.zip, final.zip, policy.zip)"
+            )
+        if vecnorm_path is None:
+            vn = p / "vecnormalize.pkl"
+            if vn.exists():
+                vecnorm_path = str(vn)
+    else:
+        zip_path = p
+
+    # Imports deferred so the Strömbom path doesn't require SB3.
+    from stable_baselines3 import PPO
+    from stable_baselines3.common.vec_env import VecNormalize
+
+    model = PPO.load(str(zip_path), device="auto")
+    vecnorm = None
+    if vecnorm_path and os.path.exists(vecnorm_path):
+        # VecNormalize.load needs a venv to attach to; we only need its stats
+        # at inference, so we reconstruct the wrapper manually.
+        import pickle
+        with open(vecnorm_path, "rb") as f:
+            vecnorm = pickle.load(f)
+        vecnorm.training = False
+        vecnorm.norm_reward = False
+    return PolicyHandle(model=model, vecnorm=vecnorm)
@@ -1,88 +1,283 @@
-"""
-Shepherd Dog controller (Webots, manual keyboard control).
+"""Shepherd Dog controller (Webots).

-WASD / arrow keys drive the robot.  +/- adjust speed in 10 % increments.
-GPS position is broadcast every step on channel 1 so sheep controllers
-can compute flee forces.  Ears wag continuously via sinusoidal position
-targets — purely cosmetic.
+Runs in one of two modes selected by the ``HERDING_MODE`` environment
+variable:
+
+    HERDING_MODE=rl        → load an SB3 PPO policy from
+                             HERDING_POLICY_DIR (default
+                             training/runs/latest/best) and use its
+                             (vx, vy) action each step.
+    HERDING_MODE=strombom  → use the analytic Strömbom collect/drive
+                             heuristic. This is the fallback if the RL
+                             policy can't be loaded (e.g. SB3 not
+                             installed in the Webots Python env, or no
+                             checkpoint yet).
+
+Both modes share the same low-level differential-drive controller
+(``herding.diffdrive.velocity_to_wheels`` + clamped forward speed), so
+switching modes does not retune the actuation layer.
+
+A safety supervisor enforces the "dog stays out of the pen" invariant:
+if the action would push the dog past ``DOG_SOUTH_LIMIT`` it is
+overridden with a north-driving correction. This is a hard guarantee
+the policy cannot escape.
 """

 import math
-from controller import Robot, Keyboard
+import os
+import sys

-robot    = Robot()
+# --- Make the shared herding/ package importable from this controller dir ---
+_HERE = os.path.dirname(os.path.abspath(__file__))
+_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
+if _PROJECT_ROOT not in sys.path:
+    sys.path.insert(0, _PROJECT_ROOT)
+
+from controller import Robot
+
+from herding.diffdrive import velocity_to_wheels
+from herding.geometry import (
+    DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
+    DOG_SOUTH_LIMIT, DOG_WHEEL_RADIUS,
+    PEN_ENTRY,
+)
+from herding.obs import build_obs
+from herding.sequential import compute_action_debug as sequential_action_debug
+from herding.strombom import compute_action_debug as strombom_action_debug
+
+
+# ---------------------------------------------------------------------------
+# Mode selection
+# ---------------------------------------------------------------------------
+
+def _load_runtime_config():
+    """Read mode + policy_dir overrides from a runtime config file.
+
+    Webots strips HERDING_* env vars in some configurations, so the
+    launcher writes a tiny ``herding_runtime.cfg`` (key=value lines)
+    in the project root and the controller reads it here. Env vars
+    win if both are present; the file is the fallback.
+    """
+    cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
+    if not os.path.exists(cfg_path):
+        return {}
+    out = {}
+    try:
+        with open(cfg_path) as f:
+            for line in f:
+                line = line.strip()
+                if not line or line.startswith("#") or "=" not in line:
+                    continue
+                k, _, v = line.partition("=")
+                out[k.strip().upper()] = v.strip()
+    except OSError:
+        return {}
+    return out
+
+
+_runtime_cfg = _load_runtime_config()
+MODE = (os.environ.get("HERDING_MODE")
+        or _runtime_cfg.get("HERDING_MODE")
+        or "rl").lower()
+
+
+def _resolve_policy_dir() -> str:
+    """Where to look for the trained policy.
+
+    Priority:
+      1. HERDING_POLICY_DIR env var (if set and points to a real dir)
+      2. training/runs/bc_pretrained/  (BC-only checkpoint)
+      3. training/runs/bc_ppo/best/    (PPO fine-tuned best)
+      4. training/runs/latest/best/    (legacy default)
+    """
+    env_dir = (os.environ.get("HERDING_POLICY_DIR")
+               or _runtime_cfg.get("HERDING_POLICY_DIR"))
+    if env_dir and os.path.isdir(env_dir):
+        return env_dir
+    candidates = [
+        os.path.join(_PROJECT_ROOT, "training", "runs", "bc_pretrained"),
+        os.path.join(_PROJECT_ROOT, "training", "runs", "bc_ppo", "best"),
+        os.path.join(_PROJECT_ROOT, "training", "runs", "latest", "best"),
+    ]
+    for c in candidates:
+        if os.path.isdir(c):
+            return c
+    # Last resort — return env var anyway so error message is informative.
+    return env_dir or candidates[0]
+
+
+POLICY_DIR = _resolve_policy_dir()
+
+policy_handle = None
+if MODE == "rl":
+    print(f"[dog] HERDING_MODE={MODE}  HERDING_POLICY_DIR(env)="
+          f"{os.environ.get('HERDING_POLICY_DIR', '<unset>')}")
+    print(f"[dog] resolved POLICY_DIR={POLICY_DIR}  exists="
+          f"{os.path.isdir(POLICY_DIR)}")
+    if os.path.isdir(POLICY_DIR):
+        try:
+            entries = sorted(os.listdir(POLICY_DIR))
+        except OSError:
+            entries = []
+        print(f"[dog] dir contents: {entries}")
+    try:
+        from policy_loader import load as _load_policy
+        policy_handle = _load_policy(POLICY_DIR)
+        print(f"[dog] RL policy loaded from {POLICY_DIR}")
+    except Exception as exc:
+        print(f"[dog] RL policy load failed ({exc!r}); falling back to Strömbom.")
+        MODE = "strombom"
+if MODE not in ("rl", "strombom", "sequential"):
+    print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
+    MODE = "strombom"
+print(f"[dog] running in mode={MODE}")
+
+
+# ---------------------------------------------------------------------------
+# Action smoothing + safety supervisor
+# ---------------------------------------------------------------------------
+
+ACTION_SMOOTH = 0.35
+prev_action = (0.0, 0.0)
+
+
+def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
+    """If the dog is near the south barrier and the action would push it
+    further south, override with a northward action. Hard invariant: the
+    dog never enters the pen."""
+    if dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
+        return (0.0, 1.0)
+    if dog_y < DOG_SOUTH_LIMIT + 0.5 and vy < -0.2:
+        return (vx * 0.5, max(0.0, vy + 0.5))
+    return (vx, vy)
+
+
+# ---------------------------------------------------------------------------
+# Driving
+# ---------------------------------------------------------------------------
+
+def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: float):
+    if math.hypot(vx, vy) < 1e-3:
+        left_motor.setVelocity(0.0)
+        right_motor.setVelocity(0.0)
+        return
+    n = compass.getValues()
+    h = math.atan2(n[0], n[1])
+    left, right = velocity_to_wheels(
+        vx, vy, h,
+        max_linear=DOG_MAX_LINEAR,
+        wheel_radius=DOG_WHEEL_RADIUS,
+        max_wheel_omega=motor_max,
+        k_turn=4.0,
+    )
+    left_motor.setVelocity(left)
+    right_motor.setVelocity(right)
+
+
+# ---------------------------------------------------------------------------
+# Webots devices
+# ---------------------------------------------------------------------------
+
+robot = Robot()
 timestep = int(robot.getBasicTimeStep())

-left_motor  = robot.getDevice("left wheel motor")
+left_motor = robot.getDevice("left wheel motor")
 right_motor = robot.getDevice("right wheel motor")
 left_motor.setPosition(float("inf"))
 right_motor.setPosition(float("inf"))
 left_motor.setVelocity(0.0)
 right_motor.setVelocity(0.0)
+MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)

-lidar = robot.getDevice("lidar")
-lidar.enable(timestep)
-lidar.enablePointCloud()
-
-gps     = robot.getDevice("gps");     gps.enable(timestep)
-compass = robot.getDevice("compass"); compass.enable(timestep)
-emitter = robot.getDevice("emitter")
+gps = robot.getDevice("gps");           gps.enable(timestep)
+compass = robot.getDevice("compass");   compass.enable(timestep)
 receiver = robot.getDevice("receiver"); receiver.enable(timestep)
+emitter = robot.getDevice("emitter")

-left_ear  = robot.getDevice("left ear motor")
+# Cosmetic ear motors — ignored by control logic but keep them animated.
+left_ear = robot.getDevice("left ear motor")
 right_ear = robot.getDevice("right ear motor")
 left_ear.setPosition(float("inf"))
 right_ear.setPosition(float("inf"))
 left_ear.setVelocity(0.0)
 right_ear.setVelocity(0.0)
+ear_phase = 0.0
+EAR_AMPLITUDE = 0.35
+EAR_RATE = 8.0

-keyboard = robot.getKeyboard()
-keyboard.enable(timestep)

-MOTOR_MAX   = left_motor.getMaxVelocity()
-speed_level = 0.5   # fraction of MOTOR_MAX; adjusted by +/-
+# ---------------------------------------------------------------------------
+# Main loop
+# ---------------------------------------------------------------------------

-EAR_AMPLITUDE = 0.35   # rad, peak ear deflection
-EAR_RATE      = 8.0    # rad/s, how fast the ears are driven
-ear_phase     = 0.0
+# {name: (x, y)} — kept across all sheep ever heard from. Sheep that drift
+# into the pen are tracked by ``penned`` so observations and Strömbom
+# agree on which ones still need herding.
+sheep_positions: dict = {}
+penned_set: set = set()
+step_count = 0
+
+from herding.geometry import is_penned_position

 while robot.step(timestep) != -1:
-    speed = MOTOR_MAX * speed_level
-    turn  = speed * 0.6   # differential turn radius
+    step_count += 1

-    left_vel  = 0.0
-    right_vel = 0.0
-    key = keyboard.getKey()
-    while key > 0:
-        if   key in (ord('W'), Keyboard.UP):
-            left_vel  = speed
-            right_vel = speed
-        elif key in (ord('S'), Keyboard.DOWN):
-            left_vel  = -speed
-            right_vel = -speed
-        elif key in (ord('A'), Keyboard.LEFT):
-            left_vel  = -turn
-            right_vel =  turn
-        elif key in (ord('D'), Keyboard.RIGHT):
-            left_vel  =  turn
-            right_vel = -turn
-        elif key in (ord('+'), ord('=')):
-            speed_level = min(1.0, speed_level + 0.1)
-            print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
-        elif key in (ord('-'), ord('_')):
-            speed_level = max(0.1, speed_level - 0.1)
-            print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
-        key = keyboard.getKey()
-
-    left_motor.setVelocity(left_vel)
-    right_motor.setVelocity(right_vel)
+    while receiver.getQueueLength() > 0:
+        msg = receiver.getString()
+        receiver.nextPacket()
+        parts = msg.split(":")
+        if len(parts) == 4 and parts[0] == "sheep":
+            try:
+                x, y = float(parts[2]), float(parts[3])
+            except ValueError:
+                continue
+            sheep_positions[parts[1]] = (x, y)
+            if parts[1] not in penned_set and is_penned_position(x, y):
+                penned_set.add(parts[1])

    pos = gps.getValues()
-    emitter.send(f"dog:{pos[0]}:{pos[1]}")
+    dog_xy = (pos[0], pos[1])
+    n = compass.getValues()
+    dog_heading = math.atan2(n[0], n[1])

+    # ---- Action selection ----
+    if MODE == "rl" and policy_handle is not None:
+        sheep_xy_list = list(sheep_positions.values())
+        sheep_names = list(sheep_positions.keys())
+        sheep_penned_list = [s in penned_set for s in sheep_names]
+        obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)
+        action = policy_handle.predict(obs)
+        vx, vy = float(action[0]), float(action[1])
+    elif MODE == "sequential":
+        vx, vy, _mode_str, _dbg = sequential_action_debug(
+            dog_xy, sheep_positions, PEN_ENTRY,
+        )
+    else:
+        # Strömbom (canonical baseline).
+        vx, vy, _mode_str, _dbg = strombom_action_debug(
+            dog_xy, sheep_positions, PEN_ENTRY,
+        )
+
+    # EMA smoothing — reduces oscillation from policy or Strömbom flips.
+    vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
+    vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
+
+    # Safety: dog must never enter the pen.
+    vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
+    prev_action = (vx, vy)
+
+    drive(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
+    emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")
+
+    # Cosmetic ear wiggle — purely visual.
    ear_phase += 0.12
    ear_pos = EAR_AMPLITUDE * math.sin(ear_phase)
    left_ear.setVelocity(EAR_RATE)
    right_ear.setVelocity(EAR_RATE)
-    left_ear.setPosition( ear_pos)
+    left_ear.setPosition(ear_pos)
    right_ear.setPosition(-ear_pos)
+
+    if step_count % 200 == 0:
+        n_active = sum(1 for s in sheep_positions if s not in penned_set)
+        print(f"[dog mode={MODE}] step={step_count} known={len(sheep_positions)} "
+              f"penned={len(penned_set)} active={n_active} action=({vx:+.2f}, {vy:+.2f})")
@@ -1,153 +0,0 @@
-"""
-Render Webots-side debug trajectory from debug.csv.
-
-The shepherd_dog_rl controller writes per-step state to debug.csv when
-DOG_DEBUG=1. This script reads it and produces:
-
-  trajectory.png   — dog path + sheep paths overlaid on the field
-  obs_drift.png    — normalized observation distribution over time
-  actions.png      — vx, vy time series
-
-Run:
-    python plot_debug.py                    # uses debug.csv next to this file
-    python plot_debug.py --csv path/to.csv --out-dir somewhere/
-"""
-import argparse
-import csv
-import os
-import sys
-
-import matplotlib
-matplotlib.use("Agg")
-import matplotlib.pyplot as plt
-import matplotlib.patches as mpatches
-import numpy as np
-
-
-def load_csv(path):
-    rows = []
-    with open(path) as f:
-        rd = csv.DictReader(f)
-        for r in rd:
-            rows.append(r)
-    if not rows:
-        sys.exit(f"empty CSV: {path}")
-    return rows
-
-
-def parse_floats(s):
-    return [float(x) for x in s.split(";") if x]
-
-
-def plot_trajectory(rows, out_path):
-    fig, ax = plt.subplots(figsize=(7, 7))
-    ax.set_xlim(-16, 16); ax.set_ylim(-16, 16); ax.set_aspect("equal")
-    ax.set_facecolor("#dcedc8")
-    ax.add_patch(mpatches.Rectangle((-15, -15), 30, 30,
-                 fill=False, edgecolor="#795548", lw=2))
-    ax.add_patch(mpatches.Rectangle((10, -15), 3, 7,
-                 facecolor="#ffe082", edgecolor="#795548", lw=2))
-    ax.text(11.5, -11.5, "pen", ha="center", va="center", fontsize=8)
-
-    dog_x = [float(r["dog_x"]) for r in rows]
-    dog_y = [float(r["dog_y"]) for r in rows]
-    ax.plot(dog_x, dog_y, color="#4e342e", lw=1.5, alpha=0.7, label="dog")
-    ax.plot(dog_x[0], dog_y[0], "s", color="#4e342e", ms=10)
-    ax.plot(dog_x[-1], dog_y[-1], "D", color="#4e342e", ms=10)
-
-    # Sheep — re-shape into per-sheep tracks
-    sx_all = [parse_floats(r["sheep_xs"]) for r in rows]
-    sy_all = [parse_floats(r["sheep_ys"]) for r in rows]
-    if sx_all and sx_all[-1]:
-        n_sheep = len(sx_all[-1])
-        palette = ["#e41a1c","#377eb8","#4daf4a","#984ea3","#ff7f00",
-                   "#a65628","#f781bf","#999999","#66c2a5","#fc8d62"]
-        for i in range(n_sheep):
-            xs = [r[i] if i < len(r) else None for r in sx_all]
-            ys = [r[i] if i < len(r) else None for r in sy_all]
-            xs = [x for x in xs if x is not None]
-            ys = [y for y in ys if y is not None]
-            if xs:
-                c = palette[i % len(palette)]
-                ax.plot(xs, ys, color=c, lw=0.8, alpha=0.6, label=f"sheep {i+1}")
-                ax.plot(xs[0], ys[0], "o", color=c, ms=6)
-                ax.plot(xs[-1], ys[-1], "*", color=c, ms=10)
-
-    n_in_pen = int(rows[-1]["n_penned"])
-    ax.set_title(f"Webots trajectory  {len(rows)} steps  penned={n_in_pen}",
-                 fontsize=12)
-    ax.legend(loc="upper left", fontsize=7, ncol=2)
-    plt.tight_layout()
-    fig.savefig(out_path, dpi=120)
-    plt.close(fig)
-
-
-def plot_actions(rows, out_path):
-    t = np.arange(len(rows))
-    vx = np.array([float(r["vx"]) for r in rows])
-    vy = np.array([float(r["vy"]) for r in rows])
-    mag = np.sqrt(vx ** 2 + vy ** 2)
-
-    fig, axes = plt.subplots(3, 1, figsize=(12, 7), sharex=True)
-    axes[0].plot(t, vx, color="tab:red", lw=0.8); axes[0].set_ylabel("vx")
-    axes[0].axhline(0, color="black", lw=0.4); axes[0].set_ylim(-1.1, 1.1)
-    axes[1].plot(t, vy, color="tab:blue", lw=0.8); axes[1].set_ylabel("vy")
-    axes[1].axhline(0, color="black", lw=0.4); axes[1].set_ylim(-1.1, 1.1)
-    axes[2].plot(t, mag, color="tab:purple", lw=0.8); axes[2].set_ylabel("||action||")
-    axes[2].axhline(np.sqrt(2), color="orange", ls="--", lw=1, label="saturated √2")
-    axes[2].axhline(1.0, color="gray", ls="--", lw=1)
-    axes[2].set_xlabel("step"); axes[2].legend(fontsize=8)
-    fig.suptitle("Webots action time series")
-    plt.tight_layout()
-    fig.savefig(out_path, dpi=120)
-    plt.close(fig)
-
-
-def plot_obs(rows, out_path):
-    norm = np.array([parse_floats(r["norm_obs"]) for r in rows])
-    raw  = np.array([parse_floats(r["raw_obs"])  for r in rows])
-    if norm.size == 0:
-        return
-    n_dims = norm.shape[1]
-    labels = [
-        "dog_x", "dog_y", "com-dog_x", "com-dog_y",
-        "far1-com_x", "far1-com_y", "far2-com_x", "far2-com_y",
-        "far3-com_x", "far3-com_y", "pen-com_x", "pen-com_y",
-        "pen-far1_x", "pen-far1_y", "radius", "frac_active",
-    ][:n_dims]
-
-    t = np.arange(norm.shape[0])
-    fig, axes = plt.subplots(n_dims, 1, figsize=(11, 1.0 * n_dims), sharex=True)
-    if n_dims == 1: axes = [axes]
-    for i in range(n_dims):
-        axes[i].plot(t, raw[:, i], color="tab:gray", lw=0.6, alpha=0.6, label="raw")
-        axes[i].plot(t, norm[:, i], color="tab:red", lw=0.8, label="normalised")
-        axes[i].set_ylabel(labels[i], fontsize=8)
-        axes[i].tick_params(labelsize=7)
-        if i == 0:
-            axes[i].legend(fontsize=7, loc="upper right")
-    axes[-1].set_xlabel("step")
-    fig.suptitle("Observation values over time (raw vs VecNormalize-normalised)")
-    plt.tight_layout()
-    fig.savefig(out_path, dpi=110)
-    plt.close(fig)
-
-
-def main():
-    p = argparse.ArgumentParser()
-    here = os.path.dirname(os.path.abspath(__file__))
-    p.add_argument("--csv", default=os.path.join(here, "debug.csv"))
-    p.add_argument("--out-dir", default=os.path.join(here, "debug_out"))
-    args = p.parse_args()
-
-    rows = load_csv(args.csv)
-    os.makedirs(args.out_dir, exist_ok=True)
-    print(f"loaded {len(rows)} rows from {args.csv}")
-    plot_trajectory(rows, os.path.join(args.out_dir, "trajectory.png"))
-    plot_actions(rows,    os.path.join(args.out_dir, "actions.png"))
-    plot_obs(rows,        os.path.join(args.out_dir, "obs.png"))
-    print(f"saved trajectory.png + actions.png + obs.png to {args.out_dir}/")
-
-
-if __name__ == "__main__":
-    main()
@@ -1,285 +0,0 @@
-"""
-Shepherd Dog RL controller — runs a trained SB3 PPO policy inside Webots.
-
-Setup
-----
-1. Copy your trained files into this directory:
-       controllers/shepherd_dog_rl/final_model.zip
-       controllers/shepherd_dog_rl/vecnorm.pkl
-
-2. In field.wbt, set the ShepherdDog robot's controller field to
-   "shepherd_dog_rl".  You can do this in the Webots GUI:
-       click the robot → Controller → shepherd_dog_rl
-
-3. Optional: set controllerArgs to ["5"] (number of sheep) if it differs
-   from the default of 5.
-
-The controller reads GPS (dog position) and Receiver (sheep broadcasts),
-builds the same 16-dim flock observation the training env used, normalises
-it with the saved VecNormalize stats, and converts the (vx, vy) policy
-output into differential wheel speeds.
-
-Debug logging
-------------
-Set env var DOG_DEBUG=1 to write a per-step CSV (dog pos, sheep positions,
-raw obs, normalised obs, action) to debug.csv alongside this script. Use
-plot_debug.py to render trajectories from it.
-"""
-
-import sys
-import os
-import math
-import struct
-import numpy as np
-
-# ── make training code importable ───────────────────────────────────────────
-_HERE = os.path.dirname(os.path.abspath(__file__))
-_TRAINING = os.path.join(_HERE, "..", "..", "training")
-sys.path.insert(0, _TRAINING)
-
-from controller import Robot
-from stable_baselines3 import PPO
-from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
-from herding_env import HerdingEnv
-
-# ── constants (must match herding_env.py) ───────────────────────────────────
-FIELD      = 15.0
-PEN_CENTER = np.array([11.5, -11.5], dtype=np.float32)
-PEN_X      = (10.0, 13.0)
-PEN_Y      = (-15.0, -8.0)
-DOG_SPEED  = 2.5         # m/s
-WHEEL_R    = 0.038       # wheel radius (metres) — from ShepherdDog.proto
-K_TURN     = 4.0         # heading-error gain (rad/s per rad)
-EAR_AMPLITUDE = 0.35
-EAR_RATE      = 8.0
-
-# ── model paths ─────────────────────────────────────────────────────────────
-MODEL_PATH   = os.path.join(_HERE, "final_model.zip")
-VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
-DEBUG_CSV    = os.path.join(_HERE, "debug.csv")
-DEBUG_ENABLED = True   # set False to disable debug.csv logging
-
-# ── action smoothing ─────────────────────────────────────────────────────────
-# EMA on policy output to suppress the rapid oscillation (vx/vy flipping
-# between -1 and +1 every step) that stalls the physical dog.  0 = no
-# smoothing (raw policy), 1 = frozen.  0.3 keeps ~30% of previous action.
-ACTION_SMOOTH = 0.3
-prev_action   = np.zeros(2, dtype=np.float32)
-
-
-def norm_angle(a: float) -> float:
-    while a >  math.pi: a -= 2 * math.pi
-    while a < -math.pi: a += 2 * math.pi
-    return a
-
-
-def in_pen(x: float, y: float) -> bool:
-    return PEN_X[0] < x < PEN_X[1] and PEN_Y[0] < y < PEN_Y[1]
-
-
-def build_obs(dog_pos: np.ndarray,
-              sheep_dict: dict,
-              n_sheep: int,
-              dog_heading: float = 0.0) -> np.ndarray:
-    """
-    Build the 18-dim flock observation — identical to HerdingEnv._obs().
-
-    sheep_dict: {name: (x, y)} for ALL known sheep (penned or not).
-    dog_heading: dog's current world-frame heading in radians.
-    """
-    D = 2 * FIELD
-
-    # Split active vs penned
-    active_pos = np.array(
-        [v for v in sheep_dict.values() if not in_pen(*v)],
-        dtype=np.float32
-    )
-    n_active = len(active_pos)
-
-    if n_active > 0:
-        com        = active_pos.mean(axis=0)
-        d_from_com = np.linalg.norm(active_pos - com, axis=1)
-        sorted_idx = np.argsort(d_from_com)[::-1]
-        radius     = float(d_from_com[sorted_idx[0]])
-        def nth(n):
-            return active_pos[sorted_idx[n]] if len(sorted_idx) > n else com
-        far1, far2, far3 = nth(0), nth(1), nth(2)
-    else:
-        com = PEN_CENTER.copy()
-        radius = 0.0
-        far1 = far2 = far3 = PEN_CENTER.copy()
-
-    frac_active = n_active / max(n_sheep, 1)
-
-    return np.array([
-        dog_pos[0] / FIELD,  dog_pos[1] / FIELD,
-        (com[0]  - dog_pos[0]) / D, (com[1]  - dog_pos[1]) / D,
-        (far1[0] - com[0]) / D, (far1[1] - com[1]) / D,
-        (far2[0] - com[0]) / D, (far2[1] - com[1]) / D,
-        (far3[0] - com[0]) / D, (far3[1] - com[1]) / D,
-        (PEN_CENTER[0] - com[0])  / D, (PEN_CENTER[1] - com[1])  / D,
-        (PEN_CENTER[0] - far1[0]) / D, (PEN_CENTER[1] - far1[1]) / D,
-        radius / D,
-        frac_active,
-        math.cos(dog_heading), math.sin(dog_heading),
-    ], dtype=np.float32)
-
-
-# ── Webots setup ─────────────────────────────────────────────────────────────
-robot    = Robot()
-timestep = int(robot.getBasicTimeStep())
-
-# Drive motors
-left_motor  = robot.getDevice("left wheel motor")
-right_motor = robot.getDevice("right wheel motor")
-left_motor.setPosition(float("inf"))
-right_motor.setPosition(float("inf"))
-left_motor.setVelocity(0.0)
-right_motor.setVelocity(0.0)
-MOTOR_MAX = left_motor.getMaxVelocity()
-
-# Sensors
-gps      = robot.getDevice("gps");      gps.enable(timestep)
-compass  = robot.getDevice("compass");  compass.enable(timestep)
-receiver = robot.getDevice("receiver"); receiver.enable(timestep)
-emitter  = robot.getDevice("emitter")
-
-# Cosmetic
-left_ear  = robot.getDevice("left ear motor")
-right_ear = robot.getDevice("right ear motor")
-left_ear.setPosition(float("inf"));  right_ear.setPosition(float("inf"))
-left_ear.setVelocity(0.0);           right_ear.setVelocity(0.0)
-ear_phase = 0.0
-
-# Number of sheep (from controllerArgs or default)
-try:
-    n_sheep = int(sys.argv[1])
-except (IndexError, ValueError):
-    n_sheep = 3
-
-# ── Load model ───────────────────────────────────────────────────────────────
-print(f"[RL dog] Loading model from {MODEL_PATH}")
-print(f"[RL dog] Loading vecnorm from {VECNORM_PATH}")
-
-dummy_env = DummyVecEnv([lambda: HerdingEnv(n_sheep=n_sheep)])
-vecnorm   = VecNormalize.load(VECNORM_PATH, dummy_env)
-vecnorm.training    = False
-vecnorm.norm_reward = False
-
-model = PPO.load(MODEL_PATH, device="cpu")
-print(f"[RL dog] Model loaded — running with n_sheep={n_sheep}")
-
-# ── Runtime state ─────────────────────────────────────────────────────────────
-sheep_positions: dict = {}   # {name: (x, y)} — updated every step from receiver
-step_count = 0
-
-# Debug CSV — written every step when DOG_DEBUG=1
-debug_file = None
-if DEBUG_ENABLED:
-    import csv
-    debug_file = open(DEBUG_CSV, "w", newline="")
-    debug_writer = csv.writer(debug_file)
-    debug_writer.writerow([
-        "step", "dog_x", "dog_y", "heading",
-        "sheep_xs", "sheep_ys", "n_active", "n_penned",
-        "raw_obs", "norm_obs", "vx", "vy",
-    ])
-    print(f"[RL dog] DEBUG logging to {DEBUG_CSV}")
-
-
-def bearing() -> float:
-    """Current robot heading in world frame (radians)."""
-    n = compass.getValues()
-    return math.atan2(n[0], n[1])
-
-
-def drive(action_vx: float, action_vy: float) -> None:
-    """Convert (vx, vy) policy action to differential wheel speeds."""
-    speed_ms = math.sqrt(action_vx ** 2 + action_vy ** 2) * DOG_SPEED
-    if speed_ms < 0.05:
-        left_motor.setVelocity(0.0)
-        right_motor.setVelocity(0.0)
-        return
-
-    target_heading = math.atan2(action_vy, action_vx)
-    err = norm_angle(target_heading - bearing())
-
-    fwd_ms  = speed_ms * max(0.0, math.cos(err))
-    fwd_rad = fwd_ms / WHEEL_R
-    turn    = K_TURN * err    # rad/s correction
-
-    l = max(-MOTOR_MAX, min(MOTOR_MAX, fwd_rad - turn))
-    r = max(-MOTOR_MAX, min(MOTOR_MAX, fwd_rad + turn))
-    left_motor.setVelocity(l)
-    right_motor.setVelocity(r)
-
-
-# ── Main loop ─────────────────────────────────────────────────────────────────
-while robot.step(timestep) != -1:
-    step_count += 1
-
-    # 1. Drain receiver — update sheep position table
-    while receiver.getQueueLength() > 0:
-        try:
-            msg = receiver.getString()
-            parts = msg.split(":")
-            if parts[0] == "sheep" and len(parts) == 4:
-                sheep_positions[parts[1]] = (float(parts[2]), float(parts[3]))
-        except Exception:
-            pass
-        receiver.nextPacket()
-
-    # 2. Dog GPS
-    gps_vals = gps.getValues()
-    dog_pos  = np.array([gps_vals[0], gps_vals[1]], dtype=np.float32)
-
-    # 3. Build and normalise observation (heading from compass)
-    raw_obs  = build_obs(dog_pos, sheep_positions, n_sheep,
-                         dog_heading=bearing())
-    obs_norm = vecnorm.normalize_obs(raw_obs[np.newaxis])  # (1, 13)
-
-    # 4. Policy inference + smoothing
-    action, _ = model.predict(obs_norm, deterministic=True)
-    raw_a = np.array([float(action[0][0]), float(action[0][1])], dtype=np.float32)
-    if ACTION_SMOOTH > 0:
-        smoothed = ACTION_SMOOTH * prev_action + (1.0 - ACTION_SMOOTH) * raw_a
-        prev_action[:] = smoothed
-        vx, vy = float(smoothed[0]), float(smoothed[1])
-    else:
-        vx, vy = float(raw_a[0]), float(raw_a[1])
-
-    # 5. Drive
-    drive(vx, vy)
-
-    # 6. Broadcast dog position so sheep can compute flee forces
-    emitter.send(f"dog:{dog_pos[0]:.4f}:{dog_pos[1]:.4f}")
-
-    # 7. Ear animation
-    ear_phase += 0.12
-    ep = EAR_AMPLITUDE * math.sin(ear_phase)
-    left_ear.setVelocity(EAR_RATE);  right_ear.setVelocity(EAR_RATE)
-    left_ear.setPosition( ep);        right_ear.setPosition(-ep)
-
-    # Periodic status
-    if step_count % 100 == 0:
-        n_in_pen = sum(1 for x, y in sheep_positions.values() if in_pen(x, y))
-        print(f"[RL dog] step={step_count}  known_sheep={len(sheep_positions)}"
-              f"  penned={n_in_pen}/{n_sheep}  dog=({dog_pos[0]:.2f},{dog_pos[1]:.2f})"
-              f"  action=({vx:.2f}, {vy:.2f})")
-
-    # Debug CSV row
-    if debug_file is not None:
-        n_active = sum(1 for x, y in sheep_positions.values() if not in_pen(x, y))
-        n_in_pen = len(sheep_positions) - n_active
-        debug_writer.writerow([
-            step_count, f"{dog_pos[0]:.4f}", f"{dog_pos[1]:.4f}",
-            f"{bearing():.4f}",
-            ";".join(f"{v[0]:.3f}" for v in sheep_positions.values()),
-            ";".join(f"{v[1]:.3f}" for v in sheep_positions.values()),
-            n_active, n_in_pen,
-            ";".join(f"{x:.4f}" for x in raw_obs),
-            ";".join(f"{x:.4f}" for x in obs_norm[0]),
-            f"{vx:.4f}", f"{vy:.4f}",
-        ])
-        if step_count % 200 == 0:
-            debug_file.flush()