Results from last checkpoinr

Checkpoint 8
Checkpoint 7
2026-05-13 07:49:17 +00:00 · 2026-05-12 22:41:03 +01:00 · 2026-05-11 12:21:51 +01:00 · 2026-05-11 10:35:48 +01:00 · 2026-05-11 10:35:39 +01:00 · 2026-05-11 00:42:52 +01:00
75 changed files with 34147 additions and 3000 deletions
@@ -1,21 +1,15 @@
 # Stuff
 #_example/
 .claude/
 # Python
 __pycache__/
-# Training
+# Training artefacts: ignore all run outputs except deployable policies
 training/**/events.out.tfevents.*
 training/**/checkpoints/
 training/runs/**
 !training/runs/
 !training/runs/.gitkeep
 !training/runs/*/
 !training/runs/*/policy.zip
-# Controller runtime artefacts
+# Webots launcher scratch
-controllers/shepherd_dog_rl/debug*.csv
+worlds/**
-controllers/shepherd_dog_rl/debug_out*/
+!worlds/field.wbt
-controllers/shepherd_dog_rl/final_model*.zip
+!worlds/field_round.wbt
-controllers/shepherd_dog_rl/vecnorm*.pkl
+herding_runtime.cfg
 # Optional env parity debug
 dog_debug.csv
@@ -0,0 +1,187 @@
 # Training pipeline for the shepherd-dog herding project.
 # Stages chain via output files in training/.
 #
 # Usage:
 #   make            # full pipeline: bc_demos -> bc -> rl -> eval
 #   make bc_demos   # generate sim demos
 #   make bc         # behaviour clone (rebuilds bc_demos if missing)
 #   make rl         # KL-PPO fine-tune (rebuilds bc if missing)
 #   make eval       # 10-seed env eval of rl
 #   make test       # pytest suite
 #   make webots N=10 MODE=rl   # launch Webots in the chosen mode
 #   WEBOTS_HEADLESS=1 make webots   # no 3D view, fast mode (still needs DISPLAY or xvfb-run)
 #   make clean      # delete bc_demos and run artefacts
 #   make clean_all  # delete artefacts for all combinations
 #   make help       # print the target table
 #
 # Override any hyperparameter on the command line, for example:
 #   make rl PPO_STEPS=2000000 KL=0.02
 #   make eval EVAL_SEEDS=20
 #
 # Drive mode selects the locomotion model:
 #   make DRIVE=differential       2-wheel diff-drive (default)
 #   make DRIVE=mecanum             4-wheel omnidirectional
 #
 # World shape:
 #   make WORLD=field              rectangular (default)
 #   make WORLD=field_round        circular fence
 #
 # To train all 4 combinations:
 #   make train_all
 PY               := python
 # Drive mode and world shape — each combination gets its own artefacts.
 DRIVE            ?= differential
 WORLD            ?= field
 # Derived tag and paths.
 TAG               = $(DRIVE)_$(WORLD)
 BC_DEMOS          = training/bc/demos_$(TAG).npz
 BC_DIR            = training/runs/bc_$(TAG)
 RL_DIR            = training/runs/rl_$(TAG)
 BC_POLICY         = $(BC_DIR)/policy.zip
 RL_POLICY         = $(RL_DIR)/policy.zip
 # --- Demo collection ---
 TEACHER          ?= universal
 # Round field is fundamentally harder (narrow gate at south of a circle).
 # Default to more demos there to give BC a fair shot at 60%+.
 ifeq ($(WORLD),field_round)
 SEEDS_PER_N      ?= 40
 else
 SEEDS_PER_N      ?= 25
 endif
 SUBSAMPLE        ?= 3
 FRAME_STACK      ?= 4
 DEMO_MAX_STEPS   ?= 100000
 # --- Behaviour cloning ---
 ifeq ($(WORLD),field_round)
 BC_EPOCHS        ?= 100
 else
 BC_EPOCHS        ?= 60
 endif
 BC_NET_ARCH      ?= 512,512
 # --- KL-PPO fine-tune ---
 # Round field: longer training, looser KL, no time penalty (success
 # must be learned before speed is rewarded).
 ifeq ($(WORLD),field_round)
 PPO_STEPS        ?= 4000000
 KL               ?= 0.02
 TIME_W           ?= 0.0
 else
 PPO_STEPS        ?= 2000000
 KL               ?= 0.05
 TIME_W           ?= -0.05
 endif
 IMITATE          ?= 0.0
 # PPO rollouts at full difficulty so the training distribution matches
 # eval (deployment).  Anything lower causes a train/eval mismatch that
 # can make RL eval worse than BC.
 DIFFICULTY       ?= 1.0
 # --- Evaluation ---
 EVAL_SEEDS       ?= 10
 EVAL_MAX_STEPS   ?= 15000
 # --- Webots launcher ---
 N                ?= 10
 MODE             ?= rl
 .PHONY: all bc_demos bc rl eval test webots clean clean_all help \
        train_all train_diff_rect train_diff_round \
        train_mec_rect train_mec_round
 all: eval
 # Export HERDING_WORLD so that geometry.py picks it up at import time.
 export HERDING_WORLD = $(WORLD)
 bc_demos: $(BC_DEMOS)
 $(BC_DEMOS):
 	$(PY) -m training.bc.collect \
 		--teacher $(TEACHER) --out $(BC_DEMOS) \
 		--seeds-per-n $(SEEDS_PER_N) --subsample $(SUBSAMPLE) \
 		--frame-stack $(FRAME_STACK) --drive-mode $(DRIVE) \
 		--world $(WORLD) \
 		--max-steps $(DEMO_MAX_STEPS)
 bc: $(BC_POLICY)
 $(BC_POLICY): $(BC_DEMOS)
 	$(PY) -m training.bc.pretrain \
 		--demos $(BC_DEMOS) --out $(BC_DIR) \
 		--epochs $(BC_EPOCHS) --net-arch $(BC_NET_ARCH)
 rl: $(RL_POLICY)
 $(RL_POLICY): $(BC_POLICY)
 	$(PY) -m training.rl.train \
 		--bc $(BC_DIR) --out $(RL_DIR) \
 		--total-timesteps $(PPO_STEPS) --kl-coef $(KL) \
 		--imitate-weight $(IMITATE) --time-weight $(TIME_W) \
 		--difficulty $(DIFFICULTY) \
 		--drive-mode $(DRIVE) --world $(WORLD)
 eval: $(RL_POLICY)
 	$(PY) -m training.eval --policy $(RL_DIR) \
 		--max-flock 10 --max-steps $(EVAL_MAX_STEPS) --n-seeds $(EVAL_SEEDS) \
 		--drive-mode $(DRIVE) --world $(WORLD)
 test:
 	$(PY) -m pytest tests/
 webots:
 	tools/run_webots.sh $(N) $(MODE) $(DRIVE) $(WORLD)
 clean:
 	rm -f $(BC_DEMOS)
 	rm -rf $(BC_DIR) $(RL_DIR)
 clean_all:
 	rm -f training/bc/demos_*.npz
 	rm -rf training/runs/bc_* training/runs/rl_*
 # --- Train all 4 combinations ---
 train_diff_rect:
 	$(MAKE) DRIVE=differential WORLD=field
 train_diff_round:
 	$(MAKE) DRIVE=differential WORLD=field_round
 train_mec_rect:
 	$(MAKE) DRIVE=mecanum WORLD=field
 train_mec_round:
 	$(MAKE) DRIVE=mecanum WORLD=field_round
 train_all: train_diff_rect train_diff_round train_mec_rect train_mec_round
 help:
 	@echo "Targets:"
 	@echo "  make              full pipeline (bc_demos -> bc -> rl -> eval)"
 	@echo "  make bc_demos     sim demos via the '$(TEACHER)' teacher"
 	@echo "  make bc           train BC (rebuilds bc_demos if missing)"
 	@echo "  make rl           KL-PPO fine-tune (rebuilds bc if missing)"
 	@echo "  make eval         $(EVAL_SEEDS)-seed env eval of rl"
 	@echo "  make test         pytest suite"
 	@echo "  make webots [N=$(N)] [MODE=$(MODE)] [DRIVE=$(DRIVE)] [WORLD=$(WORLD)]"
 	@echo "                    launch Webots in the chosen mode"
 	@echo "  WEBOTS_HEADLESS=1 make webots …   no 3D view + fast + --batch"
 	@echo "  make clean        delete artefacts for current DRIVE+WORLD"
 	@echo "  make clean_all    delete artefacts for all combinations"
 	@echo ""
 	@echo "Combinations:"
 	@echo "  make DRIVE=differential WORLD=field       diff + rectangular (default)"
 	@echo "  make DRIVE=differential WORLD=field_round  diff + circular"
 	@echo "  make DRIVE=mecanum     WORLD=field         mecanum + rectangular"
 	@echo "  make DRIVE=mecanum     WORLD=field_round   mecanum + circular"
 	@echo "  make train_all                            all 4 in sequence"
 	@echo ""
 	@echo "Hyperparameter overrides (showing defaults):"
 	@echo "  TEACHER=$(TEACHER) SEEDS_PER_N=$(SEEDS_PER_N) SUBSAMPLE=$(SUBSAMPLE) FRAME_STACK=$(FRAME_STACK) DEMO_MAX_STEPS=$(DEMO_MAX_STEPS)"
 	@echo "  BC_EPOCHS=$(BC_EPOCHS) BC_NET_ARCH=$(BC_NET_ARCH)"
 	@echo "  PPO_STEPS=$(PPO_STEPS) KL=$(KL) IMITATE=$(IMITATE) TIME_W=$(TIME_W)"
 	@echo "  EVAL_SEEDS=$(EVAL_SEEDS) EVAL_MAX_STEPS=$(EVAL_MAX_STEPS)"
@@ -0,0 +1,199 @@
 # Autonomous Shepherd-Dog Herding (Webots + RL)
 Group G25 — *Diogo Costa, Johnny Fernandes, Nelson Neto*
 A differential-drive shepherd dog that herds 1–10 sheep through a 3 m
 gate into an external pen. The dog has three deployable modes:
 | Mode | Source | Role |
 |---|---|---|
 | `strombom` | Strömbom et al. (2014) collect/drive heuristic | Analytic baseline |
 | `bc` | Behaviour cloning of the Strömbom teacher | Imitation learning result |
 | `rl` | KL-regularised PPO fine-tune of `bc` | Reward-driven refinement |
 `sequential` (single-target pin-and-push) is kept as an alternative
 analytic baseline.
 ## Perception
 The dog perceives sheep **only through its front-mounted 140° LiDAR**
 (180 rays, 12 m max range — see `protos/ShepherdDog.proto`). Each
 control step:
 1. Read `lidar.getRangeImage()`,
 2. Cluster returns into world-frame `(x, y)` estimates
   (`herding/perception/lidar_perception.py`),
 3. Fold them into a multi-target tracker that maintains last-seen
   positions for sheep currently outside the FOV
   (`herding/perception/sheep_tracker.py`).
 **LiDAR validation** (intermediate-goal item v from `docs/project.md`):
 during development a diagnostic-dump controller captured 80 real
 Webots scans plus the ground-truth sheep positions. Comparing
 detections against GT showed clustered centroids match GT positions
 within 0.15 m after the +SHEEP_RADIUS surface-to-centre correction —
 i.e. the LiDAR pipeline produces correct sheep-position estimates
 from the real Webots scan, validating the sensor for the herding
 task.
 The tracker outputs a `{name: (x, y)}` dict shaped exactly like the
 prior receiver-based one, so Strömbom, Sequential, and the BC obs
 builder all run unchanged on top of it. The 2D Gymnasium env
 (`herding/perception/lidar_sim.py`) raycasts sheep discs at training time, so
 demos collected in the env match the perception the deployed
 controller sees in Webots.
 Privileged ground-truth perception is available for ablation —
 `HerdingEnv(use_lidar=False)`.
 ## Quick start
 ```bash
 # 1. Set up the Python env (any venv with PyTorch + SB3)
 pip install -r training/requirements.txt
 # 2. Smoke test (70 pytest cases, < 1 s)
 make test
 # 3. Reproduce the full pipeline (~30–60 min CPU)
 make            # demos -> bc -> rl -> eval
 # Individual stages (each rebuilds upstream artefacts if missing):
 make bc_demos   # sim demos
 make bc         # behaviour clone
 make rl         # KL-PPO fine-tune
 make eval       # 10-seed env eval of rl
 # 4. Run in Webots
 make webots N=10 MODE=bc          # behaviour-cloned MLP
 make webots N=10 MODE=rl          # KL-PPO fine-tune
 make webots N=10 MODE=strombom    # analytic baseline
 # (or invoke directly: tools/run_webots.sh 10 rl)
 ```
 `make help` lists every target and the overridable hyperparameters
 (e.g. `make rl PPO_STEPS=2000000 KL=0.02`).
 ## Documentation map
 - This README is the project overview: architecture, quick start, and
  headline results.
 - `training/README.md` has the command-level training and evaluation
  details for demo collection, BC, PPO fine-tuning, and policy artifacts.
 - `docs/project.md` is the original course proposal/goals document, kept
  for traceability rather than as run instructions.
 ## Layout
 ```
 herding/                  — perception / control / world primitives
  world/                  — environment-side physics & geometry
    geometry.py             field/pen constants, robot specs
    diffdrive.py            differential-drive kinematics
    flocking_sim.py         Reynolds + Strömbom 2014 sheep dynamics
  perception/             — LiDAR → tracked-sheep pipeline
    lidar_sim.py            fast 2D raycast for the env
    lidar_perception.py     scan → world-frame cluster centroids + filters
    sheep_tracker.py        multi-target NN tracker with FOV memory
    obs.py                  32-D order-invariant observation builder
  control/                — every dog mode's action source
    strombom.py             canonical CoM collect/drive heuristic
    sequential.py           single-target "pin-and-push" alternative
    active_scan.py          wraps a base teacher with opening rotation +
                            walk-to-centre fallback
    modulation.py           shared near-sheep speed-modulation helper
 controllers/
  sheep/sheep.py          — Webots sheep controller (uses herding.world.flocking_sim)
  shepherd_dog/
    shepherd_dog.py       — Webots dog controller, mode-switched
    policy_loader.py      — lazy SB3 policy loader (auto-detects frame stack)
 training/
  herding_env.py          — Gymnasium env (LiDAR + tracker by default)
  bc/collect.py           — sim demos via the active-scan teacher
  bc/pretrain.py          — supervised BC of (obs, action) demos into MLP
  rl/train.py             — KL-regularised PPO fine-tune of BC
  eval.py                 — analytic + learned policy comparison harness
  bc/demos.npz            — collected demonstrations (gitignored)
  runs/                   — checkpoints (whitelisted in .gitignore)
  requirements.txt
 tests/
  conftest.py             — pytest setup (adds project root to sys.path)
  test_geometry.py        — geometric predicates + constants
  test_diffdrive.py       — kinematics and (vx, vy) → wheel-speed map
  test_obs.py             — observation builder (shape, normalisation, order)
  test_control.py         — speed modulation + analytic teachers + active scan
  test_perception.py      — LiDAR sim + clustering + tracker
  test_env.py             — Gymnasium contract + determinism + reward
 tools/
  run_webots.sh           — launch Webots with N sheep + chosen mode
 Makefile                  — pipeline orchestrator (make / make rl / make test / …)
 worlds/
  field.wbt               — main world (3 m gate, external pen)
 protos/                   — Sheep / ShepherdDog robot definitions
 docs/project.md           — original course proposal/goals
 ```
 ## Shared low-level control
 Every dog mode (Strömbom, Sequential, BC, RL) routes its action
 through `herding/control/modulation.py:modulate_speed_near_sheep`,
 which scales action magnitude down when within ~2.5 m of the nearest
 tracked sheep. This stops the dog from charging in at full speed and
 scattering the flock. Direction (intent) is preserved.
 All modes also share the same EMA action smoother in
 `controllers/shepherd_dog/shepherd_dog.py:ACTION_SMOOTH = 0.55`.
 ## Results — env eval, 10 seeds × n=1..10
 `max_steps=15000`, full-field spawn distribution. Success rate per
 flock size, then mean steps over successful seeds.
 ### Success rate (%)
 | n  | Strömbom | `bc` | `rl` |
 |---:|---:|---:|---:|
 |  1 |  30 |  80 | **90** |
 |  2 |  90 |  50 | **90** |
 |  3 |  60 |  90 | **90** |
 |  4 |  40 |  80 | **90** |
 |  5 |  60 |  70 | **100** |
 |  6 |  30 |  80 | 80 |
 |  7 |  70 |  80 | **100** |
 |  8 |  30 | 100 | **100** |
 |  9 |  40 |  90 | **100** |
 | 10 |  50 | 100 | **100** |
 ### Mean penned per episode (out of n)
 | n  | Strömbom | `bc` | `rl` |
 |---:|---:|---:|---:|
 |  1 | 0.30 | 0.80 | **0.90** |
 |  5 | 3.90 | 4.10 | **5.00** |
 |  8 | 4.20 | 8.00 | **8.00** |
 | 10 | 7.40 | 10.00 | **10.00** |
 ### Takeaways
 - **BC clearly beats Strömbom** under realistic LiDAR conditions (full
  field, partial observability). Strömbom struggles on small flocks
  where a single sheep can spawn beyond the LiDAR's 12 m range; BC
  learned active perception from the demos.
 - **RL refines BC** without regressing on any cell. Ties or beats BC
  at every flock size; biggest gains at n=1 and n=4 where BC's
  imitation of Strömbom's drive heuristic was sub-optimal.
 - **Aggressive reward shaping doesn't help** — a more aggressive
  variant (β=0.02, W_TIME=-0.1, W_IMITATE=0, 3 M steps) trained as
  an ablation was strictly worse than the conservative tune shipped
  here (β=0.05, W_IMITATE=0.5, 1 M steps).
 ## License
 Educational project for the *Topics in Intelligent Robotics* course.
@@ -0,0 +1,30 @@
 """Backwards-compat shim — flocking logic now lives in ``herding.world.flocking_sim``.
 Kept so any external reference still resolves.
 """
 import os
 import sys
 _HERE = os.path.dirname(os.path.abspath(__file__))
 _PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
 if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)
 from herding.world.flocking_sim import (  # noqa: F401
    MAX_SPEED, FLEE_SPEED, WANDER_SPEED,
    WALL_MARGIN, WALL_HARD_MARGIN, WALL_HARD_GAIN,
    FLEE_DIST, SEPARATION_DIST, COHESION_DIST,
    PEN_MARGIN,
    compute_heading_speed,
 )
 from herding.world.geometry import (  # noqa: F401
    FIELD_X, FIELD_Y, PEN_X, PEN_Y,
    in_pen,
 )
 # Original module-level names retained for any code still importing them.
 X_MIN, X_MAX = FIELD_X
 Y_MIN, Y_MAX = FIELD_Y
 PEN_X_MIN, PEN_X_MAX = PEN_X
 PEN_Y_MIN, PEN_Y_MAX = PEN_Y
@@ -1,50 +1,37 @@
-"""
+"""Sheep flocking controller (Webots).
 Sheep flocking controller (Webots, Reynolds boids variant).
-Each sheep broadcasts its GPS position every 3 steps on channel 1 and
+Each sheep emits its GPS position every 3 steps and listens for the
-listens for the dog and peer sheep positions.  Peers are keyed by robot
+dog's position and peer-sheep positions. The behavioural step is
-name so each neighbour has exactly one current entry in the dict.
+delegated to :func:`herding.world.flocking_sim.compute_heading_speed`
 so the env and Webots use identical sheep dynamics.
-Force stack each step (summed then converted to a heading + speed):
+A sheep latches penned the first time it crosses the gate plane south;
-    flee       — away from dog, quadratic ramp, dominant when close
+the wool turns pink (via the exposed ``woolColor`` PROTO field) and
-    cohesion   — toward flock centre, halved while fleeing
+the dynamics switch to in-pen containment.
    separation — inverse-distance push, prevents physical overlap
    walls      — linear repulsion from field boundary
    wander     — small persistent drift for natural idle motion
 Pen behaviour: on first entry into the quarantine pen the sheep latches
 permanently — it turns pink (via the exposed woolColor PROTO field) and
 the normal force stack is replaced by pen-confinement forces only.
 """
 import random
 import math
 import os
 import random
 import sys
 # --- Make the shared herding/ package importable from this controller dir ---
 _HERE = os.path.dirname(os.path.abspath(__file__))
 _PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
 if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)
 from controller import Supervisor
-# ---------------------------------------------------------------------------
+from herding.world.diffdrive import heading_speed_to_wheels
-# Tuning constants
+from herding.world.flocking_sim import MAX_SPEED, compute_heading_speed
-# ---------------------------------------------------------------------------
+from herding.world.geometry import (
    SHEEP_MAX_WHEEL_OMEGA,
    is_penned_position,
 )
 MAX_SPEED    = 22.0   # rad/s hard clamp on both motors
 FLEE_SPEED   = 20.0   # rad/s upper bound while panicking
 WANDER_SPEED =  3.0   # rad/s lower bound during calm wandering
 X_MIN, X_MAX = -14.5, 14.5   # stone wall inner edges (metres)
 Y_MIN, Y_MAX = -14.5, 14.5
 WALL_MARGIN  =  3.5           # avoidance starts this far from the wall
 FLEE_DIST       = 7.0   # dog within this radius triggers flee (metres)
 SEPARATION_DIST = 2.5   # inverse-distance push active inside this radius
 COHESION_DIST   = 8.0   # pull toward flock centre active inside this radius
 PEN_X_MIN, PEN_X_MAX = 10.0, 13.0   # quarantine pen extents (metres)
 PEN_Y_MIN, PEN_Y_MAX = -15.0, -8.0  # open entrance at y=-8, gate at y=-15
 PEN_MARGIN = 0.8                     # confinement force starts this far from pen wall
 # ---------------------------------------------------------------------------
 # Device setup
 # ---------------------------------------------------------------------------
 # --- Devices ---
 robot = Supervisor()
 timestep = int(robot.getBasicTimeStep())
 name = robot.getName()
@@ -56,178 +43,89 @@ left_motor.setPosition(float("inf"))
 right_motor.setPosition(float("inf"))
 left_motor.setVelocity(0.0)
 right_motor.setVelocity(0.0)
 MOTOR_MAX = min(left_motor.getMaxVelocity(), SHEEP_MAX_WHEEL_OMEGA)
 gps = robot.getDevice("gps");           gps.enable(timestep)
 compass = robot.getDevice("compass");   compass.enable(timestep)
 receiver = robot.getDevice("receiver"); receiver.enable(timestep)
 emitter = robot.getDevice("emitter")
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 def norm_angle(a):
    return math.atan2(math.sin(a), math.cos(a))
 # --- Helpers ---
 def bearing():
-    # Compass returns north direction in sensor frame; for this Z-up world
+    """World-frame heading (0 = east, π/2 = north)."""
    # with north = +Y, atan2(n[0], n[1]) gives the standard math angle
    # (0 = east, π/2 = north) matching atan2(fy, fx) used for heading.
    n = compass.getValues()
    return math.atan2(n[0], n[1])
-def drive(heading, speed):
+def drive(heading, speed_motor):
-    err = norm_angle(heading - bearing())
+    left_w, right_w = heading_speed_to_wheels(
-    # Scale forward component by cos(err): at 90° error fwd→0 so the robot
+        heading, min(speed_motor, MAX_SPEED), bearing(), MOTOR_MAX, k_turn=4.0
-    # spins in place to realign rather than driving sideways at full speed.
+    )
-    fwd = speed * max(0.0, math.cos(err))
+    left_motor.setVelocity(left_w)
-    k = 4.0
+    right_motor.setVelocity(right_w)
    left_motor.setVelocity( max(-MAX_SPEED, min(MAX_SPEED, fwd - k * err)))
    right_motor.setVelocity(max(-MAX_SPEED, min(MAX_SPEED, fwd + k * err)))
 def paint_pink():
-    # woolColor is declared as a PROTO field with IS binding to the DEF WOOL
+    """Switch the sheep's wool to pink via the exposed PROTO field."""
    # PBRAppearance baseColor.  Changing it here propagates to every USE WOOL
    # shape on the body.  Direct field access avoids PROTO-internal opacity.
    self_node.getField("woolColor").setSFColor([1.0, 0.55, 0.72])
 # ---------------------------------------------------------------------------
 # State
 # ---------------------------------------------------------------------------
 # --- State ---
 wander_angle = random.uniform(-math.pi, math.pi)
-step   = 0
+step_count = 0
-dog_x  = None
+dog_x, dog_y = None, None
-dog_y  = None
+peers = {}                       # name → (x, y); periodically pruned
 peers  = {}   # name → (x, y), one entry per neighbour, cleared every 30 steps
 penned = False
-# ---------------------------------------------------------------------------
+# Safety net for differential-drive sheep pinned against a wall.
-# Main loop
+_prev_x, _prev_y = None, None
-# ---------------------------------------------------------------------------
+_stuck_count = 0
 STUCK_STEPS = 20
 STUCK_DIST = 0.05
 # --- Main loop ---
 while robot.step(timestep) != -1:
-    step += 1
+    step_count += 1
    pos = gps.getValues()
    x, y = pos[0], pos[1]
-    # Pen entry: one-way latch, never unset
+    if not penned and is_penned_position(x, y):
    if not penned and PEN_X_MIN < x < PEN_X_MAX and PEN_Y_MIN < y < PEN_Y_MAX:
        penned = True
        paint_pink()
-    # Refresh peer table (clear before receiving so fresh data is never lost)
+    # Stale peers get dropped periodically so a peer that's gone silent
-    if step % 30 == 0:
+    # doesn't permanently distort the local CoM.
    if step_count % 30 == 0:
        peers.clear()
    while receiver.getQueueLength() > 0:
        msg = receiver.getString()
        receiver.nextPacket()
-        p = msg.split(":")
+        parts = msg.split(":")
-        if p[0] == "dog" and len(p) >= 3:
+        if parts[0] == "dog" and len(parts) >= 3:
-            dog_x, dog_y = float(p[1]), float(p[2])
+            dog_x, dog_y = float(parts[1]), float(parts[2])
-        elif p[0] == "sheep" and len(p) >= 4 and p[1] != name:
+        elif parts[0] == "sheep" and len(parts) >= 4 and parts[1] != name:
-            peers[p[1]] = (float(p[2]), float(p[3]))
+            peers[parts[1]] = (float(parts[2]), float(parts[3]))
-    fx, fy = 0.0, 0.0
+    dog_xy = (dog_x, dog_y) if dog_x is not None and dog_y is not None else None
    heading, speed, wander_angle = compute_heading_speed(
        x=x, y=y, penned=penned, dog_xy=dog_xy, peers=peers,
        wander_angle=wander_angle,
    )
-    # Repel unpenned sheep from the exterior of the pen's side walls so they
+    # Stuck-against-wall recovery: drive toward the field centre.
-    # don't get pinned by flee forces. Only fires when strictly outside the pen
+    if _prev_x is not None:
-    # (x < PEN_X_MIN or x > PEN_X_MAX) at pen height (y in pen y-range).
+        moved = math.hypot(x - _prev_x, y - _prev_y)
-    # Entrance is open on the north (y > PEN_Y_MAX) — no force there.
+        _stuck_count = _stuck_count + 1 if moved < STUCK_DIST else 0
-    PEN_EXT_MARGIN = 0.8
+    if _stuck_count >= STUCK_STEPS:
-    if not penned and PEN_Y_MIN < y < PEN_Y_MAX:
+        heading = math.atan2(-y, -x)
-        if PEN_X_MIN - PEN_EXT_MARGIN < x < PEN_X_MIN:
+        speed = MAX_SPEED
-            fx -= ((x - (PEN_X_MIN - PEN_EXT_MARGIN)) / PEN_EXT_MARGIN) * 6.0
+        _stuck_count = 0
-        if PEN_X_MAX < x < PEN_X_MAX + PEN_EXT_MARGIN:
+    _prev_x, _prev_y = x, y
            fx += ((PEN_X_MAX + PEN_EXT_MARGIN - x) / PEN_EXT_MARGIN) * 6.0
    if penned:
        # Inside pen: wander freely, strong boundary forces prevent exit,
        # separation still active to avoid collisions with other penned sheep.
        pm = PEN_MARGIN
        if x < PEN_X_MIN + pm: fx += ((PEN_X_MIN + pm - x) / pm) * 15.0
        if x > PEN_X_MAX - pm: fx -= ((x - (PEN_X_MAX - pm)) / pm) * 15.0
        if y < PEN_Y_MIN + pm: fy += ((PEN_Y_MIN + pm - y) / pm) * 15.0
        if y > PEN_Y_MAX - pm: fy -= ((y - (PEN_Y_MAX - pm)) / pm) * 15.0
        for px, py in peers.values():
            dx, dy = px - x, py - y
            d = math.hypot(dx, dy)
            if 0.05 < d < SEPARATION_DIST:
                push = (SEPARATION_DIST - d) / d
                fx -= (dx / d) * push * 2.5
                fy -= (dy / d) * push * 2.5
        if random.random() < 0.02:
            wander_angle += random.uniform(-0.6, 0.6)
        fx += math.cos(wander_angle) * 0.5
        fy += math.sin(wander_angle) * 0.5
    else:
        fleeing = False
        # Flee — quadratic ramp so force grows rapidly as the dog closes in
        if dog_x is not None:
            dx   = dog_x - x
            dy   = dog_y - y
            dist = math.hypot(dx, dy)
            if 0.01 < dist < FLEE_DIST:
                fleeing = True
                t = 1.0 - dist / FLEE_DIST
                s = t * t * 20.0
                fx -= (dx / dist) * s
                fy -= (dy / dist) * s
        # Cohesion — halved while fleeing to reduce mid-panic collisions
        cx, cy, cn = 0.0, 0.0, 0
        for px, py in peers.values():
            d = math.hypot(px - x, py - y)
            if 0.3 < d < COHESION_DIST:
                cx += px; cy += py; cn += 1
        if cn > 0:
            w = 0.08 if fleeing else 0.15
            fx += (cx / cn - x) * w
            fy += (cy / cn - y) * w
        # Separation — inverse-distance: huge when nearly overlapping, fades quickly
        for px, py in peers.values():
            dx, dy = px - x, py - y
            d = math.hypot(dx, dy)
            if 0.05 < d < SEPARATION_DIST:
                push = (SEPARATION_DIST - d) / d
                fx -= (dx / d) * push * 2.5
                fy -= (dy / d) * push * 2.5
        # Walls
        if x < X_MIN + WALL_MARGIN: fx += ((X_MIN + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
        if x > X_MAX - WALL_MARGIN: fx -= ((x - (X_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
        if y < Y_MIN + WALL_MARGIN: fy += ((Y_MIN + WALL_MARGIN - y) / WALL_MARGIN) * 6.0
        if y > Y_MAX - WALL_MARGIN: fy -= ((y - (Y_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
        # Wander — suppressed while fleeing so drift cannot deflect the flee heading
        if not fleeing:
            if random.random() < 0.02:
                wander_angle += random.uniform(-0.6, 0.6)
            fx += math.cos(wander_angle) * 0.5
            fy += math.sin(wander_angle) * 0.5
    # Hard-stop clamp: within 0.5 m of a wall, zero any force component that
    # would push further into it.  Prevents the flee force from pinning a sheep
    # against the boundary when the dog approaches from outside.
    HS = 0.5
    if x < X_MIN + HS and fx < 0: fx = 0.0
    if x > X_MAX - HS and fx > 0: fx = 0.0
    if y < Y_MIN + HS and fy < 0: fy = 0.0
    if y > Y_MAX - HS and fy > 0: fy = 0.0
    heading = math.atan2(fy, fx)
    mag     = math.hypot(fx, fy)
    speed   = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))
    drive(heading, speed)
-    if step % 3 == 0:
+    if step_count % 3 == 0:
        emitter.send(f"sheep:{name}:{x:.4f}:{y:.4f}")
@@ -0,0 +1,90 @@
 """Lazy SB3 policy loader for the dog controller.
 SB3 is imported only when a learned policy is actually requested,
 so the analytic modes can run on installs without stable-baselines3
 or torch.
 The handle auto-detects frame stacking from the policy's expected
 observation dimension: if it's a multiple of the single-frame
 ``OBS_DIM``, an internal buffer of the last K frames is maintained
 and concatenated on each ``predict`` call.
 """
 import os
 from pathlib import Path
 class PolicyHandle:
    """Wrap a loaded policy (+ optional VecNormalize) for ``predict(obs)``."""
    def __init__(self, model, vecnorm):
        self.model = model
        self.vecnorm = vecnorm
        from herding.perception.obs import OBS_DIM
        policy_dim = int(model.observation_space.shape[0])
        if policy_dim % OBS_DIM == 0 and policy_dim // OBS_DIM >= 1:
            self.frame_stack = policy_dim // OBS_DIM
        else:
            self.frame_stack = 1
        self._buffer: list = []
        self._single_dim = OBS_DIM
    def predict(self, obs):
        import numpy as np
        single = np.asarray(obs, dtype=np.float32).reshape(-1)
        if single.shape[0] != self._single_dim:
            # Caller passed an already-stacked obs.
            stacked = single
        elif self.frame_stack > 1:
            if not self._buffer:
                self._buffer = [single.copy() for _ in range(self.frame_stack)]
            else:
                self._buffer.append(single)
                if len(self._buffer) > self.frame_stack:
                    self._buffer = self._buffer[-self.frame_stack:]
            stacked = np.concatenate(self._buffer, axis=0)
        else:
            stacked = single
        obs_b = stacked.reshape(1, -1)
        if self.vecnorm is not None:
            obs_b = self.vecnorm.normalize_obs(obs_b)
        action, _ = self.model.predict(obs_b, deterministic=True)
        return action[0]
 def load(model_path: str, vecnorm_path: str | None = None) -> PolicyHandle:
    """Load a policy zip (+ optional VecNormalize pickle) from disk.
    ``model_path`` may be a ``.zip`` file or a directory; in the
    latter case ``policy.zip`` is preferred, with ``final.zip`` as
    a fallback for partially-completed RL runs.
    """
    p = Path(model_path)
    if p.is_dir():
        zip_candidates = [p / "policy.zip", p / "final.zip"]
        zip_path = next((z for z in zip_candidates if z.exists()), None)
        if zip_path is None:
            raise FileNotFoundError(
                f"No policy zip in {p} (looked for policy.zip, final.zip)"
            )
        if vecnorm_path is None:
            vn = p / "vecnormalize.pkl"
            if vn.exists():
                vecnorm_path = str(vn)
    else:
        zip_path = p
    # Deferred imports so the analytic path doesn't require SB3.
    from stable_baselines3 import PPO
    from stable_baselines3.common.vec_env import VecNormalize  # noqa: F401
    model = PPO.load(str(zip_path), device="auto")
    vecnorm = None
    if vecnorm_path and os.path.exists(vecnorm_path):
        import pickle
        with open(vecnorm_path, "rb") as f:
            vecnorm = pickle.load(f)
        vecnorm.training = False
        vecnorm.norm_reward = False
    return PolicyHandle(model=model, vecnorm=vecnorm)
@@ -1,88 +1,408 @@
-"""
+"""Shepherd Dog controller (Webots).
 Shepherd Dog controller (Webots, manual keyboard control).
-WASD / arrow keys drive the robot.  +/- adjust speed in 10 % increments.
+Mode is selected by ``HERDING_MODE`` (env var, or via the
-GPS position is broadcast every step on channel 1 so sheep controllers
+``herding_runtime.cfg`` file the launcher writes since Webots strips
-can compute flee forces.  Ears wag continuously via sinusoidal position
+env vars on some setups):
-targets — purely cosmetic.
+
    strombom    → canonical Strömbom (2014) collect/drive heuristic
                  wrapped in ActiveScanTeacher (opening rotation +
                  walk-to-centre when the tracker briefly empties).
    sequential  → single-target "pin-and-push", same wrapper.
    bc          → behaviour-cloned MLP, trained on Strömbom demos.
                  Default policy: training/runs/bc/policy.zip.
    rl          → KL-regularised PPO fine-tune of bc. Same obs/action
                  space as bc; refines time-to-pen via reward while
                  staying anchored to bc.
                  Default policy: training/runs/rl/policy.zip.
 Sheep perception
 ----------------
 The dog perceives sheep through its **front-mounted 140° LiDAR**
 (``protos/ShepherdDog.proto``: 180 rays, 12 m max range). Each step:
    1. Reads ``lidar.getRangeImage()``.
    2. Runs ``herding.perception.lidar_perception.detections_from_scan``
       to cluster returns into world-frame ``(x, y)`` sheep estimates.
    3. Folds those into a ``SheepTracker`` which maintains last-seen
       positions for sheep currently out of FOV and latches "penned"
       once a track crosses the gate plane south.
 Sheep ``emitter`` messages are read **for diagnostic logging only**
 (GT_penned counter + auto-finish sentinel); they are never used to
 drive the policy. Perception for control comes entirely from LiDAR.
 Auto-finish
 -----------
 When the dog observes (via GT, read off the receiver) that all sheep
 are penned, it writes ``training/.run_done`` and the launcher
 (``tools/run_webots.sh``) detects it and closes Webots. This keeps
 batch evaluation runs bounded.
 """
 import math
-from controller import Robot, Keyboard
+import os
 import sys
 # --- Make the shared herding/ package importable from this controller dir ---
 _HERE = os.path.dirname(os.path.abspath(__file__))
 _PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
 if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)
 # --- Read runtime cfg early so env vars are set before geometry import ---
 def _load_runtime_config():
    cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
    if not os.path.exists(cfg_path):
        return {}
    out = {}
    try:
        with open(cfg_path) as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                k, _, v = line.partition("=")
                out[k.strip().upper()] = v.strip()
    except OSError:
        return {}
    return out
 _runtime_cfg = _load_runtime_config()
 # Seed env vars from runtime cfg so downstream modules (geometry.py) see them.
 for _rk, _rv in _runtime_cfg.items():
    if _rk.startswith("HERDING_") and _rk not in os.environ:
        os.environ[_rk] = _rv
 import numpy as np
 from controller import Robot
 from herding.control.active_scan import ActiveScanTeacher
 from herding.control.modulation import modulate_speed_near_sheep
 from herding.control.sequential import compute_action as sequential_action
 from herding.control.strombom import compute_action as strombom_action
 from herding.control.universal import compute_action as universal_action
 from herding.perception.obs import build_obs
 from herding.perception.lidar_perception import detections_from_scan
 from herding.perception.sheep_tracker import SheepTracker
 from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels
 from herding.world.geometry import (
    DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
    DOG_SOUTH_LIMIT, DOG_WHEEL_BASE, DOG_WHEEL_BASE_X,
    DOG_WHEEL_BASE_Y, DOG_WHEEL_RADIUS,
    PEN_ENTRY, is_penned_position,
 )
 # ---------------------------------------------------------------------------
 # Mode + policy resolution (cfg already loaded above)
 # ---------------------------------------------------------------------------
 MODE = (os.environ.get("HERDING_MODE")
        or _runtime_cfg.get("HERDING_MODE")
        or "bc").lower()
 def _resolve_policy_dir(mode: str) -> str:
    """Where to look for the trained policy for the given mode.
    Priority:
      1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points
         to a real directory.
      2. Drive-mode-specific default:
            bc → training/runs/bc_differential (or bc_mecanum)
            rl → training/runs/rl_differential (or rl_mecanum)
      3. Legacy path (no drive suffix):
            bc → training/runs/bc
            rl → training/runs/rl
    """
    env_dir = (os.environ.get("HERDING_POLICY_DIR")
               or _runtime_cfg.get("HERDING_POLICY_DIR"))
    if env_dir and os.path.isdir(env_dir):
        return env_dir
    drive = DRIVE_MODE
    mode_default = {
        "bc": os.path.join(_PROJECT_ROOT, "training", "runs",
                           f"bc_{drive}"),
        "rl": os.path.join(_PROJECT_ROOT, "training", "runs",
                           f"rl_{drive}"),
    }
    primary = mode_default.get(mode, mode_default["bc"])
    if os.path.isdir(primary):
        return primary
    # Fallback: legacy paths without drive suffix.
    legacy = {
        "bc": os.path.join(_PROJECT_ROOT, "training", "runs", "bc"),
        "rl": os.path.join(_PROJECT_ROOT, "training", "runs", "rl"),
    }
    fallback = legacy.get(mode, legacy["bc"])
    if os.path.isdir(fallback):
        return fallback
    return env_dir or primary
 _VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal")
 if MODE not in _VALID_MODES:
    print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
    MODE = "strombom"
 POLICY_DIR = _resolve_policy_dir(MODE)
 policy_handle = None
 if MODE in ("bc", "rl"):
    print(f"[dog] resolved POLICY_DIR={POLICY_DIR}  exists={os.path.isdir(POLICY_DIR)}")
    try:
        from policy_loader import load as _load_policy
        policy_handle = _load_policy(POLICY_DIR)
        print(f"[dog] policy loaded from {POLICY_DIR}")
    except Exception as exc:
        print(f"[dog] policy load failed ({exc!r}); falling back to strombom.")
        MODE = "strombom"
 print(f"[dog] running in mode={MODE}")
 # Drive mode: "differential" (2-wheel) or "mecanum" (4-wheel omnidirectional).
 DRIVE_MODE = (os.environ.get("HERDING_DRIVE")
              or _runtime_cfg.get("HERDING_DRIVE")
              or "differential").lower()
 if DRIVE_MODE not in ("differential", "mecanum"):
    print(f"[dog] unknown HERDING_DRIVE={DRIVE_MODE!r}; defaulting to differential.")
    DRIVE_MODE = "differential"
 print(f"[dog] drive mode={DRIVE_MODE}")
 # ---------------------------------------------------------------------------
 # Control parameters
 # ---------------------------------------------------------------------------
 ACTION_SMOOTH = 0.55           # EMA on (vx, vy) — kills frame-to-frame jitter
 RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done")
 def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
    """If the dog is near the south barrier and the action would push it
    further south, override with a northward action. Hard invariant: the
    dog never enters the pen."""
    if dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
        return (0.0, 1.0)
    if dog_y < DOG_SOUTH_LIMIT + 0.5 and vy < -0.2:
        return (vx * 0.5, max(0.0, vy + 0.5))
    return (vx, vy)
 def drive_diff(vx: float, vy: float, left_motor, right_motor,
               compass, motor_max: float):
    if math.hypot(vx, vy) < 1e-3:
        left_motor.setVelocity(0.0)
        right_motor.setVelocity(0.0)
        return
    n = compass.getValues()
    h = math.atan2(n[0], n[1])
    left, right = velocity_to_wheels(
        vx, vy, h,
        max_linear=DOG_MAX_LINEAR,
        wheel_radius=DOG_WHEEL_RADIUS,
        max_wheel_omega=motor_max,
        k_turn=4.0,
    )
    left_motor.setVelocity(left)
    right_motor.setVelocity(right)
 def drive_mecanum(vx: float, vy: float, omega: float,
                  fl_motor, fr_motor, rl_motor, rr_motor,
                  compass, motor_max: float):
    if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
        fl_motor.setVelocity(0.0)
        fr_motor.setVelocity(0.0)
        rl_motor.setVelocity(0.0)
        rr_motor.setVelocity(0.0)
        return
    n = compass.getValues()
    h = math.atan2(n[0], n[1])
    w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
        vx, vy, omega, h,
        max_linear=DOG_MAX_LINEAR,
        wheel_radius=DOG_WHEEL_RADIUS,
        lx=DOG_WHEEL_BASE_X / 2.0, ly=DOG_WHEEL_BASE_Y / 2.0,
        max_wheel_omega=motor_max,
        k_turn=4.0,
        wheel_base=DOG_WHEEL_BASE,
    )
    fl_motor.setVelocity(w_fl)
    fr_motor.setVelocity(w_fr)
    rl_motor.setVelocity(w_rl)
    rr_motor.setVelocity(w_rr)
 # ---------------------------------------------------------------------------
 # Webots devices
 # ---------------------------------------------------------------------------
 robot = Robot()
 timestep = int(robot.getBasicTimeStep())
 if DRIVE_MODE == "mecanum":
    fl_motor = robot.getDevice("front left wheel motor")
    fr_motor = robot.getDevice("front right wheel motor")
    rl_motor = robot.getDevice("rear left wheel motor")
    rr_motor = robot.getDevice("rear right wheel motor")
    for m in (fl_motor, fr_motor, rl_motor, rr_motor):
        m.setPosition(float("inf"))
        m.setVelocity(0.0)
    MOTOR_MAX = min(fl_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
 else:
    left_motor = robot.getDevice("left wheel motor")
    right_motor = robot.getDevice("right wheel motor")
    left_motor.setPosition(float("inf"))
    right_motor.setPosition(float("inf"))
    left_motor.setVelocity(0.0)
    right_motor.setVelocity(0.0)
-
+    MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
 lidar = robot.getDevice("lidar")
 lidar.enable(timestep)
 lidar.enablePointCloud()
 gps = robot.getDevice("gps");           gps.enable(timestep)
 compass = robot.getDevice("compass");   compass.enable(timestep)
 emitter = robot.getDevice("emitter")
 receiver = robot.getDevice("receiver"); receiver.enable(timestep)
 emitter = robot.getDevice("emitter")
 lidar = robot.getDevice("lidar");       lidar.enable(timestep)
 tracker = SheepTracker()
 # Cosmetic ear motors — animated; not used by control.
 left_ear = robot.getDevice("left ear motor")
 right_ear = robot.getDevice("right ear motor")
 left_ear.setPosition(float("inf"))
 right_ear.setPosition(float("inf"))
 left_ear.setVelocity(0.0)
 right_ear.setVelocity(0.0)
 keyboard = robot.getKeyboard()
 keyboard.enable(timestep)
 MOTOR_MAX   = left_motor.getMaxVelocity()
 speed_level = 0.5   # fraction of MOTOR_MAX; adjusted by +/-
 EAR_AMPLITUDE = 0.35   # rad, peak ear deflection
 EAR_RATE      = 8.0    # rad/s, how fast the ears are driven
 ear_phase = 0.0
 EAR_AMPLITUDE = 0.35
 EAR_RATE = 8.0
 # ---------------------------------------------------------------------------
 # Main loop
 # ---------------------------------------------------------------------------
 # Analytic-teacher wrapper (instantiated lazily so RL/BC modes don't pay
 # the import-time cost). Each gets the same ActiveScanTeacher treatment:
 # rotate-on-empty, walk-to-centre, near-sheep speed modulation.
 analytic_teacher = None
 if MODE in ("strombom", "sequential"):
    base_fn = strombom_action if MODE == "strombom" else sequential_action
    analytic_teacher = ActiveScanTeacher(base_fn)
 elif MODE == "universal":
    analytic_teacher = ActiveScanTeacher(universal_action)
 # GT positions from sheep emitters — used **only** for the auto-finish
 # sentinel and the GT_penned diagnostic line. Never fed into control.
 _gt_sheep: dict = {}
 _run_done = False
 prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0)
 step_count = 0
 while robot.step(timestep) != -1:
-    speed = MOTOR_MAX * speed_level
+    step_count += 1
    turn  = speed * 0.6   # differential turn radius
-    left_vel  = 0.0
+    # Drain sheep emitter messages → GT (diagnostic only).
-    right_vel = 0.0
+    while receiver.getQueueLength() > 0:
-    key = keyboard.getKey()
+        msg = receiver.getString()
-    while key > 0:
+        receiver.nextPacket()
-        if   key in (ord('W'), Keyboard.UP):
+        parts = msg.split(":")
-            left_vel  = speed
+        if len(parts) == 4 and parts[0] == "sheep":
-            right_vel = speed
+            try:
-        elif key in (ord('S'), Keyboard.DOWN):
+                _gt_sheep[parts[1]] = (float(parts[2]), float(parts[3]))
-            left_vel  = -speed
+            except ValueError:
-            right_vel = -speed
+                pass
        elif key in (ord('A'), Keyboard.LEFT):
            left_vel  = -turn
            right_vel =  turn
        elif key in (ord('D'), Keyboard.RIGHT):
            left_vel  =  turn
            right_vel = -turn
        elif key in (ord('+'), ord('=')):
            speed_level = min(1.0, speed_level + 0.1)
            print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
        elif key in (ord('-'), ord('_')):
            speed_level = max(0.1, speed_level - 0.1)
            print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
        key = keyboard.getKey()
    left_motor.setVelocity(left_vel)
    right_motor.setVelocity(right_vel)
    pos = gps.getValues()
-    emitter.send(f"dog:{pos[0]}:{pos[1]}")
+    dog_xy = (pos[0], pos[1])
    n = compass.getValues()
    dog_heading = math.atan2(n[0], n[1])
    # ---- LiDAR perception → tracker → active sheep positions ----
    ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32)
    detections = detections_from_scan(ranges, dog_xy[0], dog_xy[1], dog_heading)
    sheep_positions = tracker.update(detections)
    sheep_xy_list = list(sheep_positions.values())
    sheep_penned_list = [False] * len(sheep_xy_list)
    single_obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)
    # ---- Action selection ----
    omega = 0.0
    if MODE in ("bc", "rl") and policy_handle is not None:
        action = policy_handle.predict(single_obs)
        vx, vy = float(action[0]), float(action[1])
        if DRIVE_MODE == "mecanum" and len(action) >= 3:
            omega = float(action[2])
    else:
        result = analytic_teacher(
            dog_xy, dog_heading, sheep_positions, PEN_ENTRY,
            DRIVE_MODE,
        )
        if len(result) == 4:
            vx, vy, omega, _mode_str = result
        else:
            vx, vy, _mode_str = result
    # Near-sheep speed modulation (shared by every mode).
    vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
    # EMA smoothing — kills frame-to-frame action jitter.
    if DRIVE_MODE == "mecanum":
        vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
        vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
        omega = ACTION_SMOOTH * prev_action[2] + (1.0 - ACTION_SMOOTH) * omega
    else:
        vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
        vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
    # Safety: dog must never enter the pen.
    vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
    prev_action = (vx, vy, omega) if DRIVE_MODE == "mecanum" else (vx, vy)
    if DRIVE_MODE == "mecanum":
        drive_mecanum(vx, vy, omega, fl_motor, fr_motor, rl_motor, rr_motor,
                      compass, MOTOR_MAX)
    else:
        drive_diff(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
    emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")
    # Cosmetic ear wiggle.
    ear_phase += 0.12
    ear_pos = EAR_AMPLITUDE * math.sin(ear_phase)
    left_ear.setVelocity(EAR_RATE)
    right_ear.setVelocity(EAR_RATE)
    left_ear.setPosition(ear_pos)
    right_ear.setPosition(-ear_pos)
    # Auto-finish: when all GT sheep are penned, write the sentinel.
    # The launcher polls for it and closes Webots so batch evals don't
    # hang after the task is done. Bounded by `_gt_sheep` so we don't
    # fire during the first few steps while the receiver fills.
    if _gt_sheep and not _run_done:
        gt_active = sum(1 for x, y in _gt_sheep.values()
                        if not is_penned_position(x, y))
        if gt_active == 0:
            os.makedirs(os.path.dirname(RUN_DONE_FILE), exist_ok=True)
            open(RUN_DONE_FILE, "w").close()
            _run_done = True
            print(f"[dog] all {len(_gt_sheep)} sheep penned at step "
                  f"{step_count} — wrote sentinel, launcher will close Webots")
    if step_count % 200 == 0:
        gt_penned = sum(1 for x, y in _gt_sheep.values()
                        if is_penned_position(x, y))
        gt_total = len(_gt_sheep)
        print(f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
              f"GT_penned={gt_penned}/{gt_total} "
              f"tracks_active={tracker.n_active()} "
              f"tracks_penned={tracker.n_penned()} "
              f"detections={len(detections)} "
              f"action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})"
              if DRIVE_MODE == "mecanum" else
              f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
              f"GT_penned={gt_penned}/{gt_total} "
              f"tracks_active={tracker.n_active()} "
              f"tracks_penned={tracker.n_penned()} "
              f"detections={len(detections)} action=({vx:+.2f}, {vy:+.2f})")
@@ -1,153 +0,0 @@
 """
 Render Webots-side debug trajectory from debug.csv.
 The shepherd_dog_rl controller writes per-step state to debug.csv when
 DOG_DEBUG=1. This script reads it and produces:
  trajectory.png   — dog path + sheep paths overlaid on the field
  obs_drift.png    — normalized observation distribution over time
  actions.png      — vx, vy time series
 Run:
    python plot_debug.py                    # uses debug.csv next to this file
    python plot_debug.py --csv path/to.csv --out-dir somewhere/
 """
 import argparse
 import csv
 import os
 import sys
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
 import numpy as np
 def load_csv(path):
    rows = []
    with open(path) as f:
        rd = csv.DictReader(f)
        for r in rd:
            rows.append(r)
    if not rows:
        sys.exit(f"empty CSV: {path}")
    return rows
 def parse_floats(s):
    return [float(x) for x in s.split(";") if x]
 def plot_trajectory(rows, out_path):
    fig, ax = plt.subplots(figsize=(7, 7))
    ax.set_xlim(-16, 16); ax.set_ylim(-16, 16); ax.set_aspect("equal")
    ax.set_facecolor("#dcedc8")
    ax.add_patch(mpatches.Rectangle((-15, -15), 30, 30,
                 fill=False, edgecolor="#795548", lw=2))
    ax.add_patch(mpatches.Rectangle((10, -15), 3, 7,
                 facecolor="#ffe082", edgecolor="#795548", lw=2))
    ax.text(11.5, -11.5, "pen", ha="center", va="center", fontsize=8)
    dog_x = [float(r["dog_x"]) for r in rows]
    dog_y = [float(r["dog_y"]) for r in rows]
    ax.plot(dog_x, dog_y, color="#4e342e", lw=1.5, alpha=0.7, label="dog")
    ax.plot(dog_x[0], dog_y[0], "s", color="#4e342e", ms=10)
    ax.plot(dog_x[-1], dog_y[-1], "D", color="#4e342e", ms=10)
    # Sheep — re-shape into per-sheep tracks
    sx_all = [parse_floats(r["sheep_xs"]) for r in rows]
    sy_all = [parse_floats(r["sheep_ys"]) for r in rows]
    if sx_all and sx_all[-1]:
        n_sheep = len(sx_all[-1])
        palette = ["#e41a1c","#377eb8","#4daf4a","#984ea3","#ff7f00",
                   "#a65628","#f781bf","#999999","#66c2a5","#fc8d62"]
        for i in range(n_sheep):
            xs = [r[i] if i < len(r) else None for r in sx_all]
            ys = [r[i] if i < len(r) else None for r in sy_all]
            xs = [x for x in xs if x is not None]
            ys = [y for y in ys if y is not None]
            if xs:
                c = palette[i % len(palette)]
                ax.plot(xs, ys, color=c, lw=0.8, alpha=0.6, label=f"sheep {i+1}")
                ax.plot(xs[0], ys[0], "o", color=c, ms=6)
                ax.plot(xs[-1], ys[-1], "*", color=c, ms=10)
    n_in_pen = int(rows[-1]["n_penned"])
    ax.set_title(f"Webots trajectory  {len(rows)} steps  penned={n_in_pen}",
                 fontsize=12)
    ax.legend(loc="upper left", fontsize=7, ncol=2)
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
 def plot_actions(rows, out_path):
    t = np.arange(len(rows))
    vx = np.array([float(r["vx"]) for r in rows])
    vy = np.array([float(r["vy"]) for r in rows])
    mag = np.sqrt(vx ** 2 + vy ** 2)
    fig, axes = plt.subplots(3, 1, figsize=(12, 7), sharex=True)
    axes[0].plot(t, vx, color="tab:red", lw=0.8); axes[0].set_ylabel("vx")
    axes[0].axhline(0, color="black", lw=0.4); axes[0].set_ylim(-1.1, 1.1)
    axes[1].plot(t, vy, color="tab:blue", lw=0.8); axes[1].set_ylabel("vy")
    axes[1].axhline(0, color="black", lw=0.4); axes[1].set_ylim(-1.1, 1.1)
    axes[2].plot(t, mag, color="tab:purple", lw=0.8); axes[2].set_ylabel("||action||")
    axes[2].axhline(np.sqrt(2), color="orange", ls="--", lw=1, label="saturated √2")
    axes[2].axhline(1.0, color="gray", ls="--", lw=1)
    axes[2].set_xlabel("step"); axes[2].legend(fontsize=8)
    fig.suptitle("Webots action time series")
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
 def plot_obs(rows, out_path):
    norm = np.array([parse_floats(r["norm_obs"]) for r in rows])
    raw  = np.array([parse_floats(r["raw_obs"])  for r in rows])
    if norm.size == 0:
        return
    n_dims = norm.shape[1]
    labels = [
        "dog_x", "dog_y", "com-dog_x", "com-dog_y",
        "far1-com_x", "far1-com_y", "far2-com_x", "far2-com_y",
        "far3-com_x", "far3-com_y", "pen-com_x", "pen-com_y",
        "pen-far1_x", "pen-far1_y", "radius", "frac_active",
    ][:n_dims]
    t = np.arange(norm.shape[0])
    fig, axes = plt.subplots(n_dims, 1, figsize=(11, 1.0 * n_dims), sharex=True)
    if n_dims == 1: axes = [axes]
    for i in range(n_dims):
        axes[i].plot(t, raw[:, i], color="tab:gray", lw=0.6, alpha=0.6, label="raw")
        axes[i].plot(t, norm[:, i], color="tab:red", lw=0.8, label="normalised")
        axes[i].set_ylabel(labels[i], fontsize=8)
        axes[i].tick_params(labelsize=7)
        if i == 0:
            axes[i].legend(fontsize=7, loc="upper right")
    axes[-1].set_xlabel("step")
    fig.suptitle("Observation values over time (raw vs VecNormalize-normalised)")
    plt.tight_layout()
    fig.savefig(out_path, dpi=110)
    plt.close(fig)
 def main():
    p = argparse.ArgumentParser()
    here = os.path.dirname(os.path.abspath(__file__))
    p.add_argument("--csv", default=os.path.join(here, "debug.csv"))
    p.add_argument("--out-dir", default=os.path.join(here, "debug_out"))
    args = p.parse_args()
    rows = load_csv(args.csv)
    os.makedirs(args.out_dir, exist_ok=True)
    print(f"loaded {len(rows)} rows from {args.csv}")
    plot_trajectory(rows, os.path.join(args.out_dir, "trajectory.png"))
    plot_actions(rows,    os.path.join(args.out_dir, "actions.png"))
    plot_obs(rows,        os.path.join(args.out_dir, "obs.png"))
    print(f"saved trajectory.png + actions.png + obs.png to {args.out_dir}/")
 if __name__ == "__main__":
    main()
@@ -1,285 +0,0 @@
 """
 Shepherd Dog RL controller — runs a trained SB3 PPO policy inside Webots.
 Setup
 -----
 1. Copy your trained files into this directory:
       controllers/shepherd_dog_rl/final_model.zip
       controllers/shepherd_dog_rl/vecnorm.pkl
 2. In field.wbt, set the ShepherdDog robot's controller field to
   "shepherd_dog_rl".  You can do this in the Webots GUI:
       click the robot → Controller → shepherd_dog_rl
 3. Optional: set controllerArgs to ["5"] (number of sheep) if it differs
   from the default of 5.
 The controller reads GPS (dog position) and Receiver (sheep broadcasts),
 builds the same 16-dim flock observation the training env used, normalises
 it with the saved VecNormalize stats, and converts the (vx, vy) policy
 output into differential wheel speeds.
 Debug logging
 -------------
 Set env var DOG_DEBUG=1 to write a per-step CSV (dog pos, sheep positions,
 raw obs, normalised obs, action) to debug.csv alongside this script. Use
 plot_debug.py to render trajectories from it.
 """
 import sys
 import os
 import math
 import struct
 import numpy as np
 # ── make training code importable ───────────────────────────────────────────
 _HERE = os.path.dirname(os.path.abspath(__file__))
 _TRAINING = os.path.join(_HERE, "..", "..", "training")
 sys.path.insert(0, _TRAINING)
 from controller import Robot
 from stable_baselines3 import PPO
 from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
 from herding_env import HerdingEnv
 # ── constants (must match herding_env.py) ───────────────────────────────────
 FIELD      = 15.0
 PEN_CENTER = np.array([11.5, -11.5], dtype=np.float32)
 PEN_X      = (10.0, 13.0)
 PEN_Y      = (-15.0, -8.0)
 DOG_SPEED  = 2.5         # m/s
 WHEEL_R    = 0.038       # wheel radius (metres) — from ShepherdDog.proto
 K_TURN     = 4.0         # heading-error gain (rad/s per rad)
 EAR_AMPLITUDE = 0.35
 EAR_RATE      = 8.0
 # ── model paths ─────────────────────────────────────────────────────────────
 MODEL_PATH   = os.path.join(_HERE, "final_model.zip")
 VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
 DEBUG_CSV    = os.path.join(_HERE, "debug.csv")
 DEBUG_ENABLED = True   # set False to disable debug.csv logging
 # ── action smoothing ─────────────────────────────────────────────────────────
 # EMA on policy output to suppress the rapid oscillation (vx/vy flipping
 # between -1 and +1 every step) that stalls the physical dog.  0 = no
 # smoothing (raw policy), 1 = frozen.  0.3 keeps ~30% of previous action.
 ACTION_SMOOTH = 0.3
 prev_action   = np.zeros(2, dtype=np.float32)
 def norm_angle(a: float) -> float:
    while a >  math.pi: a -= 2 * math.pi
    while a < -math.pi: a += 2 * math.pi
    return a
 def in_pen(x: float, y: float) -> bool:
    return PEN_X[0] < x < PEN_X[1] and PEN_Y[0] < y < PEN_Y[1]
 def build_obs(dog_pos: np.ndarray,
              sheep_dict: dict,
              n_sheep: int,
              dog_heading: float = 0.0) -> np.ndarray:
    """
    Build the 18-dim flock observation — identical to HerdingEnv._obs().
    sheep_dict: {name: (x, y)} for ALL known sheep (penned or not).
    dog_heading: dog's current world-frame heading in radians.
    """
    D = 2 * FIELD
    # Split active vs penned
    active_pos = np.array(
        [v for v in sheep_dict.values() if not in_pen(*v)],
        dtype=np.float32
    )
    n_active = len(active_pos)
    if n_active > 0:
        com        = active_pos.mean(axis=0)
        d_from_com = np.linalg.norm(active_pos - com, axis=1)
        sorted_idx = np.argsort(d_from_com)[::-1]
        radius     = float(d_from_com[sorted_idx[0]])
        def nth(n):
            return active_pos[sorted_idx[n]] if len(sorted_idx) > n else com
        far1, far2, far3 = nth(0), nth(1), nth(2)
    else:
        com = PEN_CENTER.copy()
        radius = 0.0
        far1 = far2 = far3 = PEN_CENTER.copy()
    frac_active = n_active / max(n_sheep, 1)
    return np.array([
        dog_pos[0] / FIELD,  dog_pos[1] / FIELD,
        (com[0]  - dog_pos[0]) / D, (com[1]  - dog_pos[1]) / D,
        (far1[0] - com[0]) / D, (far1[1] - com[1]) / D,
        (far2[0] - com[0]) / D, (far2[1] - com[1]) / D,
        (far3[0] - com[0]) / D, (far3[1] - com[1]) / D,
        (PEN_CENTER[0] - com[0])  / D, (PEN_CENTER[1] - com[1])  / D,
        (PEN_CENTER[0] - far1[0]) / D, (PEN_CENTER[1] - far1[1]) / D,
        radius / D,
        frac_active,
        math.cos(dog_heading), math.sin(dog_heading),
    ], dtype=np.float32)
 # ── Webots setup ─────────────────────────────────────────────────────────────
 robot    = Robot()
 timestep = int(robot.getBasicTimeStep())
 # Drive motors
 left_motor  = robot.getDevice("left wheel motor")
 right_motor = robot.getDevice("right wheel motor")
 left_motor.setPosition(float("inf"))
 right_motor.setPosition(float("inf"))
 left_motor.setVelocity(0.0)
 right_motor.setVelocity(0.0)
 MOTOR_MAX = left_motor.getMaxVelocity()
 # Sensors
 gps      = robot.getDevice("gps");      gps.enable(timestep)
 compass  = robot.getDevice("compass");  compass.enable(timestep)
 receiver = robot.getDevice("receiver"); receiver.enable(timestep)
 emitter  = robot.getDevice("emitter")
 # Cosmetic
 left_ear  = robot.getDevice("left ear motor")
 right_ear = robot.getDevice("right ear motor")
 left_ear.setPosition(float("inf"));  right_ear.setPosition(float("inf"))
 left_ear.setVelocity(0.0);           right_ear.setVelocity(0.0)
 ear_phase = 0.0
 # Number of sheep (from controllerArgs or default)
 try:
    n_sheep = int(sys.argv[1])
 except (IndexError, ValueError):
    n_sheep = 3
 # ── Load model ───────────────────────────────────────────────────────────────
 print(f"[RL dog] Loading model from {MODEL_PATH}")
 print(f"[RL dog] Loading vecnorm from {VECNORM_PATH}")
 dummy_env = DummyVecEnv([lambda: HerdingEnv(n_sheep=n_sheep)])
 vecnorm   = VecNormalize.load(VECNORM_PATH, dummy_env)
 vecnorm.training    = False
 vecnorm.norm_reward = False
 model = PPO.load(MODEL_PATH, device="cpu")
 print(f"[RL dog] Model loaded — running with n_sheep={n_sheep}")
 # ── Runtime state ─────────────────────────────────────────────────────────────
 sheep_positions: dict = {}   # {name: (x, y)} — updated every step from receiver
 step_count = 0
 # Debug CSV — written every step when DOG_DEBUG=1
 debug_file = None
 if DEBUG_ENABLED:
    import csv
    debug_file = open(DEBUG_CSV, "w", newline="")
    debug_writer = csv.writer(debug_file)
    debug_writer.writerow([
        "step", "dog_x", "dog_y", "heading",
        "sheep_xs", "sheep_ys", "n_active", "n_penned",
        "raw_obs", "norm_obs", "vx", "vy",
    ])
    print(f"[RL dog] DEBUG logging to {DEBUG_CSV}")
 def bearing() -> float:
    """Current robot heading in world frame (radians)."""
    n = compass.getValues()
    return math.atan2(n[0], n[1])
 def drive(action_vx: float, action_vy: float) -> None:
    """Convert (vx, vy) policy action to differential wheel speeds."""
    speed_ms = math.sqrt(action_vx ** 2 + action_vy ** 2) * DOG_SPEED
    if speed_ms < 0.05:
        left_motor.setVelocity(0.0)
        right_motor.setVelocity(0.0)
        return
    target_heading = math.atan2(action_vy, action_vx)
    err = norm_angle(target_heading - bearing())
    fwd_ms  = speed_ms * max(0.0, math.cos(err))
    fwd_rad = fwd_ms / WHEEL_R
    turn    = K_TURN * err    # rad/s correction
    l = max(-MOTOR_MAX, min(MOTOR_MAX, fwd_rad - turn))
    r = max(-MOTOR_MAX, min(MOTOR_MAX, fwd_rad + turn))
    left_motor.setVelocity(l)
    right_motor.setVelocity(r)
 # ── Main loop ─────────────────────────────────────────────────────────────────
 while robot.step(timestep) != -1:
    step_count += 1
    # 1. Drain receiver — update sheep position table
    while receiver.getQueueLength() > 0:
        try:
            msg = receiver.getString()
            parts = msg.split(":")
            if parts[0] == "sheep" and len(parts) == 4:
                sheep_positions[parts[1]] = (float(parts[2]), float(parts[3]))
        except Exception:
            pass
        receiver.nextPacket()
    # 2. Dog GPS
    gps_vals = gps.getValues()
    dog_pos  = np.array([gps_vals[0], gps_vals[1]], dtype=np.float32)
    # 3. Build and normalise observation (heading from compass)
    raw_obs  = build_obs(dog_pos, sheep_positions, n_sheep,
                         dog_heading=bearing())
    obs_norm = vecnorm.normalize_obs(raw_obs[np.newaxis])  # (1, 13)
    # 4. Policy inference + smoothing
    action, _ = model.predict(obs_norm, deterministic=True)
    raw_a = np.array([float(action[0][0]), float(action[0][1])], dtype=np.float32)
    if ACTION_SMOOTH > 0:
        smoothed = ACTION_SMOOTH * prev_action + (1.0 - ACTION_SMOOTH) * raw_a
        prev_action[:] = smoothed
        vx, vy = float(smoothed[0]), float(smoothed[1])
    else:
        vx, vy = float(raw_a[0]), float(raw_a[1])
    # 5. Drive
    drive(vx, vy)
    # 6. Broadcast dog position so sheep can compute flee forces
    emitter.send(f"dog:{dog_pos[0]:.4f}:{dog_pos[1]:.4f}")
    # 7. Ear animation
    ear_phase += 0.12
    ep = EAR_AMPLITUDE * math.sin(ear_phase)
    left_ear.setVelocity(EAR_RATE);  right_ear.setVelocity(EAR_RATE)
    left_ear.setPosition( ep);        right_ear.setPosition(-ep)
    # Periodic status
    if step_count % 100 == 0:
        n_in_pen = sum(1 for x, y in sheep_positions.values() if in_pen(x, y))
        print(f"[RL dog] step={step_count}  known_sheep={len(sheep_positions)}"
              f"  penned={n_in_pen}/{n_sheep}  dog=({dog_pos[0]:.2f},{dog_pos[1]:.2f})"
              f"  action=({vx:.2f}, {vy:.2f})")
    # Debug CSV row
    if debug_file is not None:
        n_active = sum(1 for x, y in sheep_positions.values() if not in_pen(x, y))
        n_in_pen = len(sheep_positions) - n_active
        debug_writer.writerow([
            step_count, f"{dog_pos[0]:.4f}", f"{dog_pos[1]:.4f}",
            f"{bearing():.4f}",
            ";".join(f"{v[0]:.3f}" for v in sheep_positions.values()),
            ";".join(f"{v[1]:.3f}" for v in sheep_positions.values()),
            n_active, n_in_pen,
            ";".join(f"{x:.4f}" for x in raw_obs),
            ";".join(f"{x:.4f}" for x in obs_norm[0]),
            f"{vx:.4f}", f"{vy:.4f}",
        ])
        if step_count % 200 == 0:
            debug_file.flush()
@@ -1,33 +1,37 @@
 # Group G25 - Formal & Title & Goals
 This is the original course proposal/goals document. For current setup,
 training, evaluation, and Webots run instructions, see `../README.md`
 and `../training/README.md`.
 ## Team members
 - Diogo Costa <up202502576@up.pt>
 - Johnny Fernandes <up202402612@up.pt>
 - Nelson Neto <up202108117@up.pt>
 ## (i) Title and General objectives
-**RL-Based Autonomous Shepherd Robot for Livestock Herding**
+**Autonomous Shepherd Robot for Livestock Herding (Strömbom)**
 - Implement effective herding behaviors through proximity and movement strategies
 - Build a 3D environment with realistic robot dynamics and LIDAR-based perception
- Develop a mobile robot capable of autonomously guiding a flock of sheep into a designated target area using Reinforcement Learning
+- Develop a mobile robot capable of autonomously guiding a flock of sheep into a designated target area using the Strömbom heuristic approach
 # Group G25 - (ii) Intermediate Goals
 ## Intermediate goals
 - Set up the Webots simulation environment with an open field and target zone
- Implement lightweight Gymnasium-based 2D herding environment
+- Implement lightweight 2D herding environment for algorithm evaluation
 - Design a Sheep and Dog robot
- Implement a sheep flocking model for fast RL iteration
+- Implement a sheep flocking model for fast Strömbom iteration
 - Validate LiDAR sensor feedback for sheep detection and distance estimation
 # Group G25 - Course Project (Final) Goals
 ## (iii) Main goals
- State-of-the-art survey on shepherding algorithms and multi-agent RL herding
+- State-of-the-art survey on shepherding algorithms with focus on Strömbom herding
- Train the robot using PPO to successfully herd a single sheep into the goal
+- Implement and tune Strömbom controller to successfully herd a single sheep into the goal
 - Achieve fully autonomous herding of multiple sheep and a full flock into the target area
 - Optimize robot trajectory to minimize the time required to group the flock
 - Ensure zero collisions between the robot and the sheep during the task
@@ -35,7 +39,7 @@
 - Article, demo video, and final presentation
 ## (iv) Extra Merit
- Curriculum Learning (scaling from 1 sheep to a flock)
+- Progressive evaluation (scaling from 1 sheep to a flock)
 - Comparison of performance between Differential Drive and Mecanum wheels
 - Robustness testing under sensor noise or varying sheep speeds, configurations and parameters
 - Multi-shepherd cooperative mode: 2 dogs learn role specialization (collector vs. driver)
@@ -46,11 +50,10 @@
 ## (v) Tools
 - Webots for 3D physics simulation with ROS2 integration via `webots_ros2` package
- Stable-Baselines3 for the PPO algorithm implementation
+- Gymnasium (OpenAI) for the simulation wrapper and evaluation tooling
 - Gymnasium (OpenAI) for the RL environment wrapper (lightweight 2D herding env for fast RL training)
 - Python as the primary programming language (sheep flocking model, reward shaping, evaluation)
 ## (vi) Limitations
- Computational Power: Training time might be high for complex flock behaviors
+- Computational Power: Large batch evaluation and parameter sweeps can still be time-consuming
 - Sim-to-Real Gap: No real-world validation of the herding controller; project is simulation-only (2D + Webots 3D)
 - Model Complexity: Simplified sheep behavior (scripted) may not account for all biological livestock nuances
@@ -0,0 +1,8 @@
 """Shared core for the shepherd herding project.
 This package is the single source of truth for world geometry, sheep
 flocking dynamics, differential-drive kinematics, observation building,
 and the Strömbom heuristic. It is imported both by the Webots
 controllers (for inference) and by the Gymnasium training environment
 (for fast PPO rollouts), so the two paths cannot drift apart.
 """
@@ -0,0 +1,122 @@
 """Active-perception wrapper for the analytic shepherd teachers.
 Under partial-observability LiDAR perception the tracker starts empty
 — a naive analytic teacher returns ``(0, 0, "idle")`` and the dog
 stops. This wrapper interleaves the underlying teacher with two
 exploration behaviours:
 * opening in-place rotation for the first ``INITIAL_SCAN_STEPS``,
  guaranteeing the LiDAR sweeps a full circle before driving;
 * walk-to-centre when the tracker has been empty for at least
  ``EMPTY_DEBOUNCE_STEPS`` consecutive frames (corners can sit
  beyond the 12 m LiDAR range).
 When the tracker has detections the base teacher's action is used,
 post-processed by ``modulate_speed_near_sheep`` so the dog doesn't
 charge the flock.
 """
 from __future__ import annotations
 import math
 from herding.control.modulation import modulate_speed_near_sheep
 INITIAL_SCAN_STEPS = 80         # ≈1.3 s — covers one full rotation
 EXPLORE_SPEED = 0.7             # action norm while walking blind
 EMPTY_DEBOUNCE_STEPS = 8        # consecutive empty frames before exploring
 class ActiveScanTeacher:
    """Stateful wrapper. Construct one per episode (or call ``reset``).
    Call signature::
        vx, vy, omega, mode = teacher(dog_xy, dog_heading, sheep_positions,
                                      pen_target, drive_mode="differential")
    ``omega`` is the yaw-rate intent (mecanum only); 0.0 for differential
    drive and during blind exploration phases.
    """
    def __init__(self, base_action_fn, initial_scan_steps: int = INITIAL_SCAN_STEPS):
        self.base = base_action_fn
        self.initial_scan = int(initial_scan_steps)
        self.reset()
    def reset(self) -> None:
        self.step = 0
        self.empty_streak = 0
        self.last_action: tuple[float, float] = (0.0, 0.0)
    @staticmethod
    def _scan_action(dog_heading: float) -> tuple[float, float]:
        # Target opposite to current heading; velocity_to_wheels'
        # cos(err) clamp drives forward speed to ~0 → in-place rotation.
        target = dog_heading + math.pi
        return math.cos(target), math.sin(target)
    @staticmethod
    def _explore_action(dog_xy) -> tuple[float, float]:
        """Walk toward (0, 0) while the LiDAR keeps sweeping."""
        dx, dy = -dog_xy[0], -dog_xy[1]
        d = math.hypot(dx, dy)
        if d < 0.5:
            return 0.0, 0.0
        return EXPLORE_SPEED * dx / d, EXPLORE_SPEED * dy / d
    def __call__(self, dog_xy, dog_heading, sheep_positions, pen_target,
                 drive_mode="differential"):
        self.step += 1
        n_visible = len(sheep_positions)
        if n_visible == 0:
            self.empty_streak += 1
        else:
            self.empty_streak = 0
        # Phase 1: opening rotation.
        if self.step <= self.initial_scan:
            vx, vy = self._scan_action(dog_heading)
            self.last_action = (vx, vy)
            return vx, vy, 0.0, "scan_initial"
        # Phase 2: walk-to-centre after a sustained empty tracker.
        if self.empty_streak >= EMPTY_DEBOUNCE_STEPS:
            ex, ey = self._explore_action(dog_xy)
            if ex == 0.0 and ey == 0.0:
                vx, vy = self._scan_action(dog_heading)
                mode = "scan_at_centre"
            else:
                vx, vy = ex, ey
                mode = "explore"
            self.last_action = (vx, vy)
            return vx, vy, 0.0, mode
        # Phase 2b: brief tracker blink — hold the previous action.
        if n_visible == 0:
            vx, vy = self.last_action
            return vx, vy, 0.0, "hold"
        # Phase 3: hand off to the underlying analytic teacher, then
        # apply the shared near-sheep speed modulation.
        # Handle both old-style (dog_xy, sheep, pen) and new-style
        # (dog_xy, heading, sheep, pen, drive_mode) teachers.
        try:
            result = self.base(dog_xy, dog_heading, sheep_positions,
                               pen_target, drive_mode)
        except TypeError:
            try:
                result = self.base(dog_xy, dog_heading, sheep_positions,
                                   pen_target)
            except TypeError:
                result = self.base(dog_xy, sheep_positions, pen_target)
        if len(result) == 4:
            vx, vy, omega, mode = result
        else:
            vx, vy, mode = result
            omega = 0.0
        vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
        self.last_action = (vx, vy)
        return vx, vy, omega, mode
@@ -0,0 +1,42 @@
 """Shared action post-processing.
 Every dog mode routes its action through ``modulate_speed_near_sheep``
 so the magnitude is reduced near sheep — direction (intent) is
 preserved.
 """
 from __future__ import annotations
 import math
 SLOW_NEAR_SHEEP = 2.5  # m — distance below which action norm is scaled down
 MIN_SPEED = 0.30       # action norm at zero distance
 def modulate_speed_near_sheep(
    vx: float, vy: float,
    dog_xy: tuple[float, float],
    sheep_positions,
    slow_dist: float = SLOW_NEAR_SHEEP,
    min_scale: float = MIN_SPEED,
 ) -> tuple[float, float]:
    """Linearly ramp action magnitude from ``min_scale`` at distance 0
    to 1.0 at ``slow_dist``. ``sheep_positions`` may be a
    ``{name: (x, y)}`` dict or an iterable of ``(x, y)`` tuples.
    """
    if not sheep_positions:
        return vx, vy
    if hasattr(sheep_positions, "values"):
        positions = sheep_positions.values()
    else:
        positions = sheep_positions
    nearest = float("inf")
    for sx, sy in positions:
        d = math.hypot(sx - dog_xy[0], sy - dog_xy[1])
        if d < nearest:
            nearest = d
    if nearest >= slow_dist or nearest == float("inf"):
        return vx, vy
    scale = min_scale + (1.0 - min_scale) * (nearest / slow_dist)
    return vx * scale, vy * scale
@@ -0,0 +1,74 @@
 """Sequential "pin-and-push" shepherd-dog controller.
 Single-target alternative to Strömbom: each step, target the sheep
 closest to the pen, park behind it, drive it through; once it latches
 penned the next-closest sheep becomes the target. Naturally queues
 the flock through a narrow gate.
 """
 import math
 from herding.world.geometry import GATE_Y, PEN_ENTRY, in_pen
 DELTA_DRIVE = 1.5     # standoff behind the target sheep
 APPROACH_GAIN = 1.0   # action magnitude scale (1 = full speed)
 def _unit(x, y):
    d = math.hypot(x, y)
    if d < 1e-6:
        return 0.0, 0.0
    return x / d, y / d
 def _is_active(x, y) -> bool:
    return (not in_pen(x, y)) and y > GATE_Y
 def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Return ``(vx, vy, mode)`` — same call signature as Strömbom."""
    active = [(name, x, y) for name, (x, y) in sheep_positions.items()
              if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle"
    name, sx, sy = min(
        active,
        key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
    )
    ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
    tx = sx + DELTA_DRIVE * ux
    ty = sy + DELTA_DRIVE * uy
    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}"
 def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """``compute_action`` plus a debug dict (target, drive point)."""
    active = [(name, x, y) for name, (x, y) in sheep_positions.items()
              if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle", {
            "n_active": 0, "target_name": "",
            "target_x": 0.0, "target_y": 0.0,
            "drive_x": dog_xy[0], "drive_y": dog_xy[1],
        }
    name, sx, sy = min(
        active,
        key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
    )
    ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
    tx = sx + DELTA_DRIVE * ux
    ty = sy + DELTA_DRIVE * uy
    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}", {
        "n_active": len(active), "target_name": name,
        "target_x": sx, "target_y": sy,
        "drive_x": tx, "drive_y": ty,
    }
@@ -0,0 +1,95 @@
 """Strömbom (2014) collect/drive heuristic for the shepherd dog.
 When the flock is scattered (max radius > F_FACTOR · √n) the dog moves
 to a point behind the furthest sheep and pushes it back toward the
 flock CoM. Otherwise it drives, parking behind the CoM relative to
 the pen target. Returns a unit-vector intent ``(vx, vy, mode)``.
 Reference: Strömbom et al. 2014, "Solving the shepherding problem."
 """
 import math
 from herding.world.geometry import PEN_ENTRY, GATE_Y, in_pen
 F_FACTOR = 4.0       # collect/drive threshold scaled by √n
 DELTA_COLLECT = 1.5  # drive-position offset behind the furthest sheep
 DELTA_DRIVE = 2.0    # drive-position offset behind the flock CoM
 def _unit(x, y):
    d = math.hypot(x, y)
    if d < 1e-6:
        return 0.0, 0.0
    return x / d, y / d
 def _is_active(x, y) -> bool:
    """A sheep still in the field counts; one south of the gate doesn't."""
    return (not in_pen(x, y)) and y > GATE_Y
 def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}."""
    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle"
    n = len(active)
    com_x = sum(p[0] for p in active) / n
    com_y = sum(p[1] for p in active) / n
    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
    radius = max(dists)
    if radius > F_FACTOR * math.sqrt(n):
        # Collect: aim behind the furthest sheep, opposite the CoM.
        idx = max(range(n), key=lambda i: dists[i])
        sx, sy = active[idx]
        ux, uy = _unit(sx - com_x, sy - com_y)
        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
        mode = "collect"
    else:
        # Drive: aim behind the CoM, opposite the pen.
        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
        mode = "drive"
    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    return ax, ay, mode
 def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """``compute_action`` plus a small debug dict (CoM, target, radius)."""
    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle", {
            "n_active": 0, "radius": 0.0, "threshold": 0.0,
            "com_x": 0.0, "com_y": 0.0,
            "target_x": dog_xy[0], "target_y": dog_xy[1],
        }
    n = len(active)
    com_x = sum(p[0] for p in active) / n
    com_y = sum(p[1] for p in active) / n
    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
    radius = max(dists)
    threshold = F_FACTOR * math.sqrt(n)
    if radius > threshold:
        idx = max(range(n), key=lambda i: dists[i])
        sx, sy = active[idx]
        ux, uy = _unit(sx - com_x, sy - com_y)
        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
        mode = "collect"
    else:
        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
        mode = "drive"
    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    dbg = {
        "n_active": n, "radius": radius, "threshold": threshold,
        "com_x": com_x, "com_y": com_y,
        "target_x": tx, "target_y": ty,
    }
    return ax, ay, mode, dbg
@@ -0,0 +1,187 @@
 """Universal shepherd teacher — Strömbom core + mecanum omega + straggler recovery.
 The core collect/drive logic is **identical** to :mod:`strombom` (same
 ``F_FACTOR``, ``DELTA_COLLECT``, ``DELTA_DRIVE`` thresholds and target
 computation) so it inherits the proven ~100 % success rate at n ≤ 8.
 Two additions make it useful as a universal teacher:
 1. **Omega for mecanum.**  When ``drive_mode="mecanum"``, the teacher
   outputs a non-zero ``omega`` channel so the dog **faces the
   direction of travel**.  During collect the dog faces the target
   sheep; during drive it faces the pen.  This gives the BC student a
   real rotation signal to learn from.
 2. **Last-straggler recovery.**  When exactly one sheep remains active
   and it is near the gate, the dog positions itself behind that
   straggler (opposite the gate) and pushes it straight through.  This
   handles the edge case where the last sheep circles the gate posts.
 Call signature::
    vx, vy, omega, mode = compute_action(
        dog_xy, dog_heading, sheep_positions, pen_target,
        drive_mode="differential",
    )
 For differential drive ``omega`` is always 0.0 and can be ignored.
 """
 import math
 from herding.world.geometry import (
    PEN_ENTRY, GATE_X, GATE_Y, in_pen,
 )
 # ---------------------------------------------------------------------------
 # Tuning constants — match Strömbom exactly for proven success rates.
 # ---------------------------------------------------------------------------
 F_FACTOR = 4.0          # collect/drive threshold scaled by √n
 DELTA_COLLECT = 1.5      # standoff behind the furthest sheep
 DELTA_DRIVE = 2.0        # standoff behind flock CoM
 # Omega gain for mecanum (how strongly the dog turns to face target)
 OMEGA_GAIN = 0.6
 # Recovery: push the last straggler straight through the gate.
 RECOVERY_GATE_DIST = 6.0  # only when straggler is this close to gate centre
 RECOVERY_PUSH_DIST = 1.2   # stand-off behind straggler, away from gate
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 def _unit(x, y):
    d = math.hypot(x, y)
    if d < 1e-6:
        return 0.0, 0.0
    return x / d, y / d
 def _is_active(x, y) -> bool:
    return (not in_pen(x, y)) and y > GATE_Y
 def _angle_diff(a, b):
    """Signed shortest angular difference a - b, in [-π, π]."""
    return math.atan2(math.sin(a - b), math.cos(a - b))
 def _gate_center():
    """Centre of the gate opening."""
    return (0.5 * (GATE_X[0] + GATE_X[1]), GATE_Y)
 # ---------------------------------------------------------------------------
 # Core teacher
 # ---------------------------------------------------------------------------
 def compute_action(dog_xy, dog_heading, sheep_positions,
                   pen_target=PEN_ENTRY, drive_mode="differential"):
    """Return ``(vx, vy, omega, mode)``.
    Parameters
    ----------
    dog_xy : (float, float)
        Dog position in world frame.
    dog_heading : float
        Dog heading in world frame (rad), 0 = +x axis.
    sheep_positions : dict[str, (float, float)]
        Visible sheep positions.
    pen_target : (float, float)
        Centre of the pen gate (defaults to geometry.PEN_ENTRY).
    drive_mode : str
        ``"differential"`` or ``"mecanum"``.
    Returns
    -------
    vx, vy : float
        Velocity intent in [-1, 1].
    omega : float
        Yaw intent in [-1, 1] (0 for differential).
    mode : str
        Phase label: ``"idle"``, ``"collect"``, ``"drive"``, ``"recovery"``.
    """
    active = [(x, y) for (x, y) in sheep_positions.values()
              if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, 0.0, "idle"
    n = len(active)
    com_x = sum(p[0] for p in active) / n
    com_y = sum(p[1] for p in active) / n
    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
    radius = max(dists)
    # ---- Last-straggler recovery (single sheep circling near gate) ----
    gc = _gate_center()
    if n == 1:
        sx, sy = active[0]
        d_to_gate = math.hypot(sx - gc[0], sy - gc[1])
        if d_to_gate < RECOVERY_GATE_DIST:
            dx_g = sx - gc[0]
            dy_g = sy - gc[1]
            d_g = math.hypot(dx_g, dy_g)
            if d_g > 0.3:
                ux, uy = dx_g / d_g, dy_g / d_g
            else:
                ux, uy = 0.0, 1.0
            tx = sx + RECOVERY_PUSH_DIST * ux
            ty = sy + RECOVERY_PUSH_DIST * uy
            ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
            mode = "recovery"
            face_target = (sx, sy)
            omega = 0.0
            if drive_mode == "mecanum":
                desired = math.atan2(
                    face_target[1] - dog_xy[1],
                    face_target[0] - dog_xy[0],
                )
                err = _angle_diff(desired, dog_heading)
                omega = max(-1.0, min(1.0, OMEGA_GAIN * err / math.pi))
            return ax, ay, omega, mode
    # ---- Standard Strömbom collect/drive (proven core) ----
    if radius > F_FACTOR * math.sqrt(n):
        # Collect: aim behind the furthest sheep, opposite the CoM.
        idx = max(range(n), key=lambda i: dists[i])
        sx, sy = active[idx]
        ux, uy = _unit(sx - com_x, sy - com_y)
        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
        mode = "collect"
        face_target = (sx, sy)
    else:
        # Drive: aim behind the CoM, opposite the pen.
        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
        mode = "drive"
        face_target = pen_target
    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    # ---- Omega (mecanum only) ----
    omega = 0.0
    if drive_mode == "mecanum" and mode != "idle":
        desired_heading = math.atan2(
            face_target[1] - dog_xy[1],
            face_target[0] - dog_xy[0],
        )
        err = _angle_diff(desired_heading, dog_heading)
        omega = max(-1.0, min(1.0, OMEGA_GAIN * err / math.pi))
    return ax, ay, omega, mode
 def compute_action_diff(dog_xy, dog_heading, sheep_positions,
                        pen_target=PEN_ENTRY):
    """Compatibility wrapper returning ``(vx, vy, mode)`` — same as Strömbom.
    Use this when plugging into existing differential-drive code that
    doesn't expect omega.
    """
    vx, vy, _omega, mode = compute_action(
        dog_xy, dog_heading, sheep_positions, pen_target,
        drive_mode="differential",
    )
    return vx, vy, mode
@@ -0,0 +1,208 @@
 """Cluster a 2D LiDAR scan into world-frame sheep position estimates.
 Pipeline:
    ranges (N,) → hit mask → world-frame points
                                │
                                ▼
                         adjacency clustering (gap > GAP_THRESHOLD
                         starts a new cluster, walking rays in
                         angular order)
                                │
                                ▼
                         centroid + span + region + structure filters
                                │
                                ▼
                         list of (x, y) detections
 The downstream tracker handles association across frames.
 """
 from __future__ import annotations
 import math
 import numpy as np
 from herding.world.geometry import (
    FIELD_SHAPE, FIELD_ROUND_R,
    FIELD_X, FIELD_Y, GATE_X, GATE_Y,
    PEN_X, PEN_Y,
 )
 from herding.perception.lidar_sim import (
    LIDAR_FOV, LIDAR_MAX_RANGE, LIDAR_N_RAYS, SHEEP_RADIUS, POST_RADIUS,
    ray_angles,
 )
 GAP_THRESHOLD = 0.6      # m — adjacent ray-points farther apart start a new cluster
 MAX_CLUSTER_SPAN = 1.5   # m — wider clusters are walls / structures
 RANGE_HIT_EPS = 0.05     # m — hit if range < max_range - eps
 WALL_REJECT = 0.5        # m — drop detections this close to a known wall line
 # Multi-peak splitting: within a single cluster, if the range profile
 # has a local dip (i.e. the range increases then decreases) deeper than
 # SPLIT_RANGE_GAP, the cluster is split into two detections.
 SPLIT_RANGE_GAP = 0.20   # m — range increase that triggers a split
 # Sheep-sized static features. A cluster centred within STATIC_REJECT of
 # any of these is never a sheep.
 _STATIC_FEATURES_RECT = (
    ( 10.0, -15.0), ( 13.0, -15.0),                   # gate posts
    ( 15.0,  15.0), ( 15.0, -15.0),
    (-15.0,  15.0), (-15.0, -15.0),                   # field corners
 )
 _STATIC_FEATURES_ROUND = (
    (GATE_X[0], GATE_Y),
    (GATE_X[1], GATE_Y),
 )
 STATIC_REJECT = 0.8
 def _get_static_features():
    if FIELD_SHAPE == "field_round":
        return _STATIC_FEATURES_ROUND
    return _STATIC_FEATURES_RECT
 _STATIC_FEATURES = _get_static_features()
 def _in_field_region(cx: float, cy: float) -> bool:
    """Check if a detection is inside the field (with small margin)."""
    if FIELD_SHAPE == "field_round":
        r = math.hypot(cx, cy)
        return r < FIELD_ROUND_R + 0.2
    return (FIELD_X[0] - 0.2 < cx < FIELD_X[1] + 0.2 and
            FIELD_Y[0] - 0.2 < cy < FIELD_Y[1] + 0.2)
 def _near_wall(cx: float, cy: float) -> bool:
    """True if the detection is too close to a wall to be a sheep."""
    if FIELD_SHAPE == "field_round":
        r = math.hypot(cx, cy)
        return r > FIELD_ROUND_R - WALL_REJECT
    return (
        cx > FIELD_X[1] - WALL_REJECT or cx < FIELD_X[0] + WALL_REJECT or
        cy > FIELD_Y[1] - WALL_REJECT or
        (cy < FIELD_Y[0] + WALL_REJECT and not (PEN_X[0] <= cx <= PEN_X[1]))
    )
 def _split_cluster_by_range(
    points: list[tuple[float, float]],
    range_vals: list[float],
 ) -> list[list[tuple[float, float]]]:
    """Split a cluster at range-profile local maxima (gaps between sheep).
    When two sheep are close, the LiDAR sees them as one arc, but the
    range profile has a local peak between them (the ray passes between
    the two discs). This function finds those peaks and splits.
    """
    if len(points) < 4:
        return [points]
    # Find the minimum range in the cluster (closest point to dog).
    r_min = min(range_vals)
    # Find the maximum range (the dip/gap between sheep).
    r_max = max(range_vals)
    # If the range variation is small, it's a single target.
    if r_max - r_min < SPLIT_RANGE_GAP:
        return [points]
    # Find the split point: the index with the maximum range.
    split_idx = range_vals.index(r_max)
    if split_idx <= 1 or split_idx >= len(points) - 2:
        return [points]
    # Split into two sub-clusters.
    left = points[:split_idx]
    right = points[split_idx + 1:]
    # Recursively split each half.
    result = []
    for sub_pts, sub_ranges in [
        (left, range_vals[:split_idx]),
        (right, range_vals[split_idx + 1:]),
    ]:
        if len(sub_pts) >= 1:
            result.extend(_split_cluster_by_range(sub_pts, sub_ranges))
    return result if result else [points]
 def detections_from_scan(
    ranges: np.ndarray,
    dog_x: float, dog_y: float, dog_heading: float,
    max_range: float = LIDAR_MAX_RANGE,
 ) -> list[tuple[float, float]]:
    """Return list of (x, y) world-frame sheep position estimates."""
    ranges = np.asarray(ranges, dtype=np.float32)
    n_rays = ranges.shape[0]
    if n_rays == 0:
        return []
    angles = ray_angles(n_rays, LIDAR_FOV)
    hit = ranges < max_range - RANGE_HIT_EPS
    world_a = dog_heading + angles
    px = dog_x + ranges * np.cos(world_a)
    py = dog_y + ranges * np.sin(world_a)
    # Walk rays in angular order; a large jump between consecutive
    # world-frame hit points closes the current cluster.
    # Store (x, y, range) per hit ray for multi-peak splitting.
    clusters: list[list[tuple[float, float, float]]] = []
    current: list[tuple[float, float, float]] = []
    prev_xy: tuple[float, float] | None = None
    for i in range(n_rays):
        if not bool(hit[i]):
            if current:
                clusters.append(current)
                current = []
            prev_xy = None
            continue
        pt = (float(px[i]), float(py[i]), float(ranges[i]))
        if prev_xy is not None and math.hypot(pt[0] - prev_xy[0], pt[1] - prev_xy[1]) > GAP_THRESHOLD:
            clusters.append(current)
            current = []
        current.append(pt)
        prev_xy = (pt[0], pt[1])
    if current:
        clusters.append(current)
    detections: list[tuple[float, float]] = []
    for cluster in clusters:
        points_xy = [(p[0], p[1]) for p in cluster]
        range_vals = [p[2] for p in cluster]
        # Multi-peak splitting.
        if len(cluster) >= 4:
            sub_clusters = _split_cluster_by_range(points_xy, range_vals)
        else:
            sub_clusters = [points_xy]
        for sub in sub_clusters:
            if len(sub) < 1:
                continue
            xs = [p[0] for p in sub]
            ys = [p[1] for p in sub]
            cx, cy = sum(xs) / len(xs), sum(ys) / len(ys)
            span = math.hypot(max(xs) - min(xs), max(ys) - min(ys))
            if span > MAX_CLUSTER_SPAN:
                continue
            # Rays hit the front edge of the sheep; offset outward by
            # SHEEP_RADIUS along the dog→cluster direction.
            dx, dy = cx - dog_x, cy - dog_y
            d = math.hypot(dx, dy)
            if d > 1e-3:
                cx += SHEEP_RADIUS * dx / d
                cy += SHEEP_RADIUS * dy / d
            in_main = _in_field_region(cx, cy)
            in_gate_strip = (PEN_X[0] - 0.2 < cx < PEN_X[1] + 0.2 and
                             GATE_Y - 1.0 < cy < GATE_Y + 0.2)
            if not (in_main or in_gate_strip):
                continue
            if any(math.hypot(cx - fx, cy - fy) < STATIC_REJECT
                   for fx, fy in _STATIC_FEATURES):
                continue
            if _near_wall(cx, cy):
                continue
            detections.append((cx, cy))
    return detections
@@ -0,0 +1,235 @@
 """Fast 2D LiDAR simulator for the Gymnasium env.
 Raycasts against sheep (discs) and static world geometry. For rectangular
 fields this is axis-aligned walls + gate posts; for round fields it is a
 circular wall + gate posts. The env reproduces the false-positive cluster
 distribution Webots produces from real 3D geometry.
 Returns a range array matching the Webots Lidar device:
 180 rays, 140° FOV centred on forward, 12 m max range, 5 mm noise.
 See ``protos/ShepherdDog.proto``.
 """
 from __future__ import annotations
 import math
 import numpy as np
 from herding.world.geometry import (
    FIELD_SHAPE, FIELD_ROUND_R,
    FIELD_X, FIELD_Y,
    GATE_X, GATE_Y,
    PEN_X, PEN_Y,
 )
 # Match protos/ShepherdDog.proto Lidar device — extended to 360° for
 # full situational awareness. The original Webots device is 140° FOV /
 # 180 rays; we use 360 rays for full-circle coverage.
 LIDAR_N_RAYS = 360
 LIDAR_FOV = 2.0 * math.pi  # 360° full circle
 LIDAR_MAX_RANGE = 12.0
 LIDAR_NOISE = 0.005    # m, gaussian std
 # Sheep cross-section in the LiDAR plane (horizontal cylinder approx).
 SHEEP_RADIUS = 0.30
 POST_RADIUS = 0.25
 # ---------------------------------------------------------------------------
 # Rectangular-field static geometry
 # ---------------------------------------------------------------------------
 _VERTICAL_WALLS_RECT = (
    ( 15.0, -15.0,  15.0),  # field east
    (-15.0, -15.0,  15.0),  # field west
    ( 10.0, -22.0, -15.0),  # pen west
    ( 13.0, -22.0, -15.0),  # pen east
 )
 _HORIZONTAL_WALLS_RECT = (
    ( 15.0, -15.0,  15.0),  # field north
    (-15.0, -15.0,  10.0),  # field south-west of gate
    (-15.0,  13.0,  15.0),  # field south-east of gate
    (-22.0,  10.0,  13.0),  # pen south
 )
 _POSTS_RECT = np.array([
    ( 10.0, -15.0), ( 13.0, -15.0),
    ( 15.0,  15.0), ( 15.0, -15.0),
    (-15.0,  15.0), (-15.0, -15.0),
 ], dtype=np.float64)
 # ---------------------------------------------------------------------------
 # Round-field static geometry
 # ---------------------------------------------------------------------------
 # Circular wall with gate gap. Gate posts at the edges of the gate gap.
 _gate_cx = 0.5 * (GATE_X[0] + GATE_X[1])
 _POSTS_ROUND = np.array([
    (GATE_X[0], GATE_Y),
    (GATE_X[1], GATE_Y),
 ], dtype=np.float64)
 # Pen walls for round field
 _VERTICAL_WALLS_ROUND = (
    (GATE_X[0], PEN_Y[0], GATE_Y),   # pen west
    (GATE_X[1], PEN_Y[0], GATE_Y),   # pen east
 )
 _HORIZONTAL_WALLS_ROUND = (
    (PEN_Y[0], GATE_X[0], GATE_X[1]),  # pen south
 )
 def _build_static_geometry():
    """Select the correct static geometry for the active field shape."""
    if FIELD_SHAPE == "field_round":
        return (
            _VERTICAL_WALLS_ROUND,
            _HORIZONTAL_WALLS_ROUND,
            _POSTS_ROUND,
            FIELD_ROUND_R,
        )
    return (
        _VERTICAL_WALLS_RECT,
        _HORIZONTAL_WALLS_RECT,
        _POSTS_RECT,
        None,  # no circular wall
    )
 _VERTS, _HORIZS, _POSTS, _CIRC_R = _build_static_geometry()
 # ---------------------------------------------------------------------------
 # Ray helpers
 # ---------------------------------------------------------------------------
 def ray_angles(n: int = LIDAR_N_RAYS, fov: float = LIDAR_FOV) -> np.ndarray:
    """Local-frame ray angles, CCW from forward, sweeping +fov/2 → -fov/2."""
    return np.linspace(fov / 2.0, -fov / 2.0, n, dtype=np.float64)
 _ANGLES = ray_angles()
 _COS = np.cos(_ANGLES)
 _SIN = np.sin(_ANGLES)
 def _raycast_static(
    ox: float, oy: float, cos_w: np.ndarray, sin_w: np.ndarray,
 ) -> np.ndarray:
    """Per-ray distance to the nearest wall or post hit (∞ if none)."""
    n_rays = cos_w.shape[0]
    best = np.full(n_rays, np.inf, dtype=np.float64)
    EPS = 1e-3
    safe_cos = np.where(np.abs(cos_w) < 1e-9, 1e-9, cos_w)
    safe_sin = np.where(np.abs(sin_w) < 1e-9, 1e-9, sin_w)
    # Vertical walls (x = const)
    for wx, ymin, ymax in _VERTS:
        t = (wx - ox) / safe_cos
        y_at = oy + t * sin_w
        valid = (t > EPS) & (y_at >= ymin - EPS) & (y_at <= ymax + EPS)
        cand = np.where(valid, t, np.inf)
        np.minimum(best, cand, out=best)
    # Horizontal walls (y = const)
    for wy, xmin, xmax in _HORIZS:
        t = (wy - oy) / safe_sin
        x_at = ox + t * cos_w
        valid = (t > EPS) & (x_at >= xmin - EPS) & (x_at <= xmax + EPS)
        cand = np.where(valid, t, np.inf)
        np.minimum(best, cand, out=best)
    # Circular wall (round field only)
    if _CIRC_R is not None:
        # Ray: P(t) = O + t·D.  ||P(t)||² = R²
        # t² - 2t(O·D) + (||O||² - R²) = 0
        # a = 1 (rays are unit), b = -2(O·D), c = ||O||² - R²
        a = 1.0  # cos_w² + sin_w² = 1
        b = -(ox * cos_w + oy * sin_w)
        c = ox * ox + oy * oy - _CIRC_R * _CIRC_R
        disc = b * b - a * c
        valid_disc = disc >= 0.0
        sqrt_disc = np.sqrt(np.maximum(disc, 0.0))
        # Two intersection candidates: t = (-b ± sqrt(disc)) / a
        t1 = -b - sqrt_disc
        t2 = -b + sqrt_disc
        # We want the smallest positive t.
        t1_valid = valid_disc & (t1 > EPS)
        t2_valid = valid_disc & (t2 > EPS)
        t_circ = np.where(t1_valid, t1, np.where(t2_valid, t2, np.inf))
        # Exclude rays that hit the gate gap: the hit point must not lie
        # in the gate column (between GATE_X and above GATE_Y).
        hx = ox + t_circ * cos_w
        hy = oy + t_circ * sin_w
        in_gate = ((hx > GATE_X[0]) & (hx < GATE_X[1]) &
                   (hy > GATE_Y - 2.0) & (hy < GATE_Y + 2.0))
        t_circ = np.where(in_gate, np.inf, t_circ)
        np.minimum(best, t_circ, out=best)
    # Posts (treat as discs)
    if _POSTS.size:
        px = _POSTS[:, 0] - ox
        py = _POSTS[:, 1] - oy
        t_post = np.outer(px, cos_w) + np.outer(py, sin_w)
        d2 = (px ** 2 + py ** 2)[:, None]
        perp2 = d2 - t_post ** 2
        R2 = POST_RADIUS ** 2
        hit = (perp2 < R2) & (t_post > 0.0)
        half = np.sqrt(np.clip(R2 - perp2, 0.0, None))
        cand = np.where(hit, t_post - half, np.inf)
        nearest = cand.min(axis=0)
        np.minimum(best, nearest, out=best)
    return best
 def simulate_scan(
    dog_x: float, dog_y: float, dog_heading: float,
    sheep_xy: list[tuple[float, float]],
    noise: float = LIDAR_NOISE,
    max_range: float = LIDAR_MAX_RANGE,
    rng: np.random.Generator | None = None,
 ) -> np.ndarray:
    """Return a (N,) float32 range array. No-hit entries equal ``max_range``.
    ``sheep_xy`` is every sheep (penned or active) in the scene.
    """
    ch, sh = math.cos(dog_heading), math.sin(dog_heading)
    cos_w = ch * _COS - sh * _SIN
    sin_w = sh * _COS + ch * _SIN
    best = _raycast_static(dog_x, dog_y, cos_w, sin_w)
    if sheep_xy:
        sx = np.asarray([p[0] for p in sheep_xy], dtype=np.float64) - dog_x
        sy = np.asarray([p[1] for p in sheep_xy], dtype=np.float64) - dog_y
        t = np.outer(sx, cos_w) + np.outer(sy, sin_w)
        s_dist2 = (sx ** 2 + sy ** 2)[:, None]
        perp2 = s_dist2 - t ** 2
        R2 = SHEEP_RADIUS ** 2
        hit = (perp2 < R2) & (t > 0.0)
        half = np.sqrt(np.clip(R2 - perp2, 0.0, None))
        candidate = np.where(hit, t - half, np.inf)
        nearest = candidate.min(axis=0)
        np.minimum(best, nearest, out=best)
    ranges = np.minimum(best, max_range).astype(np.float32)
    return _add_noise(ranges, noise, rng, max_range)
 def _add_noise(ranges: np.ndarray, sigma: float,
               rng: np.random.Generator | None, max_range: float) -> np.ndarray:
    if sigma <= 0.0:
        return ranges
    if rng is None:
        rng = np.random.default_rng()
    hit_mask = ranges < max_range - 1e-3
    n_hit = int(hit_mask.sum())
    if n_hit:
        ranges = ranges.copy()
        ranges[hit_mask] += rng.normal(0.0, sigma, size=n_hit).astype(np.float32)
        np.clip(ranges, 0.0, max_range, out=ranges)
    return ranges
@@ -0,0 +1,122 @@
 """Observation builder for the shepherd-dog policy.
 Order-invariant 32-D feature vector. Sheep never appear by index in
 the observation, only via summary statistics, a polar histogram, and
 two "named" channels (closest-to-pen, rearmost-from-pen) — so the
 policy generalises across flock sizes 1..MAX_SHEEP.
 Layout (all components normalised so values stay roughly in [-1, 1]):
    idx    field
    -----  ----------------------------------------------------------
     0..3  dog pose: x/15, y/15, cos(h), sin(h)
     4..5  active-sheep CoM x/15, y/15
     6..8  flock dispersion: max_radius/15, std_x/15, std_y/15
     9..11 dog → CoM: dx/30, dy/30, dist/30
    12..14 dog → pen entry: dx/30, dy/30, dist/30
    15..16 furthest sheep → CoM: dx/15, dy/15
    17..18 min sheep-to-wall, min dog-to-wall (both /15)
       19  active sheep count / MAX_SHEEP
    20..27 8-bin polar histogram of active sheep in the dog's body frame
    28..29 dog → closest-to-pen sheep: dx/15, dy/15
    30..31 dog → rearmost (furthest-from-pen) sheep: dx/15, dy/15
 """
 import math
 import numpy as np
 from herding.world.geometry import (
    PEN_ENTRY, MAX_SHEEP, distance_to_wall,
 )
 OBS_DIM = 32
 def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
              n_max: int = MAX_SHEEP,
              n_expected: int | None = None) -> np.ndarray:
    """Assemble the dog policy's observation vector.
    Parameters
    ----------
    dog_xy : tuple (x, y) of the dog's GPS position (m)
    dog_heading : dog heading in rad
    sheep_xy_list : iterable of (x, y) for ALL known sheep
    sheep_penned_list : parallel iterable of bool — True if sheep is penned
    n_max : maximum supported flock size used for the count normaliser
    n_expected : unused, kept for API compatibility.
    """
    dog_x, dog_y = dog_xy
    obs = np.zeros(OBS_DIM, dtype=np.float32)
    obs[0] = dog_x / 15.0
    obs[1] = dog_y / 15.0
    obs[2] = math.cos(dog_heading)
    obs[3] = math.sin(dog_heading)
    active = [(x, y) for (x, y), p
              in zip(sheep_xy_list, sheep_penned_list) if not p]
    n = len(active)
    pdx0, pdy0 = PEN_ENTRY[0] - dog_x, PEN_ENTRY[1] - dog_y
    obs[12] = pdx0 / 30.0
    obs[13] = pdy0 / 30.0
    obs[14] = math.hypot(pdx0, pdy0) / 30.0
    if n == 0:
        obs[19] = 0.0
        return obs
    arr = np.asarray(active, dtype=np.float32)
    com_x = float(arr[:, 0].mean())
    com_y = float(arr[:, 1].mean())
    rel = arr - np.array([com_x, com_y], dtype=np.float32)
    dists = np.hypot(rel[:, 0], rel[:, 1])
    radius = float(dists.max())
    std_x = float(arr[:, 0].std())
    std_y = float(arr[:, 1].std())
    obs[4] = com_x / 15.0
    obs[5] = com_y / 15.0
    obs[6] = radius / 15.0
    obs[7] = std_x / 15.0
    obs[8] = std_y / 15.0
    cdx, cdy = com_x - dog_x, com_y - dog_y
    obs[9]  = cdx / 30.0
    obs[10] = cdy / 30.0
    obs[11] = math.hypot(cdx, cdy) / 30.0
    far_idx = int(np.argmax(dists))
    obs[15] = float(rel[far_idx, 0]) / 15.0
    obs[16] = float(rel[far_idx, 1]) / 15.0
    min_sheep_wall = float(min(distance_to_wall(sx, sy) for sx, sy in active))
    min_dog_wall = distance_to_wall(dog_x, dog_y)
    obs[17] = min_sheep_wall / 15.0
    obs[18] = float(min_dog_wall) / 15.0
    obs[19] = n / n_max
    # Polar histogram in the dog's body frame.
    rel_dx = arr[:, 0] - dog_x
    rel_dy = arr[:, 1] - dog_y
    angles = np.arctan2(rel_dy, rel_dx) - dog_heading
    angles = np.arctan2(np.sin(angles), np.cos(angles))
    bins = np.floor((angles + math.pi) / (2 * math.pi) * 8).astype(int)
    bins = np.clip(bins, 0, 7)
    hist = np.bincount(bins, minlength=8).astype(np.float32)
    hist /= max(1, n)
    obs[20:28] = hist
    # Closest-to-pen and rearmost (furthest-from-pen) sheep. Without
    # these named channels the obs cannot uniquely identify which sheep
    # the teacher is steering toward, and BC fails to mimic it.
    pen_dists = np.hypot(arr[:, 0] - PEN_ENTRY[0], arr[:, 1] - PEN_ENTRY[1])
    closest_idx = int(np.argmin(pen_dists))
    rearmost_idx = int(np.argmax(pen_dists))
    obs[28] = (float(arr[closest_idx, 0]) - dog_x) / 15.0
    obs[29] = (float(arr[closest_idx, 1]) - dog_y) / 15.0
    obs[30] = (float(arr[rearmost_idx, 0]) - dog_x) / 15.0
    obs[31] = (float(arr[rearmost_idx, 1]) - dog_y) / 15.0
    return obs
@@ -0,0 +1,237 @@
 """Multi-target tracker for LiDAR-detected sheep.
 Greedy nearest-neighbour data association across frames, with a wider
 re-acquisition gate for stale tracks (sheep flee during occlusion and
 reappear off-position), plus memory of last-seen positions for sheep
 out of FOV. Output is ``{name: (x, y)}`` — Strömbom / Sequential
 consume it directly.
 When **predictive mode** is enabled (the default), tracks carry a
 constant-velocity state ``(vx, vy)`` estimated from the last two
 observations. While a track is occluded its position is extrapolated
 using this velocity for up to ``PREDICT_STEPS`` frames, keeping the
 teacher's CoM estimate stable during brief losses. After prediction
 expires, the track falls back to its last-seen position (static memory)
 until ``FORGET_STEPS`` deletes it entirely.
 A track is marked penned once its estimated position crosses the gate
 plane south (``is_penned_position``). Penned tracks are excluded from
 ``get_positions`` and kept indefinitely.
 """
 from __future__ import annotations
 import math
 from herding.world.geometry import MAX_SHEEP, in_pen, is_penned_position
 GATE_M = 2.5              # m — primary NN gate (recently observed tracks)
 REACQUIRE_GATE_M = 4.5    # m — wider gate for re-binding stale tracks
 REACQUIRE_MIN_AGE = 20    # steps — track must be this stale to use the wider gate
 PENNED_GATE_M = 4.0       # m — gate for matching detections to existing penned tracks
 FORGET_STEPS = 200        # ~3.2 s — delete stale active tracks (penned ones kept forever)
 MAX_ACTIVE_TRACKS = MAX_SHEEP
 # Predictive tracking constants.
 PREDICT_STEPS = 120       # ~1.9 s — extrapolate velocity this many frames
 VELOCITY_CLAMP = 1.0      # m/s — max predicted speed (sheep max is ~0.78 m/s)
 class Track:
    """Single track with position, velocity, and age."""
    __slots__ = ("x", "y", "vx", "vy", "last_seen", "penned")
    def __init__(self, x: float, y: float, step: int, penned: bool = False):
        self.x = x
        self.y = y
        self.vx = 0.0
        self.vy = 0.0
        self.last_seen = step
        self.penned = penned
    @property
    def age(self) -> int:
        """Not-a-property in the hot loop — callers pass current step."""
        raise NotImplementedError
    def predicted_position(self, current_step: int) -> tuple[float, float]:
        """Extrapolated position using constant velocity, clamped."""
        dt = current_step - self.last_seen
        if dt <= 0 or dt > PREDICT_STEPS:
            return self.x, self.y
        speed = math.hypot(self.vx, self.vy)
        if speed < 1e-4:
            return self.x, self.y
        # Clamp extrapolation distance.
        max_d = VELOCITY_CLAMP * dt * 0.016  # steps → seconds
        d = min(speed * dt * 0.016, max_d)
        return (
            self.x + d * (self.vx / speed),
            self.y + d * (self.vy / speed),
        )
    def update(self, x: float, y: float, step: int) -> None:
        """Absorb a new detection and re-estimate velocity."""
        dt = step - self.last_seen
        if dt > 0:
            dt_s = dt * 0.016  # steps → seconds
            new_vx = (x - self.x) / dt_s
            new_vy = (y - self.y) / dt_s
            # Exponential smoothing on velocity.
            alpha = 0.6
            self.vx = alpha * new_vx + (1.0 - alpha) * self.vx
            self.vy = alpha * new_vy + (1.0 - alpha) * self.vy
        self.x = x
        self.y = y
        self.last_seen = step
 class SheepTracker:
    """Online tracker with NN association, prediction, and forgetful memory.
    Each track is a :class:`Track` with position, velocity estimate,
    last-seen step, and penned flag.
    """
    def __init__(self, gate: float = GATE_M):
        self.gate = gate
        self._tracks: dict[int, Track] = {}
        self._next_id = 0
        self.step = 0
    def reset(self) -> None:
        self._tracks.clear()
        self._next_id = 0
        self.step = 0
    def update(self, detections: list[tuple[float, float]]) -> dict[str, tuple[float, float]]:
        """Fold a new set of detections in and return active positions."""
        self.step += 1
        det_used: set[int] = set()
        updated_tids: set[int] = set()
        # Pass 1 — match active tracks within the primary gate.
        # Use predicted positions for matching, oldest-first.
        active_tids = [tid for tid, t in self._tracks.items() if not t.penned]
        active_tids.sort(key=lambda tid: self._tracks[tid].last_seen)
        for tid in active_tids:
            track = self._tracks[tid]
            # Use predicted position for matching.
            tx, ty = track.predicted_position(self.step)
            best_j, best_d = -1, self.gate
            for j, (dx, dy) in enumerate(detections):
                if j in det_used:
                    continue
                d = math.hypot(dx - tx, dy - ty)
                if d < best_d:
                    best_d = d
                    best_j = j
            if best_j >= 0:
                dx, dy = detections[best_j]
                track.update(dx, dy, self.step)
                det_used.add(best_j)
                updated_tids.add(tid)
        # Pass 1b — re-acquisition with wider gate for stale tracks.
        for tid in active_tids:
            if tid in updated_tids:
                continue
            track = self._tracks[tid]
            if (self.step - track.last_seen) < REACQUIRE_MIN_AGE:
                continue
            tx, ty = track.predicted_position(self.step)
            best_j, best_d = -1, REACQUIRE_GATE_M
            for j, (dx, dy) in enumerate(detections):
                if j in det_used:
                    continue
                d = math.hypot(dx - tx, dy - ty)
                if d < best_d:
                    best_d = d
                    best_j = j
            if best_j >= 0:
                dx, dy = detections[best_j]
                track.update(dx, dy, self.step)
                det_used.add(best_j)
                updated_tids.add(tid)
        # Pass 2 — match remaining detections to penned tracks.
        penned_tids = [tid for tid, t in self._tracks.items() if t.penned]
        for tid in penned_tids:
            track = self._tracks[tid]
            best_j, best_d = -1, PENNED_GATE_M
            for j, (dx, dy) in enumerate(detections):
                if j in det_used:
                    continue
                d = math.hypot(dx - track.x, dy - track.y)
                if d < best_d:
                    best_d = d
                    best_j = j
            if best_j >= 0:
                dx, dy = detections[best_j]
                track.update(dx, dy, self.step)
                det_used.add(best_j)
        # Spawn new tracks for unmatched detections.
        for j, (dx, dy) in enumerate(detections):
            if j in det_used:
                continue
            penned = in_pen(dx, dy) or is_penned_position(dx, dy)
            self._tracks[self._next_id] = Track(dx, dy, self.step, penned)
            self._next_id += 1
        # Promote active tracks whose current estimate crosses the gate.
        for track in self._tracks.values():
            if track.penned:
                continue
            px, py = track.predicted_position(self.step)
            if is_penned_position(px, py):
                track.penned = True
        # Forget stale active tracks; penned tracks live forever.
        stale = [tid for tid, t in self._tracks.items()
                 if not t.penned and (self.step - t.last_seen) > FORGET_STEPS]
        for tid in stale:
            del self._tracks[tid]
        # Hard cap on the active set — drop the oldest-seen overflow.
        active = [(tid, t.last_seen) for tid, t in self._tracks.items()
                  if not t.penned]
        if len(active) > MAX_ACTIVE_TRACKS:
            active.sort(key=lambda kv: kv[1])
            for tid, _ in active[: len(active) - MAX_ACTIVE_TRACKS]:
                del self._tracks[tid]
        return self.get_positions()
    def get_positions(self) -> dict[str, tuple[float, float]]:
        """Active (not-penned) tracks as a ``{name: (x, y)}`` dict.
        For tracks currently being predicted (occluded but within
        PREDICT_STEPS), returns the extrapolated position so the teacher
        sees a smooth estimate.
        """
        result = {}
        for tid, track in self._tracks.items():
            if track.penned:
                continue
            px, py = track.predicted_position(self.step)
            result[f"t{tid}"] = (px, py)
        return result
    def get_penned_set(self) -> set[str]:
        return {f"t{tid}" for tid, t in self._tracks.items() if t.penned}
    def n_active(self) -> int:
        return sum(1 for t in self._tracks.values() if not t.penned)
    def n_penned(self) -> int:
        return sum(1 for t in self._tracks.values() if t.penned)
    def n_predicted(self) -> int:
        """Number of active tracks currently being extrapolated (not directly observed)."""
        return sum(1 for t in self._tracks.values()
                   if not t.penned and (self.step - t.last_seen) > 0
                   and (self.step - t.last_seen) <= PREDICT_STEPS)
@@ -0,0 +1,190 @@
 """Differential-drive and mecanum kinematics, shared by the env and Webots
 controllers.
 First-order rigid-body model — no slip, wheel-accel limits, or contact
 forces. Webots' ODE physics handles those at inference; the env stays
 close enough to first order that a policy trained here transfers.
 """
 import math
 def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt):
    """Integrate one step of differential-drive forward kinematics.
    Inputs
    ------
    x, y : robot position (m)
    h    : robot heading (rad), 0 = +x axis
    w_left, w_right : wheel angular velocities (rad/s)
    wheel_radius, wheel_base : robot dimensions (m)
    dt   : timestep (s)
    Returns (new_x, new_y, new_h).
    """
    v = (w_right + w_left) * wheel_radius * 0.5
    omega = (w_right - w_left) * wheel_radius / wheel_base
    new_x = x + v * math.cos(h) * dt
    new_y = y + v * math.sin(h) * dt
    new_h = math.atan2(math.sin(h + omega * dt), math.cos(h + omega * dt))
    return new_x, new_y, new_h
 def velocity_to_wheels(vx, vy, h, max_linear, wheel_radius, max_wheel_omega,
                       k_turn=4.0):
    """Convert a desired (vx, vy) intent in [-1, 1]² to wheel speeds.
    Forward speed scales by ``cos(err)`` (clamped to ±90°); a P
    controller on heading error contributes the wheel-rate differential.
    """
    speed_ms = math.hypot(vx, vy) * max_linear
    if speed_ms < 1e-3:
        return 0.0, 0.0
    target_h = math.atan2(vy, vx)
    err = math.atan2(math.sin(target_h - h), math.cos(target_h - h))
    clamped_err = max(-math.pi / 2, min(math.pi / 2, err))
    fwd_ms = speed_ms * math.cos(clamped_err)
    fwd_rad = fwd_ms / wheel_radius
    turn = k_turn * err
    left = max(-max_wheel_omega, min(max_wheel_omega, fwd_rad - turn))
    right = max(-max_wheel_omega, min(max_wheel_omega, fwd_rad + turn))
    return left, right
 def heading_speed_to_wheels(heading, speed_motor, h, max_wheel_omega,
                            k_turn=4.0):
    """Sheep variant: speed in wheel rad/s, target as a heading angle."""
    err = math.atan2(math.sin(heading - h), math.cos(heading - h))
    fwd = max(0.0, math.cos(err)) * speed_motor
    turn = k_turn * err
    left = max(-max_wheel_omega, min(max_wheel_omega, fwd - turn))
    right = max(-max_wheel_omega, min(max_wheel_omega, fwd + turn))
    return left, right
 # ---------------------------------------------------------------------------
 # Mecanum (4-wheel omnidirectional) kinematics
 # ---------------------------------------------------------------------------
 def mecanum_kinematics_step(x, y, h, w_fl, w_fr, w_rl, w_rr,
                             wheel_radius, lx, ly, dt):
    """Integrate one step of mecanum forward kinematics.
    Parameters
    ----------
    x, y : robot position (m)
    h    : robot heading (rad), 0 = +x axis
    w_fl, w_fr, w_rl, w_rr : wheel angular velocities (rad/s)
    wheel_radius : wheel radius (m)
    lx   : half the front-to-back axle distance (m)
    ly   : half the left-to-right axle distance (m)
    dt   : timestep (s)
    Returns (new_x, new_y, new_h).
    """
    r = wheel_radius
    vx_body = (w_fl + w_fr + w_rl + w_rr) * r / 4.0
    vy_body = (-w_fl + w_fr + w_rl - w_rr) * r / 4.0
    omega = (-w_fl + w_fr - w_rl + w_rr) * r / (4.0 * (lx + ly))
    cos_h = math.cos(h)
    sin_h = math.sin(h)
    vx_world = vx_body * cos_h - vy_body * sin_h
    vy_world = vx_body * sin_h + vy_body * cos_h
    new_x = x + vx_world * dt
    new_y = y + vy_world * dt
    new_h = math.atan2(math.sin(h + omega * dt), math.cos(h + omega * dt))
    return new_x, new_y, new_h
 def mecanum_inverse(vx_body, vy_body, omega, wheel_radius, lx, ly,
                    max_wheel_omega):
    """Mecanum inverse kinematics: body-frame velocities to 4 wheel speeds.
    Parameters
    ----------
    vx_body, vy_body : desired body-frame linear velocities (m/s)
    omega            : desired yaw rate (rad/s)
    wheel_radius     : wheel radius (m)
    lx               : half front-to-back axle distance (m)
    ly               : half left-to-right axle distance (m)
    max_wheel_omega  : wheel angular velocity clamp (rad/s)
    Returns (w_fl, w_fr, w_rl, w_rr).
    """
    r = wheel_radius
    k = lx + ly
    w_fl = (vx_body - vy_body - k * omega) / r
    w_fr = (vx_body + vy_body + k * omega) / r
    w_rl = (vx_body + vy_body - k * omega) / r
    w_rr = (vx_body - vy_body + k * omega) / r
    scale = max(abs(w_fl), abs(w_fr), abs(w_rl), abs(w_rr), 1e-9)
    if scale > max_wheel_omega:
        ratio = max_wheel_omega / scale
        w_fl *= ratio
        w_fr *= ratio
        w_rl *= ratio
        w_rr *= ratio
    return w_fl, w_fr, w_rl, w_rr
 def velocity_to_mecanum_wheels(vx, vy, omega, h, max_linear, wheel_radius,
                               lx, ly, max_wheel_omega,
                               k_turn=4.0, wheel_base=0.28):
    """Convert world-frame (vx, vy, omega) action in [-1, 1]^3 to 4 wheel speeds.
    Truly holonomic interpretation: (vx, vy) is the desired *world-frame*
    velocity (magnitude up to ``max_linear`` m/s) and ``omega`` is the
    desired yaw rate (independent of motion direction). The dog can
    crab-walk and rotate at the same time.
    This matches the universal teacher's signal: drive toward a standoff
    point while facing the sheep / pen separately. With the older
    non-holonomic version, ``omega`` from the teacher fought against the
    forward-only kinematics and dropped success rates instead of helping.
    Parameters
    ----------
    vx, vy : desired world-frame velocity intent in [-1, 1] (clamped on
             magnitude to ≤ 1)
    omega  : desired yaw rate intent in [-1, 1]
    h      : current heading (rad), 0 = +x
    max_linear     : max linear speed (m/s)
    wheel_radius   : wheel radius (m)
    lx, ly         : half axle distances (m)
    max_wheel_omega : wheel angular velocity clamp (rad/s)
    k_turn         : unused (kept for signature compatibility)
    wheel_base     : unused (kept for signature compatibility)
    Returns (w_fl, w_fr, w_rl, w_rr).
    """
    # Clamp the action magnitude in the (vx, vy) unit disk.
    norm = math.hypot(vx, vy)
    if norm > 1.0:
        vx /= norm
        vy /= norm
    # World-frame velocity → body-frame velocity (rotate by -h).
    vx_world = vx * max_linear
    vy_world = vy * max_linear
    cos_h = math.cos(h)
    sin_h = math.sin(h)
    vx_body =  cos_h * vx_world + sin_h * vy_world
    vy_body = -sin_h * vx_world + cos_h * vy_world
    # Yaw rate: omega ∈ [-1, 1] maps to ± max_linear / (lx + ly) — same
    # peak yaw as the old "omega_extra" channel, but used directly
    # rather than added to a heading-tracker.
    yaw_max = max_linear / max(lx + ly, 1e-6)
    omega_rad = omega * yaw_max
    if abs(vx_body) < 1e-3 and abs(vy_body) < 1e-3 and abs(omega_rad) < 1e-3:
        return 0.0, 0.0, 0.0, 0.0
    return mecanum_inverse(
        vx_body, vy_body, omega_rad,
        wheel_radius, lx, ly, max_wheel_omega,
    )
@@ -0,0 +1,181 @@
 """Sheep flocking dynamics — Strömbom 2014 / Reynolds 1987.
 Per-sheep behavioural step used by both the Webots sheep controller
 and the training environment. Each step a force stack is summed:
    flee       — quadratic ramp away from dog within FLEE_DIST
                 (Strömbom 2014, term ρa)
    cohesion   — drift toward local centre of mass of peers within
                 COHESION_DIST (Strömbom 2014, term c). Weight is
                 higher while fleeing — fear-induced cohesion.
    separation — short-range inverse-distance repulsion from peers
                 (Strömbom 2014 term α; Reynolds 1987)
    wander     — small persistent drift (Strömbom 2014 noise term ε)
 Walls, the south-wall gate column, and in-pen containment are
 environment-specific additions for the fenced Webots field.
 References
 ----------
 - Strömbom et al. (2014). "Solving the shepherding problem: heuristics
  for herding autonomous, interacting agents." J R Soc Interface 11.
 - Reynolds (1987). "Flocks, herds and schools: A distributed
  behavioural model." SIGGRAPH '87.
 """
 import math
 import random
 from herding.world.geometry import (
    FIELD_SHAPE, FIELD_ROUND_R,
    FIELD_X, FIELD_Y,
    PEN_X, PEN_Y,
    GATE_X, GATE_Y,
 )
 # Speeds are in wheel rad/s (motor units); m/s = speed * SHEEP_WHEEL_RADIUS.
 MAX_SPEED = 22.0
 FLEE_SPEED = 20.0
 WANDER_SPEED = 3.0
 WALL_MARGIN = 5.0
 WALL_HARD_MARGIN = 1.0
 WALL_HARD_GAIN = 50.0
 FLEE_DIST = 7.0
 SEPARATION_DIST = 2.5
 COHESION_DIST = 12.0
 PEN_MARGIN = 0.8
 def _peers_iter(peers):
    """Accept either a {name: (x, y)} dict or an iterable of (x, y) tuples."""
    if isinstance(peers, dict):
        return list(peers.values())
    return list(peers)
 def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
    """Return ``(heading, speed, new_wander_angle)`` for one sheep step.
    ``speed`` is in wheel rad/s, bounded by ``[WANDER_SPEED, FLEE_SPEED]``.
    ``heading`` is the world-frame target heading (atan2 convention).
    ``rng`` is an optional ``random.Random`` used for wander jitter; if
    ``None`` uses the module's global ``random``.
    """
    fx, fy = 0.0, 0.0
    peer_list = _peers_iter(peers)
    rnd = rng if rng is not None else random
    if penned:
        # Pen containment: bounce off all four pen walls.
        pm = PEN_MARGIN
        if x < PEN_X[0] + pm:
            fx += ((PEN_X[0] + pm - x) / pm) * 15.0
        if x > PEN_X[1] - pm:
            fx -= ((x - (PEN_X[1] - pm)) / pm) * 15.0
        if y < PEN_Y[0] + pm:
            fy += ((PEN_Y[0] + pm - y) / pm) * 15.0
        if y > PEN_Y[1] - pm:
            fy -= ((y - (PEN_Y[1] - pm)) / pm) * 15.0
        # Mild peer separation so penned sheep don't crowd one corner.
        for px, py in peer_list:
            dx, dy = px - x, py - y
            d = math.hypot(dx, dy)
            if 0.05 < d < SEPARATION_DIST:
                push = (SEPARATION_DIST - d) / d
                fx -= (dx / d) * push * 2.5
                fy -= (dy / d) * push * 2.5
        if rnd.random() < 0.02:
            wander_angle += rnd.uniform(-0.6, 0.6)
        fx += math.cos(wander_angle) * 0.5
        fy += math.sin(wander_angle) * 0.5
    else:
        # Free-roaming sheep in the field.
        fleeing = False
        if dog_xy is not None:
            ddx = dog_xy[0] - x
            ddy = dog_xy[1] - y
            dist = math.hypot(ddx, ddy)
            if 0.01 < dist < FLEE_DIST:
                fleeing = True
                t = 1.0 - dist / FLEE_DIST
                s = t * t * 20.0
                fx -= (ddx / dist) * s
                fy -= (ddy / dist) * s
        # Cohesion: drift toward the local CoM of peers within
        # COHESION_DIST. Stronger while fleeing — fear-induced
        # cohesion keeps the flock together through the gate.
        cx, cy, cn = 0.0, 0.0, 0
        for px, py in peer_list:
            d = math.hypot(px - x, py - y)
            if 0.3 < d < COHESION_DIST:
                cx += px
                cy += py
                cn += 1
        if cn > 0:
            w = 3.0 if fleeing else 1.0
            fx += (cx / cn - x) * w
            fy += (cy / cn - y) * w
        # Separation — inverse-distance push from peers.
        for px, py in peer_list:
            ddx, ddy = px - x, py - y
            d = math.hypot(ddx, ddy)
            if 0.05 < d < SEPARATION_DIST:
                push = (SEPARATION_DIST - d) / d
                fx -= (ddx / d) * push * 2.5
                fy -= (ddy / d) * push * 2.5
        # Wall soft repulsion.
        if FIELD_SHAPE == "field_round":
            r = math.hypot(x, y)
            wall_d = FIELD_ROUND_R - r
            in_gate_col = (GATE_X[0] <= x <= GATE_X[1]
                           and y < GATE_Y + WALL_MARGIN)
            if wall_d < WALL_MARGIN and r > 1e-6 and not in_gate_col:
                gain = ((WALL_MARGIN - wall_d) / WALL_MARGIN) * 6.0
                fx -= (x / r) * gain
                fy -= (y / r) * gain
            # Hard escape band.
            if wall_d < WALL_HARD_MARGIN and not in_gate_col:
                hgain = WALL_HARD_GAIN * (1.0 - wall_d / WALL_HARD_MARGIN)
                fx -= (x / r) * hgain
                fy -= (y / r) * hgain
        else:
            # Rectangular: south wall absent inside the gate column.
            if x < FIELD_X[0] + WALL_MARGIN:
                fx += ((FIELD_X[0] + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
            if x > FIELD_X[1] - WALL_MARGIN:
                fx -= ((x - (FIELD_X[1] - WALL_MARGIN)) / WALL_MARGIN) * 6.0
            if y > FIELD_Y[1] - WALL_MARGIN:
                fy -= ((y - (FIELD_Y[1] - WALL_MARGIN)) / WALL_MARGIN) * 6.0
            if y < FIELD_Y[0] + WALL_MARGIN and not (GATE_X[0] <= x <= GATE_X[1]):
                fy += ((FIELD_Y[0] + WALL_MARGIN - y) / WALL_MARGIN) * 6.0
            # Hard escape band — overrides everything else near a wall.
            m, g = WALL_HARD_MARGIN, WALL_HARD_GAIN
            if x - FIELD_X[0] < m:
                fx = max(fx, g * (1.0 - (x - FIELD_X[0]) / m))
            if FIELD_X[1] - x < m:
                fx = min(fx, -g * (1.0 - (FIELD_X[1] - x) / m))
            if FIELD_Y[1] - y < m:
                fy = min(fy, -g * (1.0 - (FIELD_Y[1] - y) / m))
            if (y - FIELD_Y[0] < m) and not (GATE_X[0] <= x <= GATE_X[1]):
                fy = max(fy, g * (1.0 - (y - FIELD_Y[0]) / m))
        if not fleeing:
            if rnd.random() < 0.02:
                wander_angle += rnd.uniform(-0.6, 0.6)
            fx += math.cos(wander_angle) * 0.5
            fy += math.sin(wander_angle) * 0.5
    heading = math.atan2(fy, fx)
    mag = math.hypot(fx, fy)
    speed = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))
    return heading, speed, wander_angle
@@ -0,0 +1,185 @@
 """World geometry and robot specs.
 Coordinates are metres; (0, 0) is the field centre, +x east, +y north.
 These constants mirror ``worlds/field.wbt`` and the proto files — if
 the world changes, this file is the single point of update.
    field (rectangular)
    +-----------+
    |           |
    |  ......   |
    +---||||----+   y = -15  (south wall, 3 m gate at x in [10, 13])
        ||||
        |pen|       y in [-22, -15]
        +---+
    field_round (circular, R = 15 m)
         .---.
       /  ...  \\
      |  .....  |    gate at south, x in [-1.83, 1.83]
       \\  ...  /
         '-+-'       pen y in [-22, -15]
 """
 import os
 import math
 # ---------------------------------------------------------------------------
 # Field shape selection — controlled by HERDING_WORLD env var at runtime.
 # Defaults to "field" (rectangular). The launcher writes it into the
 # runtime cfg so the controller can pick it up too.
 # ---------------------------------------------------------------------------
 FIELD_SHAPE = (os.environ.get("HERDING_WORLD", "field")).lower()
 # ==================== Rectangular field (field.wbt) ====================
 FIELD_X = (-15.0, 15.0)
 FIELD_Y = (-15.0, 15.0)
 FIELD_INSIDE_MARGIN = 0.5
 # Pen (external, south of the field)
 PEN_X = (10.0, 13.0)
 PEN_Y = (-22.0, -15.0)
 PEN_CENTER = (0.5 * (PEN_X[0] + PEN_X[1]), 0.5 * (PEN_Y[0] + PEN_Y[1]))
 PEN_ENTRY = (0.5 * (PEN_X[0] + PEN_X[1]), -15.0)
 # Gate (hole in the south wall)
 GATE_X = PEN_X
 GATE_Y = -15.0
 # ==================== Round field (field_round.wbt) ====================
 FIELD_ROUND_R = 15.0
 FIELD_ROUND_PEN_X = (-1.5, 1.5)
 FIELD_ROUND_PEN_Y = (-22.0, -15.0)
 FIELD_ROUND_PEN_CENTER = (
    0.5 * (FIELD_ROUND_PEN_X[0] + FIELD_ROUND_PEN_X[1]),
    0.5 * (FIELD_ROUND_PEN_Y[0] + FIELD_ROUND_PEN_Y[1]),
 )
 FIELD_ROUND_PEN_ENTRY = (0.0, -15.0)
 FIELD_ROUND_GATE_X = FIELD_ROUND_PEN_X
 FIELD_ROUND_GATE_Y = -15.0
 # ==================== Active geometry (resolved at import) ===============
 # Rectangular defaults are already assigned above.  Override for round.
 if FIELD_SHAPE == "field_round":
    PEN_X = FIELD_ROUND_PEN_X
    PEN_Y = FIELD_ROUND_PEN_Y
    PEN_CENTER = FIELD_ROUND_PEN_CENTER
    PEN_ENTRY = FIELD_ROUND_PEN_ENTRY
    GATE_X = FIELD_ROUND_GATE_X
    GATE_Y = FIELD_ROUND_GATE_Y
 def configure(shape: str) -> None:
    """Switch the active field geometry at runtime.
    Call this **before** importing any other ``herding.*`` module that
    depends on the constants below (flocking_sim, lidar_sim, obs, etc.).
    The import-time env-var path (``HERDING_WORLD``) still works; this
    function is for scripts that need to choose the world via a CLI flag.
    """
    global FIELD_SHAPE, PEN_X, PEN_Y, PEN_CENTER, PEN_ENTRY, GATE_X, GATE_Y
    shape = shape.lower()
    FIELD_SHAPE = shape
    if shape == "field_round":
        PEN_X = FIELD_ROUND_PEN_X
        PEN_Y = FIELD_ROUND_PEN_Y
        PEN_CENTER = FIELD_ROUND_PEN_CENTER
        PEN_ENTRY = FIELD_ROUND_PEN_ENTRY
        GATE_X = FIELD_ROUND_GATE_X
        GATE_Y = FIELD_ROUND_GATE_Y
    else:
        PEN_X = (10.0, 13.0)
        PEN_Y = (-22.0, -15.0)
        PEN_CENTER = (0.5 * (PEN_X[0] + PEN_X[1]), 0.5 * (PEN_Y[0] + PEN_Y[1]))
        PEN_ENTRY = (0.5 * (PEN_X[0] + PEN_X[1]), -15.0)
        GATE_X = PEN_X
        GATE_Y = -15.0
 # Dog spec — protos/ShepherdDog.proto
 DOG_WHEEL_RADIUS = 0.038         # m
 DOG_WHEEL_BASE = 0.28            # m, axle-to-axle
 DOG_MAX_WHEEL_OMEGA = 70.0       # rad/s
 DOG_MAX_LINEAR = DOG_WHEEL_RADIUS * DOG_MAX_WHEEL_OMEGA  # ≈ 2.66 m/s
 # Dog mecanum spec — 4-wheel omnidirectional layout
 DOG_WHEEL_BASE_X = 0.28            # m, front-to-back axle distance
 DOG_WHEEL_BASE_Y = 0.28            # m, left-to-right axle distance
 # Sheep spec — protos/Sheep.proto
 SHEEP_WHEEL_RADIUS = 0.031       # m
 SHEEP_WHEEL_BASE = 0.20          # m
 SHEEP_MAX_WHEEL_OMEGA = 25.0     # rad/s
 SHEEP_MAX_LINEAR = SHEEP_WHEEL_RADIUS * SHEEP_MAX_WHEEL_OMEGA  # ≈ 0.78 m/s
 WEBOTS_DT = 0.016                # seconds (matches WorldInfo.basicTimeStep)
 # Virtual south wall — env and controller both keep the dog north of this.
 DOG_SOUTH_LIMIT = -14.5
 MAX_SHEEP = 10
 def in_pen(x: float, y: float) -> bool:
    """True if (x, y) lies inside the external pen rectangle."""
    return PEN_X[0] < x < PEN_X[1] and PEN_Y[0] < y < PEN_Y[1]
 def in_field(x: float, y: float, margin: float = 0.0) -> bool:
    if FIELD_SHAPE == "field_round":
        r = FIELD_ROUND_R - margin
        return x * x + y * y <= r * r
    return (FIELD_X[0] + margin <= x <= FIELD_X[1] - margin
            and FIELD_Y[0] + margin <= y <= FIELD_Y[1] - margin)
 def in_gate_corridor(x: float, y: float, margin: float = 0.0) -> bool:
    """True if (x, y) lies in the column of the gate (between field and pen)."""
    return (GATE_X[0] - margin <= x <= GATE_X[1] + margin
            and PEN_Y[0] - margin <= y <= GATE_Y + margin)
 def is_penned_position(x: float, y: float, latch_margin: float = 0.2) -> bool:
    """True iff (x, y) is in the gate column and south of the gate line."""
    return (GATE_X[0] - latch_margin <= x <= GATE_X[1] + latch_margin
            and y <= GATE_Y)
 def distance_to_pen_entry(x: float, y: float) -> float:
    return math.hypot(x - PEN_ENTRY[0], y - PEN_ENTRY[1])
 def distance_to_wall(x: float, y: float) -> float:
    """Shortest distance from (x, y) to the nearest field wall.
    For a rectangular field this is the minimum Manhattan distance to the
    four bounding walls.  For a round field it is ``R - sqrt(x²+y²)``.
    Returns a negative value if the point is outside the field.
    """
    if FIELD_SHAPE == "field_round":
        return FIELD_ROUND_R - math.hypot(x, y)
    return min(
        x - FIELD_X[0], FIELD_X[1] - x,
        y - FIELD_Y[0], FIELD_Y[1] - y,
    )
 def clip_to_field(x: float, y: float, margin: float = 0.2) -> tuple[float, float]:
    """Clip (x, y) inside the field boundary with a small margin.
    For round fields the point is projected radially inward if it exceeds
    the circular boundary.
    """
    if FIELD_SHAPE == "field_round":
        r = math.hypot(x, y)
        limit = FIELD_ROUND_R - margin
        if r > limit and r > 1e-6:
            scale = limit / r
            return x * scale, y * scale
        return x, y
    return (
        max(FIELD_X[0] + margin, min(FIELD_X[1] - margin, x)),
        max(FIELD_Y[0] + margin, min(FIELD_Y[1] - margin, y)),
    )
@@ -0,0 +1,885 @@
 #VRML_SIM R2025a utf8
 # Shepherd Dog Robot - mecanum-wheeled base with dog character on top
 # 4-wheel omnidirectional drive (front-left, front-right, rear-left, rear-right).
 PROTO ShepherdDogMecanum [
  field SFVec3f    translation     0 0 0
  field SFRotation rotation        0 1 0 0
  field SFString   name            "ShepherdDog"
  field SFString   controller      "shepherd_dog"
  field MFString   controllerArgs  []
  field SFString   customData      ""
  field SFBool     supervisor      FALSE
  field SFBool     synchronization TRUE
 ]
 {
  Robot {
    translation IS translation
    rotation IS rotation
    name IS name
    controller IS controller
    controllerArgs IS controllerArgs
    customData IS customData
    supervisor IS supervisor
    synchronization IS synchronization
    children [
      # ========== CHASSIS / BASE ==========
      DEF CHASSIS Transform {
        translation 0 0 0.05
        children [
          Shape {
            appearance DEF CHASSIS_APP PBRAppearance {
              baseColor 0.2 0.2 0.2
              roughness 0.6
              metalness 0.3
            }
            geometry Box {
              size 0.32 0.16 0.06
            }
          }
        ]
      }
      # Front slope
      DEF CHASSIS_FRONT Transform {
        translation 0.14 0 0.07
        children [
          Shape {
            appearance USE CHASSIS_APP
            geometry Box {
              size 0.06 0.14 0.04
            }
          }
        ]
      }
      # Rear slope
      DEF CHASSIS_REAR Transform {
        translation -0.14 0 0.07
        children [
          Shape {
            appearance USE CHASSIS_APP
            geometry Box {
              size 0.06 0.14 0.04
            }
          }
        ]
      }
      # ========== DOG BODY on top of chassis ==========
      DEF BODY Transform {
        translation 0 0 0.11
        children [
          Shape {
            appearance DEF FUR_BROWN PBRAppearance {
              baseColor 0.55 0.35 0.17
              roughness 0.85
              metalness 0.0
            }
            geometry Box {
              size 0.30 0.16 0.08
            }
          }
        ]
      }
      # ========== CHEST ==========
      DEF CHEST Transform {
        translation 0.12 0 0.11
        children [
          Shape {
            appearance DEF FUR_CREAM PBRAppearance {
              baseColor 0.85 0.72 0.55
              roughness 0.85
              metalness 0.0
            }
            geometry Box {
              size 0.08 0.18 0.08
            }
          }
        ]
      }
      # ========== HEAD ==========
      DEF HEAD Transform {
        translation 0.20 0 0.17
        children [
          Shape {
            appearance USE FUR_BROWN
            geometry Box {
              size 0.10 0.12 0.09
            }
          }
        ]
      }
      # ========== SNOUT + LIDAR ==========
      DEF SNOUT Transform {
        translation 0.28 0 0.155
        children [
          Shape {
            appearance USE FUR_CREAM
            geometry Box {
              size 0.08 0.07 0.05
            }
          }
          # Nose
          Transform {
            translation 0.04 0 0.01
            children [
              Shape {
                appearance PBRAppearance {
                  baseColor 0.1 0.1 0.1
                  roughness 0.4
                }
                geometry Sphere {
                  radius 0.013
                  subdivision 2
                }
              }
            ]
          }
          # Lidar — front-facing 140° FOV, mounted at snout tip
          Lidar {
            translation 0.05 0 0.01
            name "lidar"
            horizontalResolution 180
            fieldOfView 2.44
            numberOfLayers 1
            minRange 0.10
            maxRange 12.0
            noise 0.005
          }
        ]
      }
      # ========== LEFT EAR ==========
      DEF LEFT_EAR HingeJoint {
        jointParameters HingeJointParameters {
          axis 0 0 1
          anchor 0.19 0.055 0.21
        }
        device [
          RotationalMotor {
            name "left ear motor"
            maxVelocity 10.0
            minPosition -0.5
            maxPosition 0.5
          }
        ]
        endPoint Solid {
          translation 0.19 0.055 0.21
          rotation 0 0 1 0.2
          name "left ear"
          children [
            Shape {
              appearance DEF FUR_DARK PBRAppearance {
                baseColor 0.35 0.20 0.10
                roughness 0.85
                metalness 0.0
              }
              geometry Box {
                size 0.035 0.025 0.06
              }
            }
          ]
          boundingObject Box {
            size 0.035 0.025 0.06
          }
          physics Physics {
            density -1
            mass 0.005
          }
        }
      }
      # ========== RIGHT EAR ==========
      DEF RIGHT_EAR HingeJoint {
        jointParameters HingeJointParameters {
          axis 0 0 1
          anchor 0.19 -0.055 0.21
        }
        device [
          RotationalMotor {
            name "right ear motor"
            maxVelocity 10.0
            minPosition -0.5
            maxPosition 0.5
          }
        ]
        endPoint Solid {
          translation 0.19 -0.055 0.21
          rotation 0 0 -1 0.2
          name "right ear"
          children [
            Shape {
              appearance USE FUR_DARK
              geometry Box {
                size 0.035 0.025 0.06
              }
            }
          ]
          boundingObject Box {
            size 0.035 0.025 0.06
          }
          physics Physics {
            density -1
            mass 0.005
          }
        }
      }
      # ========== EYES ==========
      DEF LEFT_EYE Transform {
        translation 0.25 0.05 0.19
        children [
          Shape {
            appearance PBRAppearance {
              baseColor 0.95 0.95 0.95
              roughness 0.3
            }
            geometry Sphere {
              radius 0.016
              subdivision 2
            }
          }
          # Pupil
          Transform {
            translation 0.012 0 0.004
            children [
              Shape {
                appearance PBRAppearance {
                  baseColor 0.1 0.1 0.1
                  roughness 0.2
                }
                geometry Sphere {
                  radius 0.009
                  subdivision 2
                }
              }
            ]
          }
        ]
      }
      DEF RIGHT_EYE Transform {
        translation 0.25 -0.05 0.19
        children [
          Shape {
            appearance PBRAppearance {
              baseColor 0.95 0.95 0.95
              roughness 0.3
            }
            geometry Sphere {
              radius 0.016
              subdivision 2
            }
          }
          # Pupil
          Transform {
            translation 0.012 0 0.004
            children [
              Shape {
                appearance PBRAppearance {
                  baseColor 0.1 0.1 0.1
                  roughness 0.2
                }
                geometry Sphere {
                  radius 0.009
                  subdivision 2
                }
              }
            ]
          }
        ]
      }
      # ========== COLLAR ==========
      DEF COLLAR Transform {
        translation 0.16 0 0.125
        children [
          Shape {
            appearance PBRAppearance {
              baseColor 0.8 0.1 0.1
              roughness 0.5
            }
            geometry Cylinder {
              height 0.02
              radius 0.095
              subdivision 16
            }
          }
          # ID tag
          Transform {
            translation 0 0.10 0
            rotation 1 0 0 1.5708
            children [
              Shape {
                appearance PBRAppearance {
                  baseColor 0.75 0.75 0.0
                  metalness 0.8
                  roughness 0.2
                }
                geometry Cylinder {
                  height 0.003
                  radius 0.018
                  subdivision 8
                }
              }
            ]
          }
        ]
      }
      # ========== TAIL (lidar inside tail tip ball) ==========
      DEF TAIL HingeJoint {
        jointParameters HingeJointParameters {
          axis 0 1 0
          anchor -0.15 0 0.11
        }
        device [
          RotationalMotor {
            name "tail motor"
            maxVelocity 5.0
            minPosition -1.0
            maxPosition 1.0
          }
        ]
        endPoint Solid {
          translation -0.17 0 0.13
          name "tail solid"
          children [
            Shape {
              appearance USE FUR_BROWN
              geometry Capsule {
                height 0.12
                radius 0.013
                top FALSE
              }
            }
            # Tail tip ball
            Transform {
              translation 0 0 0.08
              children [
                Shape {
                  appearance PBRAppearance {
                    baseColor 0.2 0.2 0.2
                    roughness 0.3
                    metalness 0.6
                  }
                  geometry Sphere {
                    radius 0.028
                    subdivision 4
                  }
                }
              ]
            }
          ]
          boundingObject Group {
            children [
              Capsule {
                height 0.12
                radius 0.013
              }
              Transform {
                translation 0 0 0.08
                children [
                  Sphere {
                    radius 0.028
                  }
                ]
              }
            ]
          }
          physics Physics {
            density -1
            mass 0.08
          }
        }
      }
      # ========== AXLE ARMS (4 corners) ==========
      DEF FRONT_RIGHT_AXLE Transform {
        translation 0.14 -0.115 0.038
        children [
          Shape {
            appearance PBRAppearance {
              baseColor 0.5 0.5 0.5
              roughness 0.3
              metalness 0.8
            }
            geometry Box {
              size 0.02 0.08 0.02
            }
          }
        ]
      }
      DEF FRONT_LEFT_AXLE Transform {
        translation 0.14 0.115 0.038
        children [
          Shape {
            appearance PBRAppearance {
              baseColor 0.5 0.5 0.5
              roughness 0.3
              metalness 0.8
            }
            geometry Box {
              size 0.02 0.08 0.02
            }
          }
        ]
      }
      DEF REAR_RIGHT_AXLE Transform {
        translation -0.14 -0.115 0.038
        children [
          Shape {
            appearance PBRAppearance {
              baseColor 0.5 0.5 0.5
              roughness 0.3
              metalness 0.8
            }
            geometry Box {
              size 0.02 0.08 0.02
            }
          }
        ]
      }
      DEF REAR_LEFT_AXLE Transform {
        translation -0.14 0.115 0.038
        children [
          Shape {
            appearance PBRAppearance {
              baseColor 0.5 0.5 0.5
              roughness 0.3
              metalness 0.8
            }
            geometry Box {
              size 0.02 0.08 0.02
            }
          }
        ]
      }
      # ========== FRONT RIGHT WHEEL ==========
      DEF FRONT_RIGHT_WHEEL_JOINT HingeJoint {
        jointParameters HingeJointParameters {
          axis 0 1 0
          anchor 0.14 -0.14 0.038
        }
        device [
          RotationalMotor {
            name "front right wheel motor"
            maxVelocity 70.0
            maxTorque 20.0
          }
          PositionSensor {
            name "front right wheel sensor"
            resolution 0.00628
          }
        ]
        endPoint Solid {
          translation 0.14 -0.14 0.038
          rotation 0 -1 0 1.570796
          children [
            DEF WHEEL_VIS Pose {
              rotation 1 0 0 -1.5708
              children [
                # Hub drum
                Shape {
                  appearance PBRAppearance {
                    baseColor 0.5 0.5 0.5
                    roughness 0.3
                    metalness 0.7
                  }
                  geometry Cylinder {
                    height 0.018
                    radius 0.022
                    subdivision 16
                  }
                }
                # Axle boss
                Shape {
                  appearance PBRAppearance {
                    baseColor 0.6 0.6 0.6
                    roughness 0.2
                    metalness 0.8
                  }
                  geometry Cylinder {
                    height 0.022
                    radius 0.008
                    subdivision 8
                  }
                }
                # Mecanum roller 1 (top, +y)
                DEF ROLLER_1 Pose {
                  translation 0 0.031 0
                  rotation 0 0 1 0.7854
                  children [
                    Shape {
                      appearance PBRAppearance {
                        baseColor 0.12 0.12 0.12
                        roughness 0.7
                        metalness 0.1
                      }
                      geometry Capsule {
                        height 0.020
                        radius 0.007
                        subdivision 8
                      }
                    }
                  ]
                }
                # Mecanum roller 2 (right, +x)
                DEF ROLLER_2 Pose {
                  translation 0.031 0 0
                  rotation 0 0 1 0.7854
                  children [
                    Shape {
                      appearance PBRAppearance {
                        baseColor 0.12 0.12 0.12
                        roughness 0.7
                        metalness 0.1
                      }
                      geometry Capsule {
                        height 0.020
                        radius 0.007
                        subdivision 8
                      }
                    }
                  ]
                }
                # Mecanum roller 3 (bottom, -y)
                DEF ROLLER_3 Pose {
                  translation 0 -0.031 0
                  rotation 0 0 1 0.7854
                  children [
                    Shape {
                      appearance PBRAppearance {
                        baseColor 0.12 0.12 0.12
                        roughness 0.7
                        metalness 0.1
                      }
                      geometry Capsule {
                        height 0.020
                        radius 0.007
                        subdivision 8
                      }
                    }
                  ]
                }
                # Mecanum roller 4 (left, -x)
                DEF ROLLER_4 Pose {
                  translation -0.031 0 0
                  rotation 0 0 1 0.7854
                  children [
                    Shape {
                      appearance PBRAppearance {
                        baseColor 0.12 0.12 0.12
                        roughness 0.7
                        metalness 0.1
                      }
                      geometry Capsule {
                        height 0.020
                        radius 0.007
                        subdivision 8
                      }
                    }
                  ]
                }
                # Mecanum roller 5 (diagonal +x+y)
                DEF ROLLER_5 Pose {
                  translation 0.022 0.022 0
                  rotation 0 0 1 0.7854
                  children [
                    Shape {
                      appearance PBRAppearance {
                        baseColor 0.12 0.12 0.12
                        roughness 0.7
                        metalness 0.1
                      }
                      geometry Capsule {
                        height 0.020
                        radius 0.007
                        subdivision 8
                      }
                    }
                  ]
                }
                # Mecanum roller 6 (diagonal +x-y)
                DEF ROLLER_6 Pose {
                  translation 0.022 -0.022 0
                  rotation 0 0 1 0.7854
                  children [
                    Shape {
                      appearance PBRAppearance {
                        baseColor 0.12 0.12 0.12
                        roughness 0.7
                        metalness 0.1
                      }
                      geometry Capsule {
                        height 0.020
                        radius 0.007
                        subdivision 8
                      }
                    }
                  ]
                }
                # Mecanum roller 7 (diagonal -x-y)
                DEF ROLLER_7 Pose {
                  translation -0.022 -0.022 0
                  rotation 0 0 1 0.7854
                  children [
                    Shape {
                      appearance PBRAppearance {
                        baseColor 0.12 0.12 0.12
                        roughness 0.7
                        metalness 0.1
                      }
                      geometry Capsule {
                        height 0.020
                        radius 0.007
                        subdivision 8
                      }
                    }
                  ]
                }
                # Mecanum roller 8 (diagonal -x+y)
                DEF ROLLER_8 Pose {
                  translation -0.022 0.022 0
                  rotation 0 0 1 0.7854
                  children [
                    Shape {
                      appearance PBRAppearance {
                        baseColor 0.12 0.12 0.12
                        roughness 0.7
                        metalness 0.1
                      }
                      geometry Capsule {
                        height 0.020
                        radius 0.007
                        subdivision 8
                      }
                    }
                  ]
                }
              ]
            }
          ]
          name "front right wheel"
          contactMaterial "MecanumWheel"
          boundingObject Pose {
            rotation 1 0 0 -1.5708
            children [
              Cylinder {
                height 0.022
                radius 0.038
              }
            ]
          }
          physics Physics {
            density -1
            mass 0.06
            centerOfMass [
              0 0 0
            ]
          }
        }
      }
      # ========== FRONT LEFT WHEEL ==========
      DEF FRONT_LEFT_WHEEL_JOINT HingeJoint {
        jointParameters HingeJointParameters {
          axis 0 1 0
          anchor 0.14 0.14 0.038
        }
        device [
          RotationalMotor {
            name "front left wheel motor"
            maxVelocity 70.0
            maxTorque 20.0
          }
          PositionSensor {
            name "front left wheel sensor"
            resolution 0.00628
          }
        ]
        endPoint Solid {
          translation 0.14 0.14 0.038
          rotation 0.707105 0 0.707109 -3.14159
          children [
            USE WHEEL_VIS
          ]
          name "front left wheel"
          contactMaterial "MecanumWheel"
          boundingObject Pose {
            rotation 1 0 0 -1.5708
            children [
              Cylinder {
                height 0.022
                radius 0.038
              }
            ]
          }
          physics Physics {
            density -1
            mass 0.06
            centerOfMass [
              0 0 0
            ]
          }
        }
      }
      # ========== REAR RIGHT WHEEL ==========
      DEF REAR_RIGHT_WHEEL_JOINT HingeJoint {
        jointParameters HingeJointParameters {
          axis 0 1 0
          anchor -0.14 -0.14 0.038
        }
        device [
          RotationalMotor {
            name "rear right wheel motor"
            maxVelocity 70.0
            maxTorque 20.0
          }
          PositionSensor {
            name "rear right wheel sensor"
            resolution 0.00628
          }
        ]
        endPoint Solid {
          translation -0.14 -0.14 0.038
          rotation 0 -1 0 1.570796
          children [
            USE WHEEL_VIS
          ]
          name "rear right wheel"
          contactMaterial "MecanumWheel"
          boundingObject Pose {
            rotation 1 0 0 -1.5708
            children [
              Cylinder {
                height 0.022
                radius 0.038
              }
            ]
          }
          physics Physics {
            density -1
            mass 0.06
            centerOfMass [
              0 0 0
            ]
          }
        }
      }
      # ========== REAR LEFT WHEEL ==========
      DEF REAR_LEFT_WHEEL_JOINT HingeJoint {
        jointParameters HingeJointParameters {
          axis 0 1 0
          anchor -0.14 0.14 0.038
        }
        device [
          RotationalMotor {
            name "rear left wheel motor"
            maxVelocity 70.0
            maxTorque 20.0
          }
          PositionSensor {
            name "rear left wheel sensor"
            resolution 0.00628
          }
        ]
        endPoint Solid {
          translation -0.14 0.14 0.038
          rotation 0.707105 0 0.707109 -3.14159
          children [
            USE WHEEL_VIS
          ]
          name "rear left wheel"
          contactMaterial "MecanumWheel"
          boundingObject Pose {
            rotation 1 0 0 -1.5708
            children [
              Cylinder {
                height 0.022
                radius 0.038
              }
            ]
          }
          physics Physics {
            density -1
            mass 0.06
            centerOfMass [
              0 0 0
            ]
          }
        }
      }
      # ========== IMU SENSORS ==========
      Accelerometer {
        translation 0 0 0.10
        name "accelerometer"
      }
      Gyro {
        translation 0 0 0.10
        name "gyro"
      }
      Compass {
        translation 0 0 0.10
        name "compass"
      }
      # ========== GPS ==========
      GPS {
        translation 0 0 0.17
        name "gps"
      }
      # ========== RECEIVER ==========
      Receiver {
        name "receiver"
        channel 1
      }
      # ========== EMITTER ==========
      Emitter {
        name "emitter"
        channel 1
        range 50.0
      }
    ]
    # ========== BOUNDING OBJECT ==========
    boundingObject Group {
      children [
        # Chassis box
        Transform {
          translation 0 0 0.05
          children [
            Box {
              size 0.32 0.16 0.06
            }
          ]
        }
        # Body box
        Transform {
          translation 0 0 0.11
          children [
            Box {
              size 0.30 0.16 0.08
            }
          ]
        }
      ]
    }
    # ========== PHYSICS ==========
    physics Physics {
      density -1
      mass 5.0
      centerOfMass [
        0 0 0.03
      ]
    }
  }
 }
@@ -0,0 +1,8 @@
 """Pytest configuration — ensure the project root is on ``sys.path``."""
 import os
 import sys
 _PROJECT_ROOT = os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))
 if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)
@@ -0,0 +1,188 @@
 """Control primitives: speed modulation, Strömbom, Sequential, ActiveScan."""
 import math
 import pytest
 from herding.control.active_scan import (
    EMPTY_DEBOUNCE_STEPS, INITIAL_SCAN_STEPS, ActiveScanTeacher,
 )
 from herding.control.modulation import (
    MIN_SPEED, SLOW_NEAR_SHEEP, modulate_speed_near_sheep,
 )
 from herding.control.sequential import compute_action as sequential_action
 from herding.control.strombom import (
    DELTA_DRIVE, F_FACTOR, compute_action as strombom_action,
 )
 from herding.control.universal import compute_action as universal_action
 from herding.world.geometry import PEN_ENTRY
 # ---------------------------------------------------------------------------
 # Modulation
 # ---------------------------------------------------------------------------
 def test_modulation_empty_input_passthrough():
    assert modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0), []) == (1.0, 0.0)
    assert modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0), {}) == (1.0, 0.0)
 def test_modulation_far_sheep_passthrough():
    vx, vy = modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0), [(100.0, 0.0)])
    assert (vx, vy) == (1.0, 0.0)
 def test_modulation_close_sheep_min_speed():
    vx, vy = modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0), [(0.0, 0.0)])
    assert math.isclose(vx, MIN_SPEED)
    assert vy == 0.0
 def test_modulation_preserves_direction():
    vx, vy = modulate_speed_near_sheep(0.6, 0.8, (0.0, 0.0), [(1.0, 0.0)])
    ratio = math.hypot(vx, vy)
    # Direction preserved.
    assert math.isclose(vx / ratio, 0.6, abs_tol=1e-6)
    assert math.isclose(vy / ratio, 0.8, abs_tol=1e-6)
 def test_modulation_linear_ramp_midpoint():
    vx, _ = modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0),
                                      [(SLOW_NEAR_SHEEP / 2, 0.0)])
    expected = MIN_SPEED + (1.0 - MIN_SPEED) * 0.5
    assert math.isclose(vx, expected, abs_tol=1e-6)
 def test_modulation_accepts_dict_input():
    vx_list, _ = modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0),
                                           [(1.0, 0.0)])
    vx_dict, _ = modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0),
                                           {"t0": (1.0, 0.0)})
    assert math.isclose(vx_list, vx_dict)
 # ---------------------------------------------------------------------------
 # Strömbom
 # ---------------------------------------------------------------------------
 def test_strombom_empty_input_idle():
    vx, vy, mode = strombom_action((0.0, 0.0), {}, PEN_ENTRY)
    assert (vx, vy, mode) == (0.0, 0.0, "idle")
 def test_strombom_tight_flock_drives():
    # A tight 3-sheep cluster centred at (0, 8): radius < F_FACTOR·√3.
    sheep = {"s0": (0.0, 8.0), "s1": (0.5, 8.5), "s2": (-0.5, 8.0)}
    vx, vy, mode = strombom_action((0.0, 0.0), sheep, PEN_ENTRY)
    assert mode == "drive"
    assert math.isclose(math.hypot(vx, vy), 1.0, abs_tol=1e-3)
 def test_strombom_scattered_flock_collects():
    # Sparse, max radius > F_FACTOR·√n.
    sheep = {"s0": (10.0, 10.0), "s1": (-10.0, -10.0), "s2": (0.0, 0.0)}
    _vx, _vy, mode = strombom_action((0.0, 0.0), sheep, PEN_ENTRY)
    assert mode == "collect"
 def test_strombom_ignores_already_penned_sheep():
    """Sheep south of the gate plane are excluded from the active set."""
    sheep = {
        "s_active": (5.0, 5.0),
        "s_penned": (11.5, -20.0),
    }
    # With one active sheep, Strömbom drives (radius = 0 < threshold).
    _vx, _vy, mode = strombom_action((0.0, 0.0), sheep, PEN_ENTRY)
    assert mode == "drive"
 # ---------------------------------------------------------------------------
 # Sequential
 # ---------------------------------------------------------------------------
 def test_sequential_empty_input_idle():
    vx, vy, mode = sequential_action((0.0, 0.0), {}, PEN_ENTRY)
    assert (vx, vy, mode) == (0.0, 0.0, "idle")
 def test_sequential_targets_closest_to_pen():
    near = (10.0, -5.0)       # closer to pen entry (11.5, -15)
    far = (-10.0, 10.0)
    sheep = {"near": near, "far": far}
    _vx, _vy, mode = sequential_action((0.0, 0.0), sheep, PEN_ENTRY)
    assert mode.startswith("drive:near")
 # ---------------------------------------------------------------------------
 # ActiveScan wrapper
 # ---------------------------------------------------------------------------
 def test_active_scan_initial_phase_rotates():
    teacher = ActiveScanTeacher(strombom_action)
    # First call → opening rotation regardless of input.
    vx, vy, omega, mode = teacher(
        (0.0, 0.0), 0.0, {"s0": (5.0, 0.0)}, PEN_ENTRY)
    assert mode == "scan_initial"
    assert omega == 0.0
    assert math.isclose(math.hypot(vx, vy), 1.0, abs_tol=1e-6)
 def test_active_scan_hands_off_to_base_after_opener():
    teacher = ActiveScanTeacher(strombom_action, initial_scan_steps=2)
    # Burn through the opener.
    for _ in range(2):
        teacher((0.0, 0.0), 0.0, {"s0": (0.0, 8.0)}, PEN_ENTRY)
    _vx, _vy, _omega, mode = teacher(
        (0.0, 0.0), 0.0, {"s0": (0.0, 8.0)}, PEN_ENTRY)
    # Either drive (Strömbom mode label) or collect; not scan_initial.
    assert "scan" not in mode
 def test_active_scan_holds_last_action_on_brief_empty():
    teacher = ActiveScanTeacher(strombom_action, initial_scan_steps=1)
    # Step once (opening), then once with a visible sheep — sets last_action.
    teacher((0.0, 0.0), 0.0, {}, PEN_ENTRY)
    teacher((0.0, 0.0), 0.0, {"s0": (0.0, 8.0)}, PEN_ENTRY)
    last = teacher.last_action
    # Now a single empty frame → hold.
    vx, vy, _omega, mode = teacher((0.0, 0.0), 0.0, {}, PEN_ENTRY)
    assert mode == "hold"
    assert (vx, vy) == last
 def test_active_scan_explores_after_sustained_empty():
    teacher = ActiveScanTeacher(strombom_action, initial_scan_steps=1)
    teacher((0.0, 0.0), 0.0, {}, PEN_ENTRY)  # opener
    for _ in range(EMPTY_DEBOUNCE_STEPS):
        last_vx, last_vy, _omega, mode = teacher(
            (5.0, 5.0), 0.0, {}, PEN_ENTRY)
    assert mode in ("explore", "scan_at_centre")
 def test_active_scan_preserves_mecanum_omega():
    """Regression: ActiveScanTeacher must propagate omega from a mecanum
    base teacher, not silently drop it. Without this, BC mecanum demos
    have omega=0 everywhere and the policy never learns to rotate.
    """
    teacher = ActiveScanTeacher(universal_action, initial_scan_steps=1)
    # Burn the opener so we exit phase 1.
    teacher((0.0, 0.0), 0.0, {"s0": (8.0, 8.0)}, PEN_ENTRY,
            drive_mode="mecanum")
    # Place a sheep off to the side so the dog needs to face it.
    # Dog at origin facing +x (heading=0); target at (0, 8) → desired
    # heading +π/2, so omega should be positive.
    vx, vy, omega, mode = teacher(
        (0.0, 0.0), 0.0, {"s0": (0.0, 8.0)}, PEN_ENTRY,
        drive_mode="mecanum")
    assert mode in ("collect", "drive", "recovery")
    assert abs(omega) > 0.05, f"omega should be non-zero on mecanum, got {omega}"
 def test_active_scan_reset_clears_state():
    teacher = ActiveScanTeacher(strombom_action, initial_scan_steps=5)
    for _ in range(3):
        teacher((0.0, 0.0), 0.0, {}, PEN_ENTRY)
    assert teacher.step == 3
    teacher.reset()
    assert teacher.step == 0
    assert teacher.empty_streak == 0
@@ -0,0 +1,192 @@
 """Differential-drive and mecanum kinematics tests."""
 import math
 import pytest
 from herding.world.diffdrive import (
    heading_speed_to_wheels, kinematics_step,
    mecanum_inverse, mecanum_kinematics_step,
    velocity_to_mecanum_wheels, velocity_to_wheels,
 )
 WHEEL_R = 0.038
 WHEEL_B = 0.28
 MAX_OMEGA = 70.0
 MAX_LINEAR = WHEEL_R * MAX_OMEGA
 DT = 0.016
 def test_kinematics_zero_input_is_identity():
    x, y, h = kinematics_step(1.0, 2.0, 0.5, 0.0, 0.0, WHEEL_R, WHEEL_B, DT)
    assert (x, y, h) == (1.0, 2.0, 0.5)
 def test_kinematics_forward_motion():
    # Equal wheel speeds → pure translation along the heading.
    x, y, h = kinematics_step(0.0, 0.0, 0.0, 10.0, 10.0, WHEEL_R, WHEEL_B, DT)
    assert h == 0.0
    assert math.isclose(x, 10.0 * WHEEL_R * DT)
    assert y == 0.0
 def test_kinematics_pure_rotation():
    # Opposite wheel speeds → pure rotation, position unchanged.
    x, y, h = kinematics_step(0.0, 0.0, 0.0, -5.0, 5.0, WHEEL_R, WHEEL_B, DT)
    assert (x, y) == (0.0, 0.0)
    assert h > 0.0
 def test_kinematics_heading_wrapped_to_pi():
    _, _, h = kinematics_step(0.0, 0.0, math.pi - 0.01, 100.0, -100.0,
                              WHEEL_R, WHEEL_B, DT)
    assert -math.pi <= h <= math.pi
 def test_velocity_to_wheels_zero_velocity():
    left, right = velocity_to_wheels(0.0, 0.0, 0.0,
                                     MAX_LINEAR, WHEEL_R, MAX_OMEGA)
    assert (left, right) == (0.0, 0.0)
 def test_velocity_to_wheels_aligned_forward():
    # Target straight ahead → equal positive wheel speeds.
    left, right = velocity_to_wheels(1.0, 0.0, 0.0,
                                     MAX_LINEAR, WHEEL_R, MAX_OMEGA, k_turn=4.0)
    assert math.isclose(left, right, abs_tol=1e-6)
    assert left > 0.0
 def test_velocity_to_wheels_perpendicular_target_spins():
    # Target 90° from heading → forward speed ≈ 0, wheels equal-and-opposite.
    left, right = velocity_to_wheels(0.0, 1.0, 0.0,
                                     MAX_LINEAR, WHEEL_R, MAX_OMEGA, k_turn=4.0)
    assert left + right == pytest.approx(0.0, abs=1e-6)
    assert right > 0.0  # turning CCW (left of heading is +y for h=0)
 def test_velocity_to_wheels_clamped_to_max_omega():
    # Far overshoot — both wheel commands clamped at ±MAX_OMEGA.
    left, right = velocity_to_wheels(-1.0, 0.0, 0.0,
                                     MAX_LINEAR, WHEEL_R, MAX_OMEGA, k_turn=20.0)
    assert -MAX_OMEGA <= left <= MAX_OMEGA
    assert -MAX_OMEGA <= right <= MAX_OMEGA
 def test_heading_speed_to_wheels_aligned():
    left, right = heading_speed_to_wheels(0.0, 10.0, 0.0, MAX_OMEGA)
    assert math.isclose(left, right, abs_tol=1e-6)
    assert left > 0.0
 def test_heading_speed_to_wheels_reverse_target_forwards_zero():
    left, right = heading_speed_to_wheels(math.pi, 10.0, 0.0, MAX_OMEGA)
    # cos(π) clamped at 0 → no forward; pure rotation.
    assert left + right == pytest.approx(0.0, abs=1e-6)
 # ---------------------------------------------------------------------------
 # Mecanum kinematics tests
 # ---------------------------------------------------------------------------
 LX = 0.14   # half wheel_base_x
 LY = 0.14   # half wheel_base_y
 def test_mecanum_kinematics_zero_is_identity():
    x, y, h = mecanum_kinematics_step(
        1.0, 2.0, 0.5, 0.0, 0.0, 0.0, 0.0, WHEEL_R, LX, LY, DT,
    )
    assert (x, y, h) == (1.0, 2.0, 0.5)
 def test_mecanum_kinematics_pure_forward():
    # All 4 wheels equal → pure forward (vx_body > 0, vy_body = 0).
    w = 10.0
    x, y, h = mecanum_kinematics_step(
        0.0, 0.0, 0.0, w, w, w, w, WHEEL_R, LX, LY, DT,
    )
    assert h == pytest.approx(0.0, abs=1e-9)
    assert y == pytest.approx(0.0, abs=1e-9)
    assert math.isclose(x, w * WHEEL_R * DT, rel_tol=1e-6)
 def test_mecanum_kinematics_pure_strafe():
    # Strafe right (positive vy_body) with zero forward:
    # vx_body = (w_fl+w_fr+w_rl+w_rr)*r/4 = 0 → sum of wheels = 0
    # vy_body = (-w_fl+w_fr+w_rl-w_rr)*r/4 > 0
    # Use w_fl=-10, w_fr=10, w_rl=10, w_rr=-10.
    w_fl, w_fr, w_rl, w_rr = -10.0, 10.0, 10.0, -10.0
    x, y, h = mecanum_kinematics_step(
        0.0, 0.0, 0.0, w_fl, w_fr, w_rl, w_rr, WHEEL_R, LX, LY, DT,
    )
    assert h == pytest.approx(0.0, abs=1e-9)
    assert x == pytest.approx(0.0, abs=1e-9)
    expected_vy = (-w_fl + w_fr + w_rl - w_rr) * WHEEL_R / 4.0
    assert math.isclose(y, expected_vy * DT, rel_tol=1e-6)
 def test_mecanum_kinematics_pure_rotation():
    # Pure rotation: vx_body=0, vy_body=0, omega>0.
    # w_fl=-10, w_fr=10, w_rl=-10, w_rr=10 → all sums cancel except omega.
    w_fl, w_fr, w_rl, w_rr = -10.0, 10.0, -10.0, 10.0
    x, y, h = mecanum_kinematics_step(
        0.0, 0.0, 0.0, w_fl, w_fr, w_rl, w_rr, WHEEL_R, LX, LY, DT,
    )
    assert x == pytest.approx(0.0, abs=1e-9)
    assert y == pytest.approx(0.0, abs=1e-9)
    assert h > 0.0
 def test_mecanum_inverse_roundtrip():
    # Inverse → forward: pick desired body velocities, compute wheels,
    # then verify forward kinematics recovers the same velocities.
    vx_b = 0.5
    vy_b = 0.3
    omega = 0.2
    w_fl, w_fr, w_rl, w_rr = mecanum_inverse(
        vx_b, vy_b, omega, WHEEL_R, LX, LY, MAX_OMEGA,
    )
    vx_check = (w_fl + w_fr + w_rl + w_rr) * WHEEL_R / 4.0
    vy_check = (-w_fl + w_fr + w_rl - w_rr) * WHEEL_R / 4.0
    omega_check = (-w_fl + w_fr - w_rl + w_rr) * WHEEL_R / (4.0 * (LX + LY))
    assert math.isclose(vx_b, vx_check, rel_tol=1e-6)
    assert math.isclose(vy_b, vy_check, rel_tol=1e-6)
    assert math.isclose(omega, omega_check, rel_tol=1e-6)
 def test_mecanum_inverse_clamped():
    # Request an extreme velocity — all wheels should be clamped.
    w_fl, w_fr, w_rl, w_rr = mecanum_inverse(
        100.0, 100.0, 50.0, WHEEL_R, LX, LY, MAX_OMEGA,
    )
    assert max(abs(w_fl), abs(w_fr), abs(w_rl), abs(w_rr)) <= MAX_OMEGA
 def test_velocity_to_mecanum_wheels_zero():
    result = velocity_to_mecanum_wheels(
        0.0, 0.0, 0.0, 0.0, MAX_LINEAR, WHEEL_R, LX, LY, MAX_OMEGA,
        wheel_base=WHEEL_B,
    )
    assert result == (0.0, 0.0, 0.0, 0.0)
 def test_velocity_to_mecanum_wheels_forward():
    w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
        1.0, 0.0, 0.0, 0.0, MAX_LINEAR, WHEEL_R, LX, LY, MAX_OMEGA,
        wheel_base=WHEEL_B,
    )
    # All 4 wheels should be positive and roughly equal.
    assert all(w > 0.0 for w in (w_fl, w_fr, w_rl, w_rr))
    assert math.isclose(w_fl, w_rr, rel_tol=1e-6)
    assert math.isclose(w_fr, w_rl, rel_tol=1e-6)
 def test_velocity_to_mecanum_wheels_clamped():
    # Extreme input — all wheels within max.
    ws = velocity_to_mecanum_wheels(
        1.0, 1.0, 1.0, 0.0, MAX_LINEAR, WHEEL_R, LX, LY, MAX_OMEGA,
        wheel_base=WHEEL_B,
    )
    assert all(abs(w) <= MAX_OMEGA for w in ws)
@@ -0,0 +1,116 @@
 """Gymnasium env: contract, determinism, reward components."""
 import math
 import numpy as np
 import pytest
 from herding.world.geometry import MAX_SHEEP, PEN_ENTRY
 from herding.perception.obs import OBS_DIM
 from herding.control.strombom import compute_action as strombom_action
 from training.herding_env import HerdingEnv
 def test_env_obs_action_shapes_single_frame():
    env = HerdingEnv(n_sheep=3, seed=0, use_lidar=False)
    obs, info = env.reset()
    assert obs.shape == (OBS_DIM,)
    assert obs.dtype == np.float32
    obs, reward, term, trunc, info = env.step(
        np.array([0.5, 0.0], dtype=np.float32))
    assert obs.shape == (OBS_DIM,)
    assert isinstance(reward, float)
    assert isinstance(term, bool) and isinstance(trunc, bool)
 def test_env_observation_space_matches_frame_stack():
    env = HerdingEnv(n_sheep=2, seed=0, use_lidar=False, frame_stack=4)
    obs, _ = env.reset()
    assert obs.shape == (OBS_DIM * 4,)
    assert env.observation_space.shape == (OBS_DIM * 4,)
 def test_env_reset_determinism_same_seed():
    a = HerdingEnv(n_sheep=3, seed=42, use_lidar=False)
    b = HerdingEnv(n_sheep=3, seed=42, use_lidar=False)
    obs_a, _ = a.reset(seed=42)
    obs_b, _ = b.reset(seed=42)
    assert np.allclose(obs_a, obs_b)
 def test_env_constructor_seed_applies_to_first_reset():
    a = HerdingEnv(n_sheep=3, seed=42, use_lidar=False)
    b = HerdingEnv(n_sheep=3, seed=42, use_lidar=False)
    obs_a, _ = a.reset()
    obs_b, _ = b.reset()
    assert np.allclose(obs_a, obs_b)
 def test_env_curriculum_samples_full_range():
    env = HerdingEnv(seed=0, use_lidar=False)
    sizes = set()
    for _ in range(40):
        _, info = env.reset()
        sizes.add(info["n_sheep"])
    assert 1 in sizes
    assert max(sizes) <= MAX_SHEEP
 def test_env_step_returns_finite_values():
    env = HerdingEnv(n_sheep=2, max_steps=200, seed=1, use_lidar=False)
    obs, _ = env.reset()
    for _ in range(200):
        action = np.array([0.5, 0.5], dtype=np.float32)
        obs, reward, term, trunc, _ = env.step(action)
        assert np.isfinite(obs).all()
        assert math.isfinite(reward)
        if term or trunc:
            break
 def test_env_options_n_sheep_overrides_curriculum():
    env = HerdingEnv(seed=0, use_lidar=False)
    _, info = env.reset(options={"n_sheep": 7})
    assert info["n_sheep"] == 7
 def test_env_perceived_positions_lidar_vs_privileged():
    env_priv = HerdingEnv(n_sheep=3, seed=0, use_lidar=False)
    env_priv.reset(seed=0)
    pos_priv = env_priv.perceived_positions()
    assert len(pos_priv) == 3
    env_lidar = HerdingEnv(n_sheep=3, seed=0, use_lidar=True)
    env_lidar.reset(seed=0)
    pos_lidar = env_lidar.perceived_positions()
    # LiDAR mode returns whatever the tracker has — may be fewer than 3
    # if sheep are out of FOV / range, but never more.
    assert len(pos_lidar) <= 3
 def test_env_set_time_weight_affects_reward():
    env = HerdingEnv(n_sheep=1, seed=0, use_lidar=False)
    env.reset(seed=0)
    _, r_default, *_ = env.step(np.array([0.0, 0.0], dtype=np.float32))
    env.set_time_weight(-1.0)
    env.reset(seed=0)
    _, r_penalised, *_ = env.step(np.array([0.0, 0.0], dtype=np.float32))
    assert r_penalised < r_default
 def test_env_strombom_rollout_moves_dog():
    env = HerdingEnv(n_sheep=2, max_steps=400, seed=1, use_lidar=False)
    env.reset()
    start = (env.dog_x, env.dog_y)
    for _ in range(400):
        positions = env.perceived_positions()
        if not positions:
            break
        vx, vy, _ = strombom_action(
            (env.dog_x, env.dog_y), positions, PEN_ENTRY)
        obs, _r, term, trunc, _ = env.step(
            np.array([vx, vy], dtype=np.float32))
        if term or trunc:
            break
    displacement = math.hypot(env.dog_x - start[0], env.dog_y - start[1])
    assert displacement > 0.05
@@ -0,0 +1,75 @@
 """Geometric predicates and constants."""
 import math
 from herding.world.geometry import (
    FIELD_X, FIELD_Y, GATE_X, GATE_Y, MAX_SHEEP, PEN_ENTRY, PEN_X, PEN_Y,
    distance_to_pen_entry, in_field, in_gate_corridor, in_pen,
    is_penned_position,
 )
 def test_field_dimensions():
    assert FIELD_X == (-15.0, 15.0)
    assert FIELD_Y == (-15.0, 15.0)
 def test_pen_geometry():
    assert PEN_X == (10.0, 13.0)
    assert PEN_Y == (-22.0, -15.0)
    assert PEN_ENTRY == (11.5, -15.0)
    assert GATE_X == PEN_X
    assert GATE_Y == -15.0
 def test_in_pen_strict_interior():
    assert in_pen(11.5, -18.0)
    assert not in_pen(10.0, -18.0)        # boundary excluded
    assert not in_pen(11.5, -15.0)        # gate plane excluded
    assert not in_pen(0.0, 0.0)
 def test_in_field_with_margin():
    assert in_field(0.0, 0.0)
    assert in_field(14.0, 14.0)
    assert not in_field(15.5, 0.0)
    assert in_field(14.4, 0.0, margin=0.5)
    assert not in_field(14.6, 0.0, margin=0.5)
 def test_in_gate_corridor():
    assert in_gate_corridor(11.5, -18.0)
    assert in_gate_corridor(10.0, -15.0)
    assert not in_gate_corridor(11.5, -10.0)
    assert not in_gate_corridor(5.0, -18.0)
 def test_is_penned_position_latches_below_gate():
    # In the gate column and south of the gate plane → penned.
    assert is_penned_position(11.5, -15.0)
    assert is_penned_position(10.5, -18.0)
    assert is_penned_position(12.5, -22.0)
    # Above the gate plane → not yet.
    assert not is_penned_position(11.5, -14.9)
    # Outside the gate column → not penned even if south.
    assert not is_penned_position(0.0, -16.0)
    assert not is_penned_position(14.0, -16.0)
 def test_is_penned_position_latch_margin():
    # Slight tolerance on the gate column.
    assert is_penned_position(9.9, -15.5)
    assert is_penned_position(13.1, -15.5)
    assert not is_penned_position(9.7, -15.5)
 def test_distance_to_pen_entry():
    assert distance_to_pen_entry(*PEN_ENTRY) == 0.0
    assert math.isclose(distance_to_pen_entry(11.5, -10.0), 5.0)
    assert math.isclose(distance_to_pen_entry(0.0, 0.0),
                        math.hypot(11.5, 15.0))
 def test_max_sheep_positive_int():
    assert isinstance(MAX_SHEEP, int)
    assert MAX_SHEEP >= 1
@@ -0,0 +1,71 @@
 """Observation builder — shape, normalisation, order invariance."""
 import math
 import numpy as np
 import pytest
 from herding.perception.obs import OBS_DIM, build_obs
 def test_obs_shape_and_dtype():
    obs = build_obs((0.0, 0.0), 0.0, [(5.0, 5.0)], [False])
    assert obs.shape == (OBS_DIM,)
    assert obs.dtype == np.float32
 def test_obs_no_active_sheep_terminal():
    # All sheep penned → flock-summary fields zero, count zero.
    obs = build_obs((0.0, 0.0), 0.0, [(1.0, 1.0), (2.0, 2.0)], [True, True])
    assert obs[19] == 0.0
    # Aggregate fields (CoM, radius, std, vectors) should all be zero.
    assert np.allclose(obs[4:12], 0.0)
 def test_obs_dog_pose_normalised():
    obs = build_obs((15.0, -15.0), math.pi / 2, [(0.0, 0.0)], [False])
    assert math.isclose(obs[0], 1.0)
    assert math.isclose(obs[1], -1.0)
    assert math.isclose(obs[2], math.cos(math.pi / 2), abs_tol=1e-6)
    assert math.isclose(obs[3], math.sin(math.pi / 2), abs_tol=1e-6)
 def test_obs_order_invariance():
    """Sheep order in the input list must not affect the observation."""
    sheep = [(3.0, 2.0), (-5.0, 1.0), (0.0, 8.0)]
    p = [False] * 3
    a = build_obs((0.0, 0.0), 0.0, sheep, p)
    b = build_obs((0.0, 0.0), 0.0, list(reversed(sheep)), list(reversed(p)))
    assert np.allclose(a, b)
 def test_obs_count_field_normalised_by_n_max():
    sheep = [(1.0, 1.0)] * 5
    p = [False] * 5
    obs = build_obs((0.0, 0.0), 0.0, sheep, p, n_max=10)
    assert math.isclose(obs[19], 0.5)
 def test_obs_polar_histogram_sums_to_one():
    sheep = [(1.0, 0.0), (-1.0, 0.0), (0.0, 1.0), (0.0, -1.0)]
    obs = build_obs((0.0, 0.0), 0.0, sheep, [False] * 4)
    assert math.isclose(float(obs[20:28].sum()), 1.0, abs_tol=1e-6)
 def test_obs_named_channels_closest_rearmost():
    # Channels 28..29 = (closest_to_pen - dog) / 15
    # Channels 30..31 = (rearmost - dog) / 15
    pen_x, pen_y = 11.5, -15.0
    near = (pen_x + 1.0, pen_y + 1.0)
    far = (-10.0, 10.0)
    obs = build_obs((0.0, 0.0), 0.0, [near, far], [False, False])
    tol = 1e-5
    assert math.isclose(obs[28], near[0] / 15.0, abs_tol=tol)
    assert math.isclose(obs[29], near[1] / 15.0, abs_tol=tol)
    assert math.isclose(obs[30], far[0] / 15.0, abs_tol=tol)
    assert math.isclose(obs[31], far[1] / 15.0, abs_tol=tol)
 def test_obs_pen_vector_zero_at_pen_entry():
    obs = build_obs((11.5, -15.0), 0.0, [(0.0, 0.0)], [False])
    assert math.isclose(obs[14], 0.0)         # distance to pen
@@ -0,0 +1,166 @@
 """LiDAR simulation + perception pipeline + multi-target tracker."""
 import math
 import numpy as np
 import pytest
 from herding.perception.lidar_perception import (
    STATIC_REJECT, detections_from_scan,
 )
 from herding.perception.lidar_sim import (
    LIDAR_MAX_RANGE, LIDAR_N_RAYS, SHEEP_RADIUS, ray_angles, simulate_scan,
 )
 from herding.perception.sheep_tracker import (
    FORGET_STEPS, GATE_M, MAX_ACTIVE_TRACKS, REACQUIRE_GATE_M,
    REACQUIRE_MIN_AGE, SheepTracker,
 )
 # ---------------------------------------------------------------------------
 # Sim
 # ---------------------------------------------------------------------------
 def test_simulate_scan_shape_and_dtype():
    ranges = simulate_scan(0.0, 0.0, 0.0, [(5.0, 0.0)], noise=0.0)
    assert ranges.shape == (LIDAR_N_RAYS,)
    assert ranges.dtype == np.float32
 def test_simulate_scan_no_sheep_far_from_walls():
    # Dog at origin, no sheep, walls all ≥ 15 m away → all rays at max.
    ranges = simulate_scan(0.0, 0.0, 0.0, [], noise=0.0)
    # Walls (east/west at ±15) are beyond LIDAR_MAX_RANGE=12, so no hits.
    assert (ranges == LIDAR_MAX_RANGE).all()
 def test_simulate_scan_sheep_in_front_returns_centre_hit():
    # Sheep dead ahead at 5 m. Centre ray should hit ~ 5 - SHEEP_RADIUS.
    ranges = simulate_scan(0.0, 0.0, 0.0, [(5.0, 0.0)], noise=0.0)
    centre = ranges[LIDAR_N_RAYS // 2]
    assert math.isclose(float(centre), 5.0 - SHEEP_RADIUS, abs_tol=0.01)
 def test_simulate_scan_sheep_behind_dog_not_hit():
    # With 360° FOV, a sheep behind the dog IS now hit.
    ranges = simulate_scan(0.0, 0.0, 0.0, [(-5.0, 0.0)], noise=0.0)
    assert (ranges < LIDAR_MAX_RANGE).any()
    # Verify the closest hit is near 5m (sheep at distance 5).
    assert float(ranges.min()) < 5.3
 def test_simulate_scan_wall_hit():
    # Dog 1 m south of the north wall, facing north → centre ray ≈ 1 m.
    ranges = simulate_scan(0.0, 14.0, math.pi / 2, [], noise=0.0)
    centre = ranges[LIDAR_N_RAYS // 2]
    assert math.isclose(float(centre), 1.0, abs_tol=0.01)
 # ---------------------------------------------------------------------------
 # Perception
 # ---------------------------------------------------------------------------
 def test_detections_recover_sheep_position():
    sheep = [(5.0, 0.0), (3.0, 1.0)]
    ranges = simulate_scan(0.0, 0.0, 0.0, sheep, noise=0.0)
    det = detections_from_scan(ranges, 0.0, 0.0, 0.0)
    assert len(det) == 2
    # Centroid bias is corrected to within ~5 cm.
    for truth in sheep:
        assert any(math.hypot(d[0] - truth[0], d[1] - truth[1]) < 0.1
                   for d in det)
 def test_detections_filter_gate_post():
    # An empty scene at the dog right next to a gate post produces no
    # detections — the static-feature filter drops the post return.
    ranges = simulate_scan(11.5, -10.0, -math.pi / 2, [], noise=0.0)
    det = detections_from_scan(ranges, 11.5, -10.0, -math.pi / 2)
    for cx, cy in det:
        assert math.hypot(cx - 10.0, cy + 15.0) > STATIC_REJECT
        assert math.hypot(cx - 13.0, cy + 15.0) > STATIC_REJECT
 def test_detections_empty_scan_returns_nothing():
    assert detections_from_scan(np.array([], dtype=np.float32),
                                0.0, 0.0, 0.0) == []
 # ---------------------------------------------------------------------------
 # Tracker
 # ---------------------------------------------------------------------------
 def test_tracker_creates_track_for_new_detection():
    t = SheepTracker()
    t.update([(5.0, 0.0)])
    assert t.n_active() == 1
 def test_tracker_associates_close_detections():
    """A small movement within the gate keeps the same track."""
    t = SheepTracker()
    t.update([(5.0, 0.0)])
    t.update([(5.5, 0.0)])
    assert t.n_active() == 1
 def test_tracker_spawns_new_track_far_detection():
    t = SheepTracker()
    t.update([(5.0, 0.0)])
    t.update([(-5.0, 0.0)])           # well outside the gate
    assert t.n_active() == 2
 def test_tracker_reacquisition_for_stale_track():
    """A stale track within the wider re-acquisition gate rebinds rather
    than spawning a duplicate."""
    t = SheepTracker()
    t.update([(0.0, 0.0)])
    # Let it go stale.
    for _ in range(REACQUIRE_MIN_AGE):
        t.update([])
    # Re-emerges within REACQUIRE_GATE but outside the primary GATE.
    offset = (GATE_M + REACQUIRE_GATE_M) / 2.0
    t.update([(offset, 0.0)])
    assert t.n_active() == 1
 def test_tracker_forgets_stale_tracks():
    t = SheepTracker()
    t.update([(0.0, 0.0)])
    for _ in range(FORGET_STEPS + 1):
        t.update([])
    assert t.n_active() == 0
 def test_tracker_penned_position_promotes_track():
    t = SheepTracker()
    t.update([(11.5, -16.0)])         # spawn inside the pen column
    # is_penned_position is True for this point.
    assert t.n_penned() == 1
    assert t.n_active() == 0
 def test_tracker_penned_tracks_persist():
    t = SheepTracker()
    t.update([(11.5, -16.0)])
    for _ in range(FORGET_STEPS * 2):
        t.update([])
    # Penned tracks are not forgotten.
    assert t.n_penned() == 1
 def test_tracker_caps_active_set():
    t = SheepTracker()
    # Spawn more than the cap, each well outside the others' gates.
    for k in range(MAX_ACTIVE_TRACKS + 5):
        t.update([(k * (GATE_M + 1.0), 0.0)])
    assert t.n_active() <= MAX_ACTIVE_TRACKS
 def test_tracker_reset_clears_state():
    t = SheepTracker()
    t.update([(0.0, 0.0)])
    t.reset()
    assert t.n_active() == 0
    assert t.step == 0
@@ -0,0 +1,84 @@
 """Benchmark LiDAR perception improvements.
 Measures success rate, mean steps, and tracker quality metrics for
 demo collection across multiple seeds. Compares configurations.
 Usage::
    python -m tools.benchmark_lidar --n-sheep 5 --seeds 15
    HERDING_WORLD=field_round python -m tools.benchmark_lidar --n-sheep 5
 """
 from __future__ import annotations
 import argparse
 import time
 from collections import Counter
 from training.bc.collect import collect_one
 from herding.control.universal import compute_action
 def run_benchmark(n_sheep: int, n_seeds: int, max_steps: int = 100000,
                  drive_mode: str = "differential"):
    results = []
    t0 = time.time()
    for seed in range(n_seeds):
        obs, actions, success, steps = collect_one(
            n_sheep, seed, max_steps, 5, compute_action,
            frame_stack=1, privileged=False, drive_mode=drive_mode,
        )
        results.append({
            "seed": seed,
            "success": success,
            "steps": steps,
            "logged": len(obs),
        })
        tag = "+" if success else "x"
        print(f"  [{tag}] seed={seed:>2d}  steps={steps:>6d}")
    elapsed = time.time() - t0
    successes = [r for r in results if r["success"]]
    failures = [r for r in results if not r["success"]]
    n_ok = len(successes)
    rate = 100.0 * n_ok / len(results)
    mean_steps_ok = (sum(r["steps"] for r in successes) / n_ok) if n_ok else 0
    mean_steps_all = sum(r["steps"] for r in results) / len(results)
    print(f"\n  Results: {n_ok}/{len(results)} success ({rate:.0f}%)")
    print(f"  Mean steps (success): {mean_steps_ok:>8.0f}")
    print(f"  Mean steps (all):     {mean_steps_all:>8.0f}")
    print(f"  Elapsed: {elapsed:.0f}s")
    return {
        "n_sheep": n_sheep,
        "n_seeds": n_seeds,
        "success_rate": rate,
        "n_success": n_ok,
        "mean_steps_success": mean_steps_ok,
        "mean_steps_all": mean_steps_all,
        "elapsed_s": elapsed,
    }
 def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--n-sheep", type=int, default=5)
    parser.add_argument("--seeds", type=int, default=15)
    parser.add_argument("--max-steps", type=int, default=100000)
    parser.add_argument("--drive-mode", default="differential",
                        choices=["differential", "mecanum"])
    args = parser.parse_args()
    from herding.world.geometry import FIELD_SHAPE
    print(f"[bench] world={FIELD_SHAPE}  n_sheep={args.n_sheep}  "
          f"seeds={args.seeds}  drive={args.drive_mode}")
    print()
    result = run_benchmark(args.n_sheep, args.seeds, args.max_steps,
                           args.drive_mode)
    print()
    print("[bench] summary:", result)
 if __name__ == "__main__":
    main()
@@ -1,22 +0,0 @@
 """
 Viewpoint inspector — prints position, orientation and FOV to the console
 once per second.  Attach as the controller of a dummy supervisor robot to
 copy-paste exact camera values into field.wbt.
 """
 from controller import Supervisor
 robot    = Supervisor()
 timestep = int(robot.getBasicTimeStep())
 vp       = robot.getFromDef("VIEWPOINT")
 step = 0
 while robot.step(timestep) != -1:
    if step % 60 == 0:
        pos = vp.getField("position").getSFVec3f()
        ori = vp.getField("orientation").getSFRotation()
        fov = vp.getField("fieldOfView").getSFFloat()
        print(f"position:    {pos[0]:.3f} {pos[1]:.3f} {pos[2]:.3f}")
        print(f"orientation: {ori[0]:.3f} {ori[1]:.3f} {ori[2]:.3f} {ori[3]:.3f}")
        print(f"fieldOfView: {fov:.3f}\n")
    step += 1
@@ -0,0 +1,174 @@
 #!/bin/bash
 # Launch Webots with N sheep enabled and the chosen controller mode.
 # Generates a temporary world file in worlds/field_test.wbt with sheep
 # beyond N commented out, sets the env vars the dog controller reads,
 # then execs Webots on it.
 #
 # Usage:
 #   tools/run_webots.sh [N] [MODE] [DRIVE] [WORLD]
 #     N     : number of active sheep (1..10), default 10
 #     MODE  : "bc" | "rl" | "strombom" | "sequential", default "bc"
 #     DRIVE : "differential" | "mecanum", default "differential"
 #     WORLD : base world name (without .wbt), default "field"
 #             Supported: "field" (rectangular), "field_round" (circular)
 #
 # Examples:
 #   tools/run_webots.sh 10 bc                     # behaviour-cloned MLP, diff drive
 #   tools/run_webots.sh 10 rl mecanum             # KL-PPO fine-tune, mecanum wheels
 #   tools/run_webots.sh 5 sequential field_round  # analytic baseline, round field
 #   tools/run_webots.sh 3 strombom mecanum field_round  # Strömbom, mecanum, round
 #
 # Notes:
 # * bc loads training/runs/bc/policy.zip, rl loads training/runs/rl.
 #   Override via HERDING_POLICY_DIR=/path/to/run env var.
 # * Conda env "tir" must be active (provides stable-baselines3 + torch).
 #
 # Headless-ish (no 3D view, fast sim, no modal dialogs):
 #   WEBOTS_HEADLESS=1 make webots N=10 MODE=rl DRIVE=mecanum
 #   WEBOTS_HEADLESS=1 tools/run_webots.sh 10 rl mecanum
 # This passes --no-rendering --minimize --mode=fast --batch to webots.
 # Webots still needs a display (Qt); on a machine without one use e.g.:
 #   xvfb-run -a env WEBOTS_HEADLESS=1 tools/run_webots.sh 10 rl mecanum
 # Optional extra CLI tokens (space-separated):
 #   WEBOTS_EXTRA_ARGS="--stdout --stderr" WEBOTS_HEADLESS=1 tools/run_webots.sh 10 rl
 set -e
 N=${1:-10}
 MODE=${2:-bc}
 DRIVE=${3:-differential}
 WORLD=${4:-field}
 if (( N < 1 || N > 10 )); then
    echo "N must be 1..10, got $N" >&2; exit 1
 fi
 case "$MODE" in
    bc|rl|strombom|sequential|universal) ;;
    *) echo "MODE must be bc|rl|strombom|sequential|universal, got '$MODE'" >&2; exit 1 ;;
 esac
 case "$DRIVE" in
    differential|mecanum) ;;
    *) echo "DRIVE must be differential|mecanum, got '$DRIVE'" >&2; exit 1 ;;
 esac
 ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
 SRC="$ROOT/worlds/${WORLD}.wbt"
 if [[ ! -f "$SRC" ]]; then
    echo "World file not found: $SRC" >&2; exit 1
 fi
 DST="$ROOT/worlds/${WORLD}_test.wbt"
 if [[ -n "${HERDING_POLICY_DIR:-}" ]]; then
    RESOLVED_POLICY_DIR="$HERDING_POLICY_DIR"
 else
    # Try drive-mode-specific path first, then legacy path.
    if [[ "$MODE" == "rl" ]]; then
        DRIVED="$ROOT/training/runs/rl_${DRIVE}"
        LEGACY="$ROOT/training/runs/rl"
    else
        DRIVED="$ROOT/training/runs/bc_${DRIVE}"
        LEGACY="$ROOT/training/runs/bc"
    fi
    if [[ -d "$DRIVED" ]]; then
        RESOLVED_POLICY_DIR="$DRIVED"
    else
        RESOLVED_POLICY_DIR="$LEGACY"
    fi
 fi
 cp "$SRC" "$DST"
 # Swap robot proto based on drive mode.
 # Base worlds reference ShepherdDog (diff-drive). For mecanum we swap in
 # ShepherdDogMecanum and inject mecanum contact properties.
 if [[ "$DRIVE" == "mecanum" ]]; then
    sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum.proto"|' "$DST"
    sed -i 's|^ShepherdDog {|ShepherdDogMecanum {|' "$DST"
    # Inject mecanum contact properties after the existing contactProperties block.
    python3 -c "
 import re, sys
 with open(sys.argv[1], 'r') as f:
    txt = f.read()
 # Find the closing ']' of contactProperties and insert before it.
 mec = '''
    ContactProperties {
      material1 \"MecanumWheel\"
      coulombFriction [
        2
      ]
      bounce 0
      forceDependentSlip [
        10
      ]
      softCFM 0.0001
    }'''
 # Insert before the first ']' that closes contactProperties [...]
 txt = re.sub(r'(contactProperties\s*\[[^\]]*)(\])', r'\1' + mec + r'\2', txt, count=1)
 with open(sys.argv[1], 'w') as f:
    f.write(txt)
 " "$DST"
 fi
 # Comment out sheep N+1..10 by prefixing the matching Sheep { ... } line.
 for i in $(seq $((N+1)) 10); do
    sed -i "s|^Sheep .* \"sheep${i}\".*|# &|" "$DST"
 done
 active=$(grep -c '^Sheep' "$DST")
 echo "------------------------------------------------------------"
 echo "World      : $DST"
 echo "Mode       : $MODE"
 echo "Drive      : $DRIVE"
 echo "Sheep      : $active active"
 echo "Policy dir : $RESOLVED_POLICY_DIR"
 echo "------------------------------------------------------------"
 # Webots strips HERDING_* env vars from controller subprocesses in some
 # setups, so we also write a runtime config file the controller reads.
 cat > "$ROOT/herding_runtime.cfg" <<EOF
 HERDING_MODE=$MODE
 HERDING_POLICY_DIR=$RESOLVED_POLICY_DIR
 HERDING_DRIVE=$DRIVE
 HERDING_WORLD=$WORLD
 EOF
 export HERDING_MODE="$MODE"
 export HERDING_POLICY_DIR="$RESOLVED_POLICY_DIR"
 export HERDING_DRIVE="$DRIVE"
 export HERDING_WORLD="$WORLD"
 # The controller writes this sentinel when all GT sheep are penned. We
 # poll for it and kill Webots so the run finishes cleanly instead of
 # idling for minutes after the task is done.
 DONE_FILE="$ROOT/training/.run_done"
 mkdir -p "$(dirname "$DONE_FILE")"
 rm -f "$DONE_FILE"
 if [[ "${WEBOTS_HEADLESS:-}" == "1" ]]; then
    echo "[run_webots] headless flags: --no-rendering --minimize --mode=fast --batch"
    # shellcheck disable=SC2086
    webots --no-rendering --minimize --mode=fast --batch ${WEBOTS_EXTRA_ARGS:-} "$DST" &
 else
    # shellcheck disable=SC2086
    webots ${WEBOTS_EXTRA_ARGS:-} "$DST" &
 fi
 WEBOTS_PID=$!
 cleanup() {
    kill "$WEBOTS_PID" 2>/dev/null || true
    wait "$WEBOTS_PID" 2>/dev/null || true
    exit 0
 }
 trap cleanup INT TERM
 # Poll for the sentinel; bail when Webots exits on its own or when the
 # user closes the window.
 while kill -0 "$WEBOTS_PID" 2>/dev/null; do
    if [[ -f "$DONE_FILE" ]]; then
        echo "[run_webots] all sheep penned — closing Webots"
        sleep 1                       # let the controller print its line
        kill "$WEBOTS_PID" 2>/dev/null || true
        break
    fi
    sleep 1
 done
 wait "$WEBOTS_PID" 2>/dev/null || true
@@ -0,0 +1,90 @@
 # Training and Evaluation Details
 This file is the command-level companion to the root README. It focuses
 on data collection, BC, PPO fine-tuning, evaluation flags, and generated
 artifacts; use the root README for the high-level architecture and
 Webots demo quick start.
 Two stages, strictly sequential:
 ```
 sim demos (Strömbom on tracker output, K=4 frame stack)
    │
    ▼
 bc/pretrain.py  ──►  runs/bc   (Strömbom-imitated MLP)
    │
    ▼  KL-regularised PPO fine-tune
    │
 runs/rl                        (deployed `rl` mode — beats BC and Strömbom)
 ```
 ## Files
 ```
 herding_env.py     — Gymnasium env (LiDAR raycast + tracker by default)
 bc/pretrain.py     — MSE + cosine BC of (obs, action) demos into MlpPolicy
 rl/train.py       — KL-regularised PPO fine-tune of a BC checkpoint
 eval.py            — multi-seed analytic / learned policy comparison
 runs/              — checkpoints (whitelisted entries in top-level .gitignore)
 (Unit + integration tests live in the top-level ``tests/`` directory;
 run with ``python -m pytest tests/``.)
 ```
 ## End-to-end pipeline
 The simplest way to run everything is the Makefile at the project
 root: ``make`` does the full chain, ``make rl`` rebuilds whatever's
 needed up to that point, etc. The individual stages below are kept
 explicit for cases where you want to tune a single step.
 ```bash
 # 1. Sim demos with the active-scan + Strömbom teacher under LiDAR
 #    perception. K=4 frame stack so the MLP has temporal context.
 python -m training.bc.collect --teacher strombom \
    --out training/bc/demos.npz --seeds-per-n 15 --subsample 3 --frame-stack 4
 # 2. Behaviour-clone.
 python -m training.bc.pretrain --demos training/bc/demos.npz \
    --out training/runs/bc --epochs 60 --net-arch 512,512
 # 3. KL-regularised PPO fine-tune of bc.
 python -m training.rl.train \
    --bc training/runs/bc --out training/runs/rl \
    --total-timesteps 1000000
 # 4. Multi-seed eval (env-side, fast).
 python -m training.eval --policy training/runs/rl \
    --max-flock 10 --max-steps 15000 --n-seeds 10
 ```
 `bc/pretrain.py` saves the **best-val_cos** snapshot, not the final
 epoch — multi-modal teachers make training noisy and the last epoch is
 often worse than an earlier one.
 `rl/train.py` loads BC weights into both a trainable policy and a
 frozen reference, fixes `log_std` small, and adds `β · KL(π‖π_ref)` to
 the loss so the policy can only move within a trust region around BC.
 See the file header for hyperparameter rationale.
 ## Available analytic teachers
 | Name | What it does | Notes |
 |---|---|---|
 | `strombom` | Strömbom 2014 — collect when flock is scattered, drive CoM otherwise | Default; works for n=1–10 under tight cohesion |
 | `sequential` | Pick the sheep closest to the pen and drive only it | Alternative; needs loose-cohesion regime |
 Both are wrapped at demo-collection time in
 `herding/control/active_scan.py:ActiveScanTeacher`, which adds an
 opening in-place rotation, walk-to-centre when the LiDAR sees
 nothing, and near-sheep speed modulation (same modulation
 `herding/control/modulation.py` applies to every dog mode at
 inference).
 ## Evaluating analytic teachers directly
 ```
 python -m training.eval --policy strombom    --max-flock 10 --max-steps 15000 --n-seeds 10
 python -m training.eval --policy sequential  --max-flock 10 --max-steps 15000 --n-seeds 10
 ```
@@ -0,0 +1,211 @@
 """Collect (obs, action) demonstrations from an analytic teacher.
 Runs the chosen teacher across a grid of ``(n_sheep, seed)`` combos at
 full difficulty, logs every Nth ``(obs, action)`` pair, and saves
 successful trajectories to ``.npz`` for behaviour cloning. The teacher
 is wrapped in :class:`ActiveScanTeacher` by default so it operates on
 the same partial-obs view the student will have at deployment.
 Usage::
    python -m training.bc.collect --teacher strombom \\
        --out training/bc/demos.npz --frame-stack 4
 """
 from __future__ import annotations
 import argparse
 import os
 import time
 from pathlib import Path
 import numpy as np
 # Early CLI parse so we can configure geometry before heavy imports.
 # (argparse is used again below for the full parse; this is a lightweight
 # pre-pass that only reads --world.)
 _pre_argv = [a for a in os.sys.argv[1:]]
 _pre_world = None
 for i, a in enumerate(_pre_argv):
    if a == "--world" and i + 1 < len(_pre_argv):
        _pre_world = _pre_argv[i + 1]
        break
    if a.startswith("--world="):
        _pre_world = a.split("=", 1)[1]
        break
 if _pre_world is not None:
    from herding.world.geometry import configure as _geo_configure
    _geo_configure(_pre_world)
    os.environ["HERDING_WORLD"] = _pre_world
 from herding.control.active_scan import ActiveScanTeacher
 from herding.world.geometry import PEN_ENTRY, FIELD_SHAPE
 from herding.control.sequential import compute_action as sequential_action
 from herding.control.strombom import compute_action as strombom_action
 from herding.control.universal import compute_action as universal_action
 from training.herding_env import HerdingEnv
 TEACHERS = {
    "sequential": sequential_action,
    "strombom": strombom_action,
    "universal": universal_action,
 }
 def _call_teacher(fn, dog_xy, dog_heading, sheep_positions, pen_target,
                  drive_mode="differential"):
    """Call any teacher function and return (vx, vy, omega, mode).
    Normalizes across 3-tuple teachers (vx, vy, mode) and 4-tuple
    universal teacher (vx, vy, omega, mode).  ActiveScanTeacher (when
    invoked with drive_mode="mecanum") propagates the base teacher's
    omega — see test_active_scan_preserves_mecanum_omega.
    """
    # The universal teacher and ActiveScanTeacher accept the extended
    # (dog_xy, heading, sheep, pen, drive_mode) signature.  Older
    # teachers accept (dog_xy, sheep, pen).  Detect by trying the
    # extended call first.
    try:
        result = fn(dog_xy, dog_heading, sheep_positions, pen_target,
                    drive_mode)
    except TypeError:
        try:
            result = fn(dog_xy, dog_heading, sheep_positions, pen_target)
        except TypeError:
            result = fn(dog_xy, sheep_positions, pen_target)
    if len(result) == 4:
        return result  # (vx, vy, omega, mode)
    vx, vy, mode = result
    return vx, vy, 0.0, mode
 def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int,
                teacher_fn, frame_stack: int = 1, privileged: bool = False,
                drive_mode: str = "differential"):
    env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
                    difficulty=1.0, seed=seed, frame_stack=frame_stack,
                    drive_mode=drive_mode)
    obs, _ = env.reset(seed=seed)
    obs_list, action_list = [], []
    scan_teacher = ActiveScanTeacher(teacher_fn)
    for step in range(max_steps):
        if privileged:
            positions = {f"s{i}": (float(env.sheep_x[i]), float(env.sheep_y[i]))
                         for i in range(env.n_sheep) if not env.sheep_penned[i]}
            if not positions:
                break
            vx, vy, omega, _mode = _call_teacher(
                teacher_fn, (env.dog_x, env.dog_y), env.dog_heading,
                positions, PEN_ENTRY, drive_mode,
            )
        else:
            positions = env.perceived_positions()
            result = _call_teacher(
                scan_teacher, (env.dog_x, env.dog_y), env.dog_heading,
                positions, PEN_ENTRY, drive_mode,
            )
            vx, vy, omega, _mode = result
        if drive_mode == "mecanum":
            action = np.array([vx, vy, omega], dtype=np.float32)
        else:
            action = np.array([vx, vy], dtype=np.float32)
        if step % subsample == 0:
            obs_list.append(obs.copy())
            action_list.append(action.copy())
        obs, _r, term, trunc, _info = env.step(action)
        if term or trunc:
            break
    success = bool(env.sheep_penned.all())
    return (
        np.asarray(obs_list, dtype=np.float32),
        np.asarray(action_list, dtype=np.float32),
        success,
        env.steps,
    )
 def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", default="training/bc/demos.npz")
    parser.add_argument("--n-sheep-list", default="1,2,3,5,8,10")
    parser.add_argument("--seeds-per-n", type=int, default=15)
    parser.add_argument("--max-steps", type=int, default=30000)
    parser.add_argument("--subsample", type=int, default=5,
                        help="Keep every Nth (obs, action) pair.")
    parser.add_argument("--keep-failures", action="store_true",
                        help="Include partial-success trajectories. Default off.")
    parser.add_argument("--teacher", default="universal",
                        choices=list(TEACHERS.keys()),
                        help="Which analytic teacher to demonstrate.")
    parser.add_argument("--frame-stack", type=int, default=1,
                        help="Concatenate the last K obs into a "
                             "(32·K)-D vector for the policy.")
    parser.add_argument("--privileged", action="store_true",
                        help="Teacher reads ground truth instead of "
                             "tracker output (asymmetric BC).")
    parser.add_argument("--drive-mode", default="differential",
                        choices=["differential", "mecanum"],
                        help="Drive mode for the dog robot.")
    parser.add_argument("--world", default=None,
                        choices=["field", "field_round"],
                        help="World shape. If not set, uses HERDING_WORLD "
                             "env var or defaults to 'field'. Must be set "
                             "before geometry is imported.")
    args = parser.parse_args()
    # Validate --world matches geometry (already configured by the
    # early pre-parse above, or by HERDING_WORLD env var).
    if args.world is not None and args.world != FIELD_SHAPE:
        print(f"[demos] WARNING: --world={args.world} but geometry is "
              f"'{FIELD_SHAPE}'. This should not happen — file a bug.")
    teacher_fn = TEACHERS[args.teacher]
    print(f"[demos] teacher: {args.teacher}  world: {FIELD_SHAPE}")
    n_sheep_list = [int(x) for x in args.n_sheep_list.split(",")]
    print(f"[demos] grid: n_sheep={n_sheep_list}, seeds={args.seeds_per_n}, "
          f"max_steps={args.max_steps}, subsample={args.subsample}")
    all_obs, all_actions, all_meta = [], [], []
    t_start = time.time()
    n_success = 0; n_total = 0
    for n in n_sheep_list:
        for seed in range(args.seeds_per_n):
            obs, actions, success, total_steps = collect_one(
                n, seed, args.max_steps, args.subsample, teacher_fn,
                frame_stack=args.frame_stack, privileged=args.privileged,
                drive_mode=args.drive_mode,
            )
            n_total += 1
            if success:
                n_success += 1
            keep = success or args.keep_failures
            if keep and len(obs) > 0:
                all_obs.append(obs)
                all_actions.append(actions)
                all_meta.append((n, seed, len(obs), int(success), total_steps))
            tag = "✓" if success else "✗"
            print(f"  [{tag}] n={n:>2d} seed={seed:>2d}  steps={total_steps:>6d}  "
                  f"logged={len(obs):>5d}")
    if not all_obs:
        raise RuntimeError("No trajectories kept — try --keep-failures.")
    obs = np.concatenate(all_obs, axis=0)
    actions = np.concatenate(all_actions, axis=0)
    meta = np.array(all_meta, dtype=np.int32)
    Path(args.out).parent.mkdir(parents=True, exist_ok=True)
    np.savez(args.out, obs=obs, actions=actions, meta=meta)
    elapsed = time.time() - t_start
    print(f"\n=== {n_success}/{n_total} trajectories successful ({100*n_success/n_total:.0f}%) ===")
    print(f"=== {len(obs)} transitions saved to {args.out} ===")
    print(f"=== obs={obs.shape}, actions={actions.shape}, elapsed={elapsed:.0f}s ===")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,235 @@
 """Behaviour cloning of an analytic teacher into an SB3 MlpPolicy.
 Trains the mean-action head against ``(obs, action)`` demos from
 ``training.bc.collect`` using ``MSE + (1 − cos_sim)`` — the cosine
 term prevents collapse toward zero against unit-vector targets. The
 best-by-val_cos snapshot is restored at the end of training because
 multi-modal teachers make the last epoch unreliable.
 Output zip is loadable by ``PPO.load(...)`` and consumed by
 ``HERDING_MODE=bc`` in the dog controller.
 Usage::
    python -m training.bc.pretrain \\
        --demos training/bc/demos.npz \\
        --out training/runs/bc
 """
 from __future__ import annotations
 import argparse
 import time
 from pathlib import Path
 import numpy as np
 import torch
 import torch.nn as nn
 import torch.optim as optim
 from torch.utils.data import DataLoader, TensorDataset
 from stable_baselines3 import PPO
 from stable_baselines3.common.vec_env import DummyVecEnv
 from training.herding_env import HerdingEnv
 def build_model(net_arch_pi, net_arch_vf, log_std_init: float,
                frame_stack: int = 1, drive_mode: str = "differential"):
    """Build a fresh SB3 PPO solely as a vehicle for the policy weights.
    PPO's training-loop plumbing isn't used during BC. ``frame_stack``
    must match the demo file so the env's obs space agrees with the
    recorded obs shape.
    """
    env = DummyVecEnv([lambda: HerdingEnv(frame_stack=frame_stack,
                                          drive_mode=drive_mode)])
    model = PPO(
        "MlpPolicy", env,
        policy_kwargs=dict(
            net_arch=dict(pi=net_arch_pi, vf=net_arch_vf),
            log_std_init=log_std_init,
        ),
        verbose=0,
    )
    return model, env
 def policy_forward_mean(policy, obs_batch):
    """Return the deterministic mean action for an obs batch.
    SB3's ActorCriticPolicy routes ``forward`` through a Distribution
    wrapper; we replicate the underlying chain
    ``extract_features → mlp_extractor → action_net``.
    """
    features = policy.extract_features(obs_batch)
    pi_features = features[0] if isinstance(features, tuple) else features
    latent_pi, _ = policy.mlp_extractor(pi_features)
    return policy.action_net(latent_pi)
 def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--demos", default="training/bc/demos.npz")
    parser.add_argument("--out", default="training/runs/bc")
    parser.add_argument("--epochs", type=int, default=60)
    parser.add_argument("--batch-size", type=int, default=256)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--val-split", type=float, default=0.1)
    parser.add_argument("--net-arch", default="256,256",
                        help="Comma-separated hidden layer widths.")
    parser.add_argument("--log-std-init", type=float, default=0.5)
    parser.add_argument("--cos-weight", type=float, default=1.0,
                        help="Weight of the (1 - cosine_similarity) loss "
                             "term; balances against MSE.")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--device", default="cpu")
    parser.add_argument("--drive-mode", default=None,
                        choices=["differential", "mecanum"],
                        help="Drive mode. If not set, inferred from "
                             "demo action dimension (2→differential, 3→mecanum).")
    args = parser.parse_args()
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    # --- Load demos ---
    print(f"[bc] loading demos from {args.demos}")
    data = np.load(args.demos)
    obs = data["obs"].astype(np.float32)
    actions = data["actions"].astype(np.float32)
    meta = data["meta"]
    print(f"[bc] obs={obs.shape}  actions={actions.shape}  trajectories={len(meta)}")
    if obs.size == 0:
        raise RuntimeError("Empty demo file.")
    a_norms = np.linalg.norm(actions, axis=1)
    print(f"[bc] action L2 norm: mean={a_norms.mean():.3f}  "
          f"min={a_norms.min():.3f}  max={a_norms.max():.3f}")
    # --- Train/val split ---
    n = len(obs)
    perm = np.random.permutation(n)
    n_val = int(n * args.val_split)
    val_idx, train_idx = perm[:n_val], perm[n_val:]
    print(f"[bc] train={len(train_idx)}  val={len(val_idx)}")
    obs_t = torch.from_numpy(obs)
    act_t = torch.from_numpy(actions)
    train_loader = DataLoader(
        TensorDataset(obs_t[train_idx], act_t[train_idx]),
        batch_size=args.batch_size, shuffle=True,
    )
    val_loader = DataLoader(
        TensorDataset(obs_t[val_idx], act_t[val_idx]),
        batch_size=args.batch_size, shuffle=False,
    )
    net_arch_pi = [int(x) for x in args.net_arch.split(",")]
    net_arch_vf = net_arch_pi[:]
    # Frame stack is inferred from the demo obs dim.
    obs_dim = obs.shape[1]
    from herding.perception.obs import OBS_DIM as _SINGLE
    if obs_dim % _SINGLE != 0:
        raise RuntimeError(f"demo obs dim {obs_dim} is not a multiple of {_SINGLE}")
    frame_stack = obs_dim // _SINGLE
    if frame_stack > 1:
        print(f"[bc] inferred frame_stack={frame_stack} from demo obs dim {obs_dim}")
    # Infer drive mode from action dimension if not explicitly set.
    action_dim = actions.shape[1]
    if args.drive_mode is not None:
        drive_mode = args.drive_mode
    elif action_dim == 3:
        drive_mode = "mecanum"
    else:
        drive_mode = "differential"
    print(f"[bc] drive_mode={drive_mode} (action_dim={action_dim})")
    model, _env = build_model(net_arch_pi, net_arch_vf, args.log_std_init,
                              frame_stack=frame_stack, drive_mode=drive_mode)
    policy = model.policy.to(args.device)
    optimizer = optim.Adam(policy.parameters(), lr=args.lr)
    # --- Train ---
    print(f"[bc] training: epochs={args.epochs}  batch={args.batch_size}  "
          f"lr={args.lr}  device={args.device}")
    t_start = time.time()
    best_val = float("inf")
    best_cos = -1.0
    best_state = None  # restored at the end so noisy last epochs don't win
    def combined_loss(pred, target):
        mse = nn.functional.mse_loss(pred, target)
        p_norm = pred.norm(dim=1).clamp_min(1e-6)
        t_norm = target.norm(dim=1).clamp_min(1e-6)
        cos_sim = (pred * target).sum(dim=1) / (p_norm * t_norm)
        cos_loss = (1.0 - cos_sim).mean()
        return mse + args.cos_weight * cos_loss, mse.item(), cos_sim.mean().item()
    for epoch in range(args.epochs):
        policy.train()
        train_loss_total, train_mse_total, train_cos_total, train_count = 0.0, 0.0, 0.0, 0
        for ob_batch, act_batch in train_loader:
            ob_batch = ob_batch.to(args.device)
            act_batch = act_batch.to(args.device)
            optimizer.zero_grad()
            mean_action = policy_forward_mean(policy, ob_batch)
            loss, mse_val, cos_val = combined_loss(mean_action, act_batch)
            loss.backward()
            optimizer.step()
            bs = ob_batch.size(0)
            train_loss_total += loss.item() * bs
            train_mse_total += mse_val * bs
            train_cos_total += cos_val * bs
            train_count += bs
        train_mse = train_mse_total / max(1, train_count)
        train_cos = train_cos_total / max(1, train_count)
        policy.eval()
        val_total, val_count = 0.0, 0
        cos_sim_total = 0.0
        with torch.no_grad():
            for ob_batch, act_batch in val_loader:
                ob_batch = ob_batch.to(args.device)
                act_batch = act_batch.to(args.device)
                mean_action = policy_forward_mean(policy, ob_batch)
                bs = ob_batch.size(0)
                val_total += nn.functional.mse_loss(
                    mean_action, act_batch, reduction="sum",
                ).item()
                m_norm = mean_action.norm(dim=1).clamp_min(1e-6)
                a_norm = act_batch.norm(dim=1).clamp_min(1e-6)
                cos = (mean_action * act_batch).sum(dim=1) / (m_norm * a_norm)
                cos_sim_total += cos.sum().item()
                val_count += bs
        val_mse = val_total / max(1, val_count) / actions.shape[1]
        cos_sim = cos_sim_total / max(1, val_count)
        print(f"  epoch {epoch+1:>2d}/{args.epochs}  "
              f"train_mse={train_mse:.4f}  train_cos={train_cos:+.3f}  "
              f"val_mse={val_mse:.4f}  val_cos={cos_sim:+.3f}")
        if val_mse < best_val:
            best_val = val_mse
        if cos_sim > best_cos:
            best_cos = cos_sim
            best_state = {k: v.detach().cpu().clone()
                          for k, v in policy.state_dict().items()}
    if best_state is not None:
        policy.load_state_dict(best_state)
        print(f"[bc] restored best-val_cos snapshot (cos={best_cos:.3f})")
    elapsed = time.time() - t_start
    print(f"[bc] done in {elapsed:.0f}s  best_val_mse={best_val:.4f}")
    # --- Save ---
    out_dir = Path(args.out)
    out_dir.mkdir(parents=True, exist_ok=True)
    model.save(out_dir / "policy.zip")
    print(f"[bc] saved policy to {out_dir / 'policy.zip'}")
    print(f"\n[bc] verify with:  "
          f"python -m training.eval --policy {out_dir}")
 if __name__ == "__main__":
    main()
@@ -1,14 +0,0 @@
 {
    "W_PER_SHEEP": 2.0,
    "W_ALIGN": 0.05,
    "W_PEN_BONUS": 10.0,
    "W_COMPLETE": 100.0,
    "W_STEP_COST": 0.02,
    "W_COMPACT": 0.0,
    "W_WALL_TOUCH": 0.0,
    "WALL_TOUCH_BUFFER": 0.4,
    "ALIGN_SHAPE": "standoff",
    "ALIGN_GATED": true,
    "ENTRY_AWARE": true,
    "ent_coef": 0.02
 }
@@ -0,0 +1,175 @@
 """Env-side evaluation of analytic or learned policies.
 Reports success rate, mean steps and mean penned per flock size for
 ``n_sheep ∈ 1..max_flock`` across ``--n-seeds`` seeds each.
 Usage::
    python -m training.eval --policy training/runs/rl --n-seeds 10
    python -m training.eval --policy strombom
 """
 from __future__ import annotations
 import argparse
 import os
 from pathlib import Path
 from statistics import mean
 import numpy as np
 # Early CLI pre-parse for --world so geometry is configured before
 # other herding.* modules are imported.
 _pre_argv = [a for a in os.sys.argv[1:]]
 _pre_world = None
 for i, a in enumerate(_pre_argv):
    if a == "--world" and i + 1 < len(_pre_argv):
        _pre_world = _pre_argv[i + 1]
        break
    if a.startswith("--world="):
        _pre_world = a.split("=", 1)[1]
        break
 if _pre_world is not None:
    from herding.world.geometry import configure as _geo_configure
    _geo_configure(_pre_world)
    os.environ["HERDING_WORLD"] = _pre_world
 from herding.world.geometry import MAX_SHEEP, PEN_ENTRY
 from herding.control.sequential import compute_action as sequential_action
 from herding.control.strombom import compute_action as strombom_action
 from training.herding_env import HerdingEnv
 def rollout(env: HerdingEnv, predict_fn, max_steps: int) -> dict:
    obs, _ = env.reset()
    for t in range(max_steps):
        action = predict_fn(env, obs)
        obs, _r, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            return {
                "success": bool(info.get("is_success", False)),
                "steps": info.get("steps", t + 1),
                "n_penned": info.get("n_penned", 0),
            }
    return {"success": False, "steps": max_steps,
            "n_penned": int(env.sheep_penned.sum())}
 def make_analytic_predictor(action_fn, drive_mode: str = "differential"):
    """Wrap an analytic teacher so it runs on the env's exposed
    perception (tracker in LiDAR mode, GT in privileged mode)."""
    def _predict(env, _obs):
        positions = env.perceived_positions()
        vx, vy, _mode = action_fn((env.dog_x, env.dog_y), positions, PEN_ENTRY)
        if drive_mode == "mecanum":
            return np.array([vx, vy, 0.0], dtype=np.float32)
        return np.array([vx, vy], dtype=np.float32)
    return _predict
 def make_strombom_predictor(drive_mode: str = "differential"):
    return make_analytic_predictor(strombom_action, drive_mode)
 def make_policy_predictor(model, vecnorm):
    def _predict(_env, obs):
        obs_b = np.asarray(obs, dtype=np.float32).reshape(1, -1)
        if vecnorm is not None:
            obs_b = vecnorm.normalize_obs(obs_b)
        action, _ = model.predict(obs_b, deterministic=True)
        return action[0]
    return _predict
 def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--policy", required=True,
                        help="'strombom', 'sequential', or path to a "
                             "policy directory / zip.")
    parser.add_argument("--n-seeds", type=int, default=10)
    parser.add_argument("--max-steps", type=int, default=5000)
    parser.add_argument("--max-flock", type=int, default=MAX_SHEEP)
    parser.add_argument("--difficulty", type=float, default=1.0,
                        help="0 = sheep spawn near the gate (easy); "
                             "1 = full field (deployment distribution).")
    parser.add_argument("--drive-mode", default="differential",
                        choices=["differential", "mecanum"],
                        help="Drive mode for the dog robot.")
    parser.add_argument("--world", default=None,
                        choices=["field", "field_round"],
                        help="World shape. If not set, uses HERDING_WORLD "
                             "env var or defaults to 'field'.")
    args = parser.parse_args()
    drive_mode = args.drive_mode
    frame_stack = 1
    if args.policy == "strombom":
        predict = make_analytic_predictor(strombom_action, drive_mode)
    elif args.policy == "sequential":
        predict = make_analytic_predictor(sequential_action, drive_mode)
    else:
        from stable_baselines3 import PPO
        run = Path(args.policy)
        if run.is_file():
            zip_path = run
        else:
            for name in ("policy.zip", "final.zip"):
                if (run / name).exists():
                    zip_path = run / name
                    break
            else:
                raise FileNotFoundError(
                    f"No checkpoint found in {run} "
                    f"(tried policy.zip, final.zip)"
                )
        model = PPO.load(str(zip_path), device="auto")
        from herding.perception.obs import OBS_DIM as _SINGLE
        policy_obs_dim = int(model.observation_space.shape[0])
        if policy_obs_dim % _SINGLE == 0 and policy_obs_dim // _SINGLE >= 1:
            frame_stack = policy_obs_dim // _SINGLE
            if frame_stack > 1:
                print(f"[eval] policy expects frame_stack={frame_stack}")
        vecnorm = None
        vn_path = run / "vecnormalize.pkl"
        if not vn_path.exists() and run.parent.name != "best":
            vn_path = run.parent / "vecnormalize.pkl"
        if vn_path.exists():
            import pickle
            with open(vn_path, "rb") as f:
                vecnorm = pickle.load(f)
            vecnorm.training = False
            vecnorm.norm_reward = False
        predict = make_policy_predictor(model, vecnorm)
    # Infer drive_mode from policy action dim if using a learned policy.
    if args.policy not in ("strombom", "sequential"):
        policy_action_dim = int(model.action_space.shape[0])
        if policy_action_dim == 2 and drive_mode == "mecanum":
            drive_mode = "differential"
            print(f"[eval] policy has 2D actions — overriding drive_mode "
                  f"to differential")
        elif policy_action_dim == 3 and drive_mode == "differential":
            drive_mode = "mecanum"
            print(f"[eval] policy has 3D actions — overriding drive_mode "
                  f"to mecanum")
    print(f"{'n_sheep':>8} {'success%':>10} {'mean_steps':>12} {'mean_penned':>12}")
    print("-" * 46)
    for n in range(1, args.max_flock + 1):
        successes, steps, penned = [], [], []
        for seed in range(args.n_seeds):
            env = HerdingEnv(n_sheep=n, max_steps=args.max_steps,
                             difficulty=args.difficulty, seed=seed,
                             frame_stack=frame_stack, drive_mode=drive_mode)
            r = rollout(env, predict, args.max_steps)
            successes.append(int(r["success"]))
            steps.append(r["steps"])
            penned.append(r["n_penned"])
        sr = 100.0 * mean(successes)
        ms = mean(steps)
        mp = mean(penned)
        print(f"{n:>8d} {sr:>9.1f}% {ms:>12.0f} {mp:>12.2f}")
 if __name__ == "__main__":
    main()
@@ -1,318 +0,0 @@
 """
 Parity test: verify 2D training env matches Webots controller implementations.
 Tests:
 1. Observation building: HerdingEnv._obs() vs shepherd_dog_rl.build_obs()
 2. Dog drive: HerdingEnv._step_dog_substep() vs shepherd_dog_rl.drive() math
 3. Sheep drive: HerdingEnv._sheep_drive() vs sheep.py drive() math
 """
 import sys
 import os
 import math
 import numpy as np
 # Make imports work from project root
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 sys.path.insert(0, os.path.join(os.path.dirname(__file__)))
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "controllers", "shepherd_dog_rl"))
 from herding_env import HerdingEnv
 # Re-implement the Webots functions standalone (no Webots dependency)
 FIELD = 15.0
 PEN_CENTER = np.array([11.5, -11.5], dtype=np.float32)
 PEN_ENTRY  = np.array([11.5,  -8.0], dtype=np.float32)
 PEN_X = (10.0, 13.0)
 PEN_Y = (-15.0, -8.0)
 ENTRY_AWARE = True
 def webots_build_obs(dog_pos, sheep_positions, n_sheep, dog_heading):
    """Standalone version of shepherd_dog_rl.py build_obs()."""
    D = 2 * FIELD
    active_pos = np.array(
        [p for p in sheep_positions
         if not (PEN_X[0] < p[0] < PEN_X[1] and PEN_Y[0] < p[1] < PEN_Y[1])],
        dtype=np.float32
    )
    n_active = len(active_pos)
    if n_active > 0:
        com = active_pos.mean(axis=0)
        d_from_com = np.linalg.norm(active_pos - com, axis=1)
        sorted_idx = np.argsort(d_from_com)[::-1]
        radius = float(d_from_com[sorted_idx[0]])
        def nth(n):
            return active_pos[sorted_idx[n]] if len(sorted_idx) > n else com
        far1, far2, far3 = nth(0), nth(1), nth(2)
    else:
        com = PEN_CENTER.copy()
        radius = 0.0
        far1 = far2 = far3 = PEN_CENTER.copy()
    frac_active = n_active / max(n_sheep, 1)
    pen_ref = PEN_ENTRY if ENTRY_AWARE else PEN_CENTER
    return np.array([
        dog_pos[0] / FIELD, dog_pos[1] / FIELD,
        (com[0] - dog_pos[0]) / D, (com[1] - dog_pos[1]) / D,
        (far1[0] - com[0]) / D, (far1[1] - com[1]) / D,
        (far2[0] - com[0]) / D, (far2[1] - com[1]) / D,
        (far3[0] - com[0]) / D, (far3[1] - com[1]) / D,
        (pen_ref[0] - com[0]) / D, (pen_ref[1] - com[1]) / D,
        (pen_ref[0] - far1[0]) / D, (pen_ref[1] - far1[1]) / D,
        radius / D,
        frac_active,
        math.cos(dog_heading), math.sin(dog_heading),
    ], dtype=np.float32)
 def webots_dog_drive(heading, speed_ms, wheel_r=0.038, k_turn=4.0,
                     motor_max=70.0, axle_track=0.28):
    """Standalone version of shepherd_dog_rl.py drive() kinematics.
    Returns (v_linear, omega, left_w, right_w).
    """
    err = math.atan2(math.sin(heading), math.cos(heading))
    fwd_ms = speed_ms * max(0.0, math.cos(err))
    fwd_rad = fwd_ms / wheel_r
    turn = k_turn * err
    l = max(-motor_max, min(motor_max, fwd_rad - turn))
    r = max(-motor_max, min(motor_max, fwd_rad + turn))
    v = wheel_r * 0.5 * (r + l)
    w = (wheel_r / axle_track) * (r - l)
    return v, w, l, r
 def webots_sheep_drive(heading, speed_rad, wheel_r=0.031, k_turn=4.0,
                       motor_max=22.0, axle_track=0.20):
    """Standalone version of sheep.py drive() kinematics."""
    err = math.atan2(math.sin(heading), math.cos(heading))
    fwd = speed_rad * max(0.0, math.cos(err))
    k = 4.0
    l = max(-motor_max, min(motor_max, fwd - k * err))
    r = max(-motor_max, min(motor_max, fwd + k * err))
    v = wheel_r * 0.5 * (r + l)
    w = (wheel_r / axle_track) * (r - l)
    return v, w, l, r
 def test_obs_parity():
    """Test that build_obs matches between 2D env and Webots controller."""
    print("=== Test 1: Observation Parity ===")
    env = HerdingEnv(n_sheep=3)
    # Set ENTRY_AWARE to match our webots constant
    env.ENTRY_AWARE = ENTRY_AWARE
    env.reset(seed=42)
    # Manually set positions for a controlled test
    env.dog_pos = np.array([5.0, 3.0], dtype=np.float32)
    env.dog_heading = 1.2
    env.sheep_pos[0] = np.array([0.0, 0.0], dtype=np.float32)
    env.sheep_pos[1] = np.array([2.0, -1.0], dtype=np.float32)
    env.sheep_pos[2] = np.array([11.5, -11.5], dtype=np.float32)  # penned
    env.penned[0] = False
    env.penned[1] = False
    env.penned[2] = True
    obs_2d = env._obs()
    # Build equivalent Webots observation
    sheep_positions = [
        env.sheep_pos[0].tolist(),
        env.sheep_pos[1].tolist(),
        env.sheep_pos[2].tolist(),
    ]
    obs_webots = webots_build_obs(
        env.dog_pos, sheep_positions, 3, env.dog_heading
    )
    max_diff = float(np.max(np.abs(obs_2d - obs_webots)))
    print(f"  Max element-wise diff: {max_diff:.2e}")
    if max_diff < 1e-6:
        print("  PASS: Observations match")
    else:
        print("  FAIL: Observations differ!")
        for i in range(18):
            if abs(obs_2d[i] - obs_webots[i]) > 1e-6:
                print(f"    dim {i}: 2d={obs_2d[i]:.6f}  webots={obs_webots[i]:.6f}")
    return max_diff < 1e-6
 def test_dog_drive_parity():
    """Test that dog diff-drive matches Webots controller."""
    print("\n=== Test 2: Dog Drive Parity ===")
    env = HerdingEnv(n_sheep=1)
    env.reset(seed=42)
    all_pass = True
    test_cases = [
        # (heading_error, speed_ms) — target_heading relative to current heading
        (0.0, 2.5),      # aligned, full speed
        (0.5, 2.5),      # 30deg error
        (1.5, 2.5),      # ~86deg error
        (3.14, 2.5),     # ~180deg error — should spin in place
        (0.0, 0.5),      # aligned, slow
        (0.3, 1.0),      # small error, medium speed
    ]
    for heading_err, speed_ms in test_cases:
        env.dog_heading = 0.0
        target_heading = heading_err
        action = np.array([
            math.cos(target_heading), math.sin(target_heading)
        ], dtype=np.float32) * (speed_ms / env.DOG_SPEED)
        # 2D env step
        dbg = env._step_dog_substep(action, 0.016)
        v_2d = dbg["v"]
        w_2d = dbg["w"]
        l_2d = dbg["left_w"]
        r_2d = dbg["right_w"]
        # Webots equivalent
        v_w, w_w, l_w, r_w = webots_dog_drive(heading_err, speed_ms)
        diffs = {
            "v": abs(v_2d - v_w),
            "w": abs(w_2d - w_w),
            "left": abs(l_2d - l_w),
            "right": abs(r_2d - r_w),
        }
        max_diff = max(diffs.values())
        ok = max_diff < 1e-6
        status = "PASS" if ok else "FAIL"
        print(f"  err={heading_err:.2f} spd={speed_ms:.1f}: {status} (max_diff={max_diff:.2e})")
        if not ok:
            for k, d in diffs.items():
                if d > 1e-6:
                    print(f"    {k}: 2d={eval(k+'_2d'):.6f} webots={eval(k+'_w'):.6f}")
            all_pass = False
    return all_pass
 def test_sheep_drive_parity():
    """Test that sheep diff-drive matches Webots sheep controller."""
    print("\n=== Test 3: Sheep Drive Parity ===")
    env = HerdingEnv(n_sheep=1)
    env.reset(seed=42)
    all_pass = True
    test_cases = [
        # (heading_error, speed_rad)
        (0.0, 20.0),     # aligned, flee speed
        (0.0, 3.0),      # aligned, wander speed
        (0.5, 15.0),     # moderate error
        (1.57, 10.0),    # 90deg — should spin in place
        (3.14, 20.0),    # 180deg — should spin in place fast
        (0.2, 8.0),      # small error, medium speed
    ]
    for heading_err, speed_rad in test_cases:
        env.sheep_heading[0] = 0.0
        env.sheep_pos[0] = np.array([0.0, 0.0], dtype=np.float32)
        target_heading = heading_err
        # 2D env
        new_pos = env._sheep_drive(0, target_heading, speed_rad, 0.016)
        v_2d_raw = float(np.linalg.norm(new_pos - np.array([0.0, 0.0]))) / 0.016
        # Re-derive v, w from the internal state
        heading_2d = env.sheep_heading[0]
        # Webots equivalent
        v_w, w_w, l_w, r_w = webots_sheep_drive(heading_err, speed_rad)
        # For 2D, compute the same intermediate values
        err_2d = (target_heading - 0.0 + np.pi) % (2 * np.pi) - np.pi
        fwd_2d = speed_rad * max(0.0, math.cos(err_2d))
        turn_2d = 4.0 * err_2d
        l_2d = max(-22.0, min(22.0, fwd_2d - turn_2d))
        r_2d = max(-22.0, min(22.0, fwd_2d + turn_2d))
        diffs = {
            "left": abs(l_2d - l_w),
            "right": abs(r_2d - r_w),
        }
        max_diff = max(diffs.values())
        ok = max_diff < 1e-6
        status = "PASS" if ok else "FAIL"
        print(f"  err={heading_err:.2f} spd={speed_rad:.1f}: {status} (max_diff={max_diff:.2e})")
        if not ok:
            for k, d in diffs.items():
                if d > 1e-6:
                    print(f"    {k}: 2d={l_2d if k=='left' else r_2d:.6f} webots={l_w if k=='left' else r_w:.6f}")
            all_pass = False
    return all_pass
 def test_full_trajectory_parity():
    """Test that running identical actions produces matching trajectories."""
    print("\n=== Test 4: Full Trajectory Parity (dog only) ===")
    # Run 50 steps with a fixed action, compare dog heading/position
    # at each step between 2D env kinematics and pure Webots kinematics.
    env = HerdingEnv(n_sheep=1)
    env.reset(seed=42)
    env.dog_pos = np.array([0.0, 0.0], dtype=np.float32)
    env.dog_heading = 0.0
    env.ENTRY_AWARE = ENTRY_AWARE
    action = np.array([0.8, -0.6], dtype=np.float32)  # magnitude 1.0
    dt = 0.016667  # sub_dt
    # Webots-side tracking
    wb_heading = 0.0
    wb_x, wb_y = 0.0, 0.0
    max_heading_diff = 0.0
    max_pos_diff = 0.0
    for step in range(50):
        # 2D env sub-step
        env._step_dog_substep(action, dt)
        # Webots-side computation
        speed_ms = 1.0 * 2.5
        target_heading = math.atan2(-0.6, 0.8)
        err = math.atan2(math.sin(target_heading - wb_heading),
                         math.cos(target_heading - wb_heading))
        fwd_ms = speed_ms * max(0.0, math.cos(err))
        fwd_rad = fwd_ms / 0.038
        turn = 4.0 * err
        l = max(-70.0, min(70.0, fwd_rad - turn))
        r = max(-70.0, min(70.0, fwd_rad + turn))
        v = 0.038 * 0.5 * (r + l)
        w = (0.038 / 0.28) * (r - l)
        wb_heading = math.atan2(math.sin(wb_heading + w * dt),
                                math.cos(wb_heading + w * dt))
        wb_x += math.cos(wb_heading) * v * dt
        wb_y += math.sin(wb_heading) * v * dt
        heading_diff = abs(env.dog_heading - wb_heading)
        pos_diff = math.hypot(env.dog_pos[0] - wb_x, env.dog_pos[1] - wb_y)
        max_heading_diff = max(max_heading_diff, heading_diff)
        max_pos_diff = max(max_pos_diff, pos_diff)
    print(f"  Max heading diff over 50 steps: {max_heading_diff:.2e} rad")
    print(f"  Max position diff over 50 steps: {max_pos_diff:.2e} m")
    ok = max_pos_diff < 1e-4
    print(f"  {'PASS' if ok else 'FAIL'}: Trajectories match")
    return ok
 if __name__ == "__main__":
    results = []
    results.append(("Obs parity", test_obs_parity()))
    results.append(("Dog drive parity", test_dog_drive_parity()))
    results.append(("Sheep drive parity", test_sheep_drive_parity()))
    results.append(("Trajectory parity", test_full_trajectory_parity()))
    print("\n" + "=" * 50)
    print("RESULTS")
    print("=" * 50)
    all_pass = True
    for name, passed in results:
        print(f"  {name}: {'PASS' if passed else 'FAIL'}")
        if not passed:
            all_pass = False
    print(f"\nOverall: {'ALL PASS' if all_pass else 'SOME FAILURES'}")
    env.close()
@@ -1,6 +1,9 @@
-gymnasium>=0.29
+# Pin major versions; SB3 2.x requires gymnasium and torch >= 1.13.
-stable-baselines3>=2.3
+gymnasium>=0.29,<2.0
-torch>=2.2
+stable-baselines3[extra]>=2.3,<3.0
-numpy>=1.26
+torch>=2.1
-matplotlib>=3.8
+numpy>=1.24
-tensorboard>=2.16
+pyyaml>=6.0
 tensorboard>=2.14
 tqdm>=4.66
 pytest>=8.0
@@ -0,0 +1,403 @@
 """KL-regularised PPO fine-tune of a behaviour-cloned policy.
 The trainable policy is initialised from ``runs/bc/policy.zip``. A
 frozen copy of the same weights becomes the reference; each PPO loss
 gets an extra ``β · KL(π ‖ π_ref)`` term so the policy can only move
 within a trust region around BC. ``log_std`` is fixed small to keep
 exploration tight.
 Output: ``runs/rl/policy.zip`` — same SB3 format as the BC checkpoint,
 loadable by ``HERDING_MODE=rl`` in the dog controller.
 Usage::
    python -m training.rl.train \\
        --bc training/runs/bc \\
        --out training/runs/rl \\
        --total-timesteps 2000000
 """
 from __future__ import annotations
 import argparse
 import os
 from pathlib import Path
 # Early CLI pre-parse for --world so geometry is configured before any
 # herding.* / training.* import binds geometry constants. Matches the
 # pattern used by training.bc.collect and training.eval.
 _pre_argv = [a for a in os.sys.argv[1:]]
 _pre_world = None
 for i, a in enumerate(_pre_argv):
    if a == "--world" and i + 1 < len(_pre_argv):
        _pre_world = _pre_argv[i + 1]
        break
    if a.startswith("--world="):
        _pre_world = a.split("=", 1)[1]
        break
 if _pre_world is not None:
    from herding.world.geometry import configure as _geo_configure
    _geo_configure(_pre_world)
    os.environ["HERDING_WORLD"] = _pre_world
 import numpy as np
 import torch as th
 import torch.nn.functional as F
 from stable_baselines3 import PPO
 from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback
 from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
 from herding.perception.obs import OBS_DIM
 from training.herding_env import HerdingEnv
 # --------------------------------------------------------------------
 # Env factory
 # --------------------------------------------------------------------
 def _make_env(rank: int, seed: int, frame_stack: int,
              drive_mode: str = "differential",
              difficulty: float = 1.0,
              max_n_sheep: int = 10):
    def _thunk():
        env = HerdingEnv(seed=seed + rank, frame_stack=frame_stack,
                         drive_mode=drive_mode, difficulty=difficulty,
                         max_n_sheep=max_n_sheep)
        env = Monitor(env, info_keywords=("is_success", "n_sheep", "n_penned"))
        return env
    return _thunk
 # --------------------------------------------------------------------
 # KL-regularised PPO
 # --------------------------------------------------------------------
 class KLPPO(PPO):
    """PPO with an extra KL-to-reference penalty in the policy loss.
    Overrides only ``train()``; rollout buffer, clipped surrogate, value
    loss and entropy bonus are unchanged from stock SB3 PPO.
    """
    def __init__(self, *args, ref_policy=None, kl_coef: float = 0.05, **kwargs):
        super().__init__(*args, **kwargs)
        self.ref_policy = ref_policy
        if self.ref_policy is not None:
            self.ref_policy.set_training_mode(False)
            for p in self.ref_policy.parameters():
                p.requires_grad = False
        self.kl_coef = kl_coef
    def train(self) -> None:
        # Stock SB3 PPO.train() structure with the KL-to-reference term
        # added inside the inner minibatch loop.
        self.policy.set_training_mode(True)
        self._update_learning_rate(self.policy.optimizer)
        clip_range = self.clip_range(self._current_progress_remaining)
        if self.clip_range_vf is not None:
            clip_range_vf = self.clip_range_vf(self._current_progress_remaining)
        entropy_losses, pg_losses, value_losses, kl_losses = [], [], [], []
        clip_fractions = []
        continue_training = True
        for epoch in range(self.n_epochs):
            approx_kl_divs = []
            for rollout_data in self.rollout_buffer.get(self.batch_size):
                actions = rollout_data.actions
                if isinstance(self.action_space, th.distributions.Categorical.__bases__):
                    actions = rollout_data.actions.long().flatten()
                values, log_prob, entropy = self.policy.evaluate_actions(
                    rollout_data.observations, actions)
                values = values.flatten()
                advantages = rollout_data.advantages
                if self.normalize_advantage and len(advantages) > 1:
                    advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
                ratio = th.exp(log_prob - rollout_data.old_log_prob)
                policy_loss_1 = advantages * ratio
                policy_loss_2 = advantages * th.clamp(ratio, 1 - clip_range, 1 + clip_range)
                policy_loss = -th.min(policy_loss_1, policy_loss_2).mean()
                pg_losses.append(policy_loss.item())
                clip_fraction = th.mean((th.abs(ratio - 1) > clip_range).float()).item()
                clip_fractions.append(clip_fraction)
                if self.clip_range_vf is None:
                    values_pred = values
                else:
                    values_pred = rollout_data.old_values + th.clamp(
                        values - rollout_data.old_values, -clip_range_vf, clip_range_vf)
                value_loss = F.mse_loss(rollout_data.returns, values_pred)
                value_losses.append(value_loss.item())
                if entropy is None:
                    entropy_loss = -th.mean(-log_prob)
                else:
                    entropy_loss = -th.mean(entropy)
                entropy_losses.append(entropy_loss.item())
                # KL-to-reference: closed-form KL between two diagonal
                # Gaussians, summed over the action axis, mean over batch.
                if self.ref_policy is None:
                    raise RuntimeError("KLPPO.train called without ref_policy")
                with th.no_grad():
                    ref_dist = self.ref_policy.get_distribution(rollout_data.observations)
                pi_dist = self.policy.get_distribution(rollout_data.observations)
                kl_div = th.distributions.kl.kl_divergence(
                    pi_dist.distribution, ref_dist.distribution).sum(dim=-1).mean()
                kl_losses.append(kl_div.item())
                loss = (policy_loss
                        + self.ent_coef * entropy_loss
                        + self.vf_coef * value_loss
                        + self.kl_coef * kl_div)
                with th.no_grad():
                    log_ratio = log_prob - rollout_data.old_log_prob
                    approx_kl_div = th.mean((th.exp(log_ratio) - 1) - log_ratio).cpu().numpy()
                    approx_kl_divs.append(approx_kl_div)
                if self.target_kl is not None and approx_kl_div > 1.5 * self.target_kl:
                    continue_training = False
                    if self.verbose >= 1:
                        print(f"Early stopping at step {epoch} due to reaching max kl: {approx_kl_div:.2f}")
                    break
                self.policy.optimizer.zero_grad()
                loss.backward()
                th.nn.utils.clip_grad_norm_(self.policy.parameters(), self.max_grad_norm)
                self.policy.optimizer.step()
            self._n_updates += 1
            if not continue_training:
                break
        explained_var = self._explained_variance()
        self.logger.record("train/entropy_loss", float(np.mean(entropy_losses)))
        self.logger.record("train/policy_gradient_loss", float(np.mean(pg_losses)))
        self.logger.record("train/value_loss", float(np.mean(value_losses)))
        self.logger.record("train/kl_to_reference", float(np.mean(kl_losses)))
        self.logger.record("train/approx_kl", float(np.mean(approx_kl_divs)))
        self.logger.record("train/clip_fraction", float(np.mean(clip_fractions)))
        self.logger.record("train/explained_variance", float(explained_var))
        if hasattr(self.policy, "log_std"):
            self.logger.record("train/std", th.exp(self.policy.log_std).mean().item())
    def _explained_variance(self) -> float:
        y_pred = self.rollout_buffer.values.flatten()
        y_true = self.rollout_buffer.returns.flatten()
        var_y = np.var(y_true)
        return float("nan") if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y
 # --------------------------------------------------------------------
 # Main
 # --------------------------------------------------------------------
 def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--bc", default="training/runs/bc",
                        help="Directory containing the BC initialisation.")
    parser.add_argument("--out", default="training/runs/rl",
                        help="Where to save the fine-tuned policy.")
    parser.add_argument("--total-timesteps", type=int, default=2_000_000)
    parser.add_argument("--n-envs", type=int, default=8)
    parser.add_argument("--learning-rate", type=float, default=5e-5)
    parser.add_argument("--kl-coef", type=float, default=0.05,
                        help="Coefficient of the KL-to-reference penalty.")
    parser.add_argument("--log-std", type=float, default=-1.5,
                        help="Initial (and frozen) log_std for exploration.")
    parser.add_argument("--freeze-log-std", action="store_true", default=True)
    parser.add_argument("--n-steps", type=int, default=2048)
    parser.add_argument("--batch-size", type=int, default=256)
    parser.add_argument("--n-epochs", type=int, default=10)
    parser.add_argument("--gamma", type=float, default=0.995)
    parser.add_argument("--gae-lambda", type=float, default=0.95)
    parser.add_argument("--clip-range", type=float, default=0.1)
    parser.add_argument("--ent-coef", type=float, default=0.0)
    parser.add_argument("--target-kl", type=float, default=0.02,
                        help="SB3 per-batch KL early-stop guard.")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--device", default="cpu")
    parser.add_argument("--drive-mode", default=None,
                        choices=["differential", "mecanum"],
                        help="Drive mode. If not set, inferred from "
                             "BC action dimension (2→differential, 3→mecanum).")
    parser.add_argument("--imitate-weight", type=float, default=None,
                        help="Override env.W_IMITATE (e.g. 0.0 to drop "
                             "Strömbom imitation during fine-tune).")
    parser.add_argument("--time-weight", type=float, default=None,
                        help="Override env.W_TIME (e.g. -0.1 for a "
                             "per-step time penalty).")
    parser.add_argument("--difficulty", type=float, default=1.0,
                        help="HerdingEnv difficulty for PPO rollouts. "
                             "Must match eval (1.0) to avoid train/eval "
                             "distribution mismatch.")
    parser.add_argument("--max-n-sheep", type=int, default=10,
                        help="Upper bound on flock size sampled each reset.")
    parser.add_argument("--world", default=None,
                        choices=["field", "field_round"],
                        help="World shape. If not set, uses HERDING_WORLD "
                             "env var or defaults to 'field'.")
    args = parser.parse_args()
    # --world was already honoured in the early pre-parse above; here we
    # just sanity-check that the final argparse view agrees.
    if args.world is not None:
        from herding.world.geometry import FIELD_SHAPE as _CURRENT_SHAPE
        if args.world != _CURRENT_SHAPE:
            print(f"[rl] WARNING: --world={args.world} but geometry is "
                  f"'{_CURRENT_SHAPE}'. File a bug.")
    bc_zip = Path(args.bc) / "policy.zip"
    if not bc_zip.exists():
        raise SystemExit(
            f"BC checkpoint not found at {bc_zip}. Train bc first with "
            f"`python -m training.bc.pretrain`."
        )
    out = Path(args.out)
    out.mkdir(parents=True, exist_ok=True)
    (out / "checkpoints").mkdir(exist_ok=True)
    (out / "best").mkdir(exist_ok=True)
    # Infer frame_stack from the BC checkpoint's obs space.
    ref_only = PPO.load(str(bc_zip), device=args.device)
    obs_dim = int(ref_only.observation_space.shape[0])
    if obs_dim % OBS_DIM != 0:
        raise SystemExit(f"BC obs dim {obs_dim} is not a multiple of {OBS_DIM}.")
    frame_stack = obs_dim // OBS_DIM
    print(f"[rl] BC obs dim {obs_dim} → frame_stack={frame_stack}")
    # Infer drive mode from BC action dim if not explicitly set.
    bc_action_dim = int(ref_only.action_space.shape[0])
    if args.drive_mode is not None:
        drive_mode = args.drive_mode
    elif bc_action_dim == 3:
        drive_mode = "mecanum"
    else:
        drive_mode = "differential"
    print(f"[rl] drive_mode={drive_mode} (BC action_dim={bc_action_dim})")
    env_fns = [_make_env(i, args.seed, frame_stack, drive_mode,
                         difficulty=args.difficulty,
                         max_n_sheep=args.max_n_sheep)
               for i in range(args.n_envs)]
    venv = SubprocVecEnv(env_fns) if args.n_envs > 1 else DummyVecEnv(env_fns)
    eval_venv = DummyVecEnv([_make_env(99, args.seed + 999, frame_stack,
                                       drive_mode,
                                       difficulty=args.difficulty,
                                       max_n_sheep=args.max_n_sheep)])
    print(f"[rl] difficulty={args.difficulty} max_n_sheep={args.max_n_sheep}")
    # Reward-shaping overrides (broadcast to every env instance).
    def _broadcast(method: str, value):
        for v in (venv, eval_venv):
            try:
                v.env_method(method, value)
            except AttributeError:
                v.venv.env_method(method, value)
    if args.imitate_weight is not None:
        _broadcast("set_imitate_weight", args.imitate_weight)
        print(f"[rl] W_IMITATE overridden to {args.imitate_weight}")
    if args.time_weight is not None:
        _broadcast("set_time_weight", args.time_weight)
        print(f"[rl] W_TIME overridden to {args.time_weight}")
    # Build a fresh KLPPO at the right obs/action shape, then copy BC
    # weights into both the trainable policy and the frozen reference.
    model = KLPPO(
        "MlpPolicy", venv,
        ref_policy=None,  # filled in below
        kl_coef=args.kl_coef,
        learning_rate=args.learning_rate,
        n_steps=args.n_steps,
        batch_size=args.batch_size,
        n_epochs=args.n_epochs,
        gamma=args.gamma,
        gae_lambda=args.gae_lambda,
        clip_range=args.clip_range,
        ent_coef=args.ent_coef,
        target_kl=args.target_kl,
        policy_kwargs=dict(
            net_arch=dict(pi=[512, 512], vf=[512, 512]),
            log_std_init=args.log_std,
        ),
        verbose=1,
        seed=args.seed,
        device=args.device,
        tensorboard_log=str(out / "tb"),
    )
    # strict=False — the BC value head wasn't trained; PPO trains it.
    bc_state = ref_only.policy.state_dict()
    missing, unexpected = model.policy.load_state_dict(bc_state, strict=False)
    print(f"[rl] BC → policy: missing={len(missing)} unexpected={len(unexpected)}")
    ref_policy = type(model.policy)(
        observation_space=model.observation_space,
        action_space=model.action_space,
        lr_schedule=lambda _: 0.0,
        net_arch=dict(pi=[512, 512], vf=[512, 512]),
        log_std_init=args.log_std,
    ).to(args.device)
    ref_policy.load_state_dict(bc_state, strict=False)
    model.ref_policy = ref_policy
    model.ref_policy.set_training_mode(False)
    for p in model.ref_policy.parameters():
        p.requires_grad = False
    # Force both policies to the same log_std so the KL term measures
    # mean drift only, not a std mismatch carried over from BC.
    with th.no_grad():
        model.policy.log_std.fill_(args.log_std)
        model.ref_policy.log_std.fill_(args.log_std)
    if args.freeze_log_std:
        model.policy.log_std.requires_grad = False
        print(f"[rl] log_std frozen at {args.log_std} (σ ≈ {np.exp(args.log_std):.3f})")
    ckpt_cb = CheckpointCallback(
        save_freq=max(1, 50_000 // args.n_envs),
        save_path=str(out / "checkpoints"),
        name_prefix="ppo",
    )
    # EvalCallback writes <save_path>/best_model.zip on every new best
    # eval reward. We send it straight to ``out/`` and rename to
    # ``policy.zip`` after training so the deployed file lives at the
    # canonical path.
    eval_cb = EvalCallback(
        eval_venv,
        best_model_save_path=str(out),
        log_path=str(out / "evals"),
        eval_freq=max(1, 20_000 // args.n_envs),
        n_eval_episodes=5,
        deterministic=True,
    )
    print(f"[rl] training: total_timesteps={args.total_timesteps} "
          f"n_envs={args.n_envs} lr={args.learning_rate} kl_coef={args.kl_coef}")
    model.learn(total_timesteps=args.total_timesteps,
                callback=[ckpt_cb, eval_cb], progress_bar=True)
    # Save the end-of-training state for debugging convergence behaviour.
    model.save(out / "final.zip")
    # Promote the EvalCallback's best-by-eval-reward snapshot to the
    # canonical ``policy.zip`` (what the controller loads). Fall back
    # to the final state if eval never recorded a "best".
    import shutil
    best_zip = out / "best_model.zip"
    policy_zip = out / "policy.zip"
    if best_zip.exists():
        if policy_zip.exists():
            policy_zip.unlink()
        best_zip.rename(policy_zip)
        print(f"[rl] best snapshot → {policy_zip}  (final state kept at {out/'final.zip'})")
    else:
        shutil.copy(out / "final.zip", policy_zip)
        print(f"[rl] no best snapshot recorded; using final → {policy_zip}")
 if __name__ == "__main__":
    main()
@@ -1 +0,0 @@
@@ -1,392 +0,0 @@
 """
 PPO training for the herding task with curriculum learning.
 Trains from scratch through a 1→max_sheep curriculum, evaluates after each
 stage, and auto-generates trajectory/timeseries plots plus a summary chart.
 Usage
 -----
    python train.py                                       # defaults from config.json
    python train.py --config my_config.json --max-sheep 5
    python train.py --max-sheep 3 --steps-per-stage 1000000
 Outputs (in runs/<timestamp>/):
    config.json          resolved config
    final_model.zip      trained PPO model
    vecnorm.pkl          VecNormalize statistics
    stage_results.json   per-stage evaluation metrics
    success_rate.png     summary bar chart
    eval/                trajectory & timeseries plots per sheep count
 """
 import argparse
 import json
 import os
 import time
 from copy import deepcopy
 import numpy as np
 from stable_baselines3 import PPO
 from stable_baselines3.common.callbacks import BaseCallback
 from stable_baselines3.common.vec_env import (
    DummyVecEnv,
    SubprocVecEnv,
    VecNormalize,
 )
 from herding_env import HerdingEnv
 from viz import (
    run_and_record,
    plot_trajectory,
    plot_timeseries,
    plot_success_rate,
    save_episode_gif,
 )
 # ── Callbacks ────────────────────────────────────────────────────────────────
 class ProgressCallback(BaseCallback):
    """One-line progress summary every `freq` env steps."""
    def __init__(self, stage_label: str, freq: int = 100_000):
        super().__init__()
        self.stage_label = stage_label
        self.freq = freq
        self._last = 0
        self._ep_returns = []
        self._ep_success = []
        self._total_eps = 0
        self._total_success = 0
        self._cur_ret = None
    def _on_step(self) -> bool:
        rewards = self.locals.get("rewards")
        dones = self.locals.get("dones")
        infos = self.locals.get("infos", [])
        if rewards is None or dones is None:
            return True
        if self._cur_ret is None or len(self._cur_ret) != len(rewards):
            self._cur_ret = np.zeros(len(rewards), dtype=np.float64)
        self._cur_ret += np.asarray(rewards, dtype=np.float64)
        for i, d in enumerate(dones):
            if not d:
                continue
            self._ep_returns.append(float(self._cur_ret[i]))
            info = infos[i] if i < len(infos) else {}
            success = int(info.get("n_penned", 0) == info.get("n_sheep", -1))
            self._ep_success.append(success)
            self._total_eps += 1
            self._total_success += success
            self._cur_ret[i] = 0.0
            if len(self._ep_returns) > 50:
                self._ep_returns.pop(0)
                self._ep_success.pop(0)
        if self.num_timesteps - self._last >= self.freq:
            self._last = self.num_timesteps
            n = len(self._ep_returns)
            mean_r = float(np.mean(self._ep_returns)) if n else float("nan")
            win_sr = float(np.mean(self._ep_success)) if n else float("nan")
            cum_sr = (self._total_success / self._total_eps
                      if self._total_eps else float("nan"))
            print(f"           ... [{self.stage_label} | "
                  f"{self.num_timesteps:>7,} steps | "
                  f"ret(last {n})={mean_r:+.2f}  "
                  f"win_sr={win_sr*100:.0f}%  cum_sr={cum_sr*100:.0f}%]",
                  flush=True)
        return True
 # ── Environment factory ──────────────────────────────────────────────────────
 def make_env(n_sheep, seed, max_steps, reward_cfg=None):
    def _init():
        env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
                         reward_cfg=reward_cfg)
        env.reset(seed=seed)
        return env
    return _init
 # ── Failure-mode classification ──────────────────────────────────────────────
 COMPACT_RADIUS = 5.0
 def _classify(ep_radii, ep_com_dists, n_penned, n_sheep):
    if n_penned == n_sheep:
        return "SUCCESS"
    if min(ep_radii) > COMPACT_RADIUS:
        return "NEVER_COMPACT"
    first = next(i for i, r in enumerate(ep_radii) if r <= COMPACT_RADIUS)
    if min(ep_com_dists[first:]) > 3.0:
        return "COMPACT_CANT_DRIVE"
    if n_penned == 0:
        return "DROVE_NO_SHEEP"
    return f"PARTIAL_{n_penned}of{n_sheep}"
 # ── Evaluation ───────────────────────────────────────────────────────────────
 def evaluate(model, vn_template, n_sheep, n_episodes, max_steps,
             reward_cfg=None):
    """Evaluate at a given sheep count; returns metrics dict."""
    raw = DummyVecEnv([make_env(n_sheep, 9999, max_steps, reward_cfg)])
    vn = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
    vn.obs_rms = deepcopy(vn_template.obs_rms)
    vn.ret_rms = deepcopy(vn_template.ret_rms)
    successes = 0
    ep_lens = []
    min_pen_list = []
    action_mags = []
    failure_counts = {}
    rc_sums = {}
    rc_n = 0
    for _ in range(n_episodes):
        obs = vn.reset()
        done = False
        steps = 0
        min_pen = float("inf")
        mags = []
        ep_radii = []
        ep_com_dists = []
        while not done:
            action, _ = model.predict(obs, deterministic=True)
            obs, _, dones, infos = vn.step(action)
            done = dones[0]
            inner = vn.envs[0]
            com, radius, _ = inner._flock_stats()
            min_pen = min(min_pen, float(np.linalg.norm(com - inner.PEN_CENTER)))
            mags.append(float(np.linalg.norm(action[0])))
            ep_radii.append(radius)
            ep_com_dists.append(float(np.linalg.norm(com - inner.PEN_CENTER)))
            steps += 1
            rc = infos[0].get("rcomps")
            if rc:
                for k, v in rc.items():
                    rc_sums[k] = rc_sums.get(k, 0.0) + v
                rc_n += 1
        n_penned = infos[0].get("n_penned", 0)
        success = n_penned == n_sheep
        successes += int(success)
        ep_lens.append(steps)
        min_pen_list.append(min_pen)
        action_mags.extend(mags)
        mode = _classify(ep_radii, ep_com_dists, n_penned, n_sheep)
        failure_counts[mode] = failure_counts.get(mode, 0) + 1
    vn.close()
    result = {
        "sr": successes / n_episodes,
        "mean_len": float(np.mean(ep_lens)),
        "mean_min_pen": float(np.mean(min_pen_list)),
        "mean_act": float(np.mean(action_mags)) if action_mags else 0.0,
        "failure_modes": failure_counts,
    }
    if rc_n > 0:
        result["reward_per_step"] = {k: v / rc_n for k, v in rc_sums.items()}
    return result
 # ── CLI ──────────────────────────────────────────────────────────────────────
 DEFAULT_CONFIG = {
    "W_PER_SHEEP": 2.0,
    "W_ALIGN": 0.05,
    "W_PEN_BONUS": 10.0,
    "W_COMPLETE": 100.0,
    "W_STEP_COST": 0.02,
    "W_SOUTH": 0.01,
    "W_COMPACT": 0.0,
    "W_WALL_TOUCH": 0.04,
    "WALL_TOUCH_BUFFER": 0.3,
    "ALIGN_SHAPE": "standoff",
    "ALIGN_GATED": True,
    "ENTRY_AWARE": True,
    "ent_coef": 0.02,
 }
 def parse_args():
    p = argparse.ArgumentParser(
        description="PPO training for herding task with curriculum learning")
    p.add_argument("--config", type=str, default=None,
                   help="JSON config file (reward weights + ent_coef)")
    p.add_argument("--max-sheep", type=int, default=10)
    p.add_argument("--steps-per-stage", type=int, default=1_500_000)
    p.add_argument("--n-envs", type=int, default=8)
    p.add_argument("--max-steps", type=int, default=2500)
    p.add_argument("--eval-episodes", type=int, default=30)
    p.add_argument("--run-dir", type=str, default=None)
    p.add_argument("--no-gif", action="store_true",
                   help="Skip per-stage GIF rendering (PNGs still produced).")
    p.add_argument("--gif-fps", type=int, default=20)
    p.add_argument("--gif-skip", type=int, default=3,
                   help="Keep every Nth frame (smaller GIF; default 3).")
    return p.parse_args()
 # ── Main ─────────────────────────────────────────────────────────────────────
 def main():
    args = parse_args()
    # Load config: --config overrides, else auto-load config.json if present
    cfg = dict(DEFAULT_CONFIG)
    config_path = args.config
    if config_path is None and os.path.exists("config.json"):
        config_path = "config.json"
    if config_path:
        with open(config_path) as f:
            cfg.update(json.load(f))
        print(f"Config loaded from {config_path}")
    rcfg = {k: v for k, v in cfg.items() if hasattr(HerdingEnv, k)}
    # Run directory
    run_dir = args.run_dir or os.path.join(
        "runs", time.strftime("%Y%m%d_%H%M%S"))
    eval_dir = os.path.join(run_dir, "eval")
    os.makedirs(eval_dir, exist_ok=True)
    with open(os.path.join(run_dir, "config.json"), "w") as f:
        json.dump(cfg, f, indent=2)
    print(f"Config: {cfg}")
    print(f"Run dir: {run_dir}")
    print(f"Curriculum: 1 → {args.max_sheep} sheep, "
          f"{args.steps_per_stage:,} steps/stage\n")
    # Training envs
    train_env = SubprocVecEnv([
        make_env(1, seed=i, max_steps=args.max_steps, reward_cfg=rcfg)
        for i in range(args.n_envs)
    ])
    vn = VecNormalize(train_env, norm_obs=True, norm_reward=True,
                      clip_obs=10.0)
    # Model — force CPU (PPO with MLP runs faster on CPU than GPU; SB3 warns
    # about this otherwise).
    model = PPO(
        "MlpPolicy", vn,
        learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10,
        gamma=0.995, gae_lambda=0.95, clip_range=0.2,
        ent_coef=cfg.get("ent_coef", 0.02), vf_coef=0.5, max_grad_norm=0.5,
        policy_kwargs=dict(net_arch=[256, 256]),
        device="cpu",
        verbose=0,
    )
    # Curriculum training
    stage_results = []
    t0 = time.time()
    try:
        for n in range(1, args.max_sheep + 1):
            if n == 1:
                print(f"\n[Stage n_sheep=1] training {args.steps_per_stage:,} steps")
                model.learn(
                    total_timesteps=args.steps_per_stage,
                    reset_num_timesteps=True,
                    callback=ProgressCallback("1 sheep", freq=100_000),
                )
            else:
                # Mixed transition: half envs stay at n-1, half advance to n,
                # for the first half of the stage budget. This prevents the
                # n+1 task's noisy early gradients from destroying the n policy
                # (catastrophic forgetting) before it has a chance to adapt.
                half = max(1, args.n_envs // 2)
                for i in range(half):
                    vn.env_method("set_n_sheep", n - 1, indices=[i])
                for i in range(half, args.n_envs):
                    vn.env_method("set_n_sheep", n, indices=[i])
                mix_steps  = args.steps_per_stage // 2
                full_steps = args.steps_per_stage - mix_steps
                print(f"\n[Stage n_sheep={n}] mixed ({n-1}/{n} sheep) "
                      f"{mix_steps:,} steps")
                model.learn(
                    total_timesteps=mix_steps,
                    reset_num_timesteps=False,
                    callback=ProgressCallback(f"{n-1}→{n} mix", freq=100_000),
                )
                vn.env_method("set_n_sheep", n)
                print(f"[Stage n_sheep={n}] full ({n} sheep) {full_steps:,} steps")
                model.learn(
                    total_timesteps=full_steps,
                    reset_num_timesteps=False,
                    callback=ProgressCallback(f"{n} sheep", freq=100_000),
                )
            # Evaluate
            print(f"[Stage n_sheep={n}] evaluating {args.eval_episodes} eps")
            r = evaluate(model, vn, n, args.eval_episodes, args.max_steps, rcfg)
            print(f"[Stage n_sheep={n}] sr={r['sr']*100:.0f}%  "
                  f"mean_len={r['mean_len']:.0f}  "
                  f"mean_min_pen={r['mean_min_pen']:.1f}m  "
                  f"mean_act={r['mean_act']:.2f}")
            # Failure-mode breakdown
            if r["failure_modes"]:
                modes = "  ".join(
                    f"{k}={v}" for k, v in sorted(
                        r["failure_modes"].items(), key=lambda x: -x[1]))
                print(f"  failure modes: {modes}")
            # Reward breakdown
            if "reward_per_step" in r:
                rps = r["reward_per_step"]
                print(f"  reward/step: " + "  ".join(
                    f"{k}={v:+.4f}" for k, v in rps.items()))
            # Episode visualisation: trajectory + timeseries + animated GIF
            hist = run_and_record(model, vn, n, args.max_steps, rcfg,
                                  seed=1000 + n)
            tag = "success" if hist["success"] else "fail"
            plot_trajectory(
                hist,
                os.path.join(eval_dir, f"traj_{n}s_{tag}.png"))
            plot_timeseries(
                hist,
                os.path.join(eval_dir, f"ts_{n}s_{tag}.png"))
            if not args.no_gif:
                save_episode_gif(
                    hist,
                    os.path.join(eval_dir, f"ep_{n}s_{tag}.gif"),
                    fps=args.gif_fps, skip=args.gif_skip)
            r["n_sheep"] = n
            stage_results.append(r)
        # Save artefacts
        model.save(os.path.join(run_dir, "final_model"))
        vn.save(os.path.join(run_dir, "vecnorm.pkl"))
        with open(os.path.join(run_dir, "stage_results.json"), "w") as f:
            json.dump(stage_results, f, indent=2)
    finally:
        try:
            vn.close()
        except Exception:
            pass
    # Summary
    elapsed = (time.time() - t0) / 60
    print("\n" + "=" * 70)
    print("  TRAINING SUMMARY")
    print("=" * 70)
    for r in stage_results:
        print(f"  n_sheep={r['n_sheep']}  sr={r['sr']*100:>3.0f}%  "
              f"len={r['mean_len']:>5.0f}  min_pen={r['mean_min_pen']:>5.1f}m  "
              f"act={r['mean_act']:.2f}")
    print(f"\n  Total time: {elapsed:.1f} min")
    print(f"  Artefacts:  {run_dir}/")
    plot_success_rate(stage_results, os.path.join(run_dir, "success_rate.png"))
    print(f"  Plots:      {run_dir}/success_rate.png, {eval_dir}/")
 if __name__ == "__main__":
    main()
@@ -1,411 +0,0 @@
 """
 PPO training with attention-based policy (train_at.py).
 Key difference from train.py
 -----------------------------
 - Observation exposes ALL sheep as individual per-sheep tokens rather than
  only the top-3 farthest. The policy therefore has complete flock visibility
  at any sheep count — no hidden sheep even at n=10.
 - A TransformerFeaturesExtractor processes the sheep tokens with multi-head
  self-attention (permutation-invariant), then mean-pools over valid tokens
  and concatenates the result with global dog/pen features.
 - Curriculum transition uses the same mixed-env approach as train.py: half
  the envs stay at n-1 for the first half of each new stage to suppress
  catastrophic forgetting.
 Observation layout  (7 + MAX_SHEEP*6 = 67 dims, fixed)
 -------------------------------------------------------
  Global (7):
    dog_x / FIELD,  dog_y / FIELD,
    cos(heading),   sin(heading),
    (pen_x - dog_x) / D,  (pen_y - dog_y) / D,
    n_active / n_sheep
  Per sheep i  (6):
    (sheep_x - dog_x) / D,  (sheep_y - dog_y) / D,   ← pos rel to dog
    (pen_x   - sheep_x) / D, (pen_y  - sheep_y) / D,  ← sheep-to-pen
    is_active   1.0 if not penned, else 0.0
    is_valid    1.0 if i < n_sheep, else 0.0 (padding sentinel)
  After VecNormalize, is_valid for real sheep normalises > 0 and for
  padding tokens < 0 (because mean ∈ (0,1)), so a threshold of 0 cleanly
  separates real from padded without any extra bookkeeping.
 Usage
 -----
    python train_at.py                                 # defaults from config.json
    python train_at.py --max-sheep 10 --steps-per-stage 2000000
    python train_at.py --embed-dim 128 --n-heads 4 --n-layers 3
 """
 import argparse
 import json
 import os
 import time
 from copy import deepcopy
 import numpy as np
 import torch
 import torch.nn as nn
 from gymnasium import spaces
 from stable_baselines3 import PPO
 from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
 from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize
 from herding_env import HerdingEnv
 from train import ProgressCallback, _classify, COMPACT_RADIUS, DEFAULT_CONFIG
 from viz import (
    run_and_record, plot_trajectory, plot_timeseries,
    plot_success_rate, save_episode_gif,
 )
 # ── Per-sheep token observation environment ───────────────────────────────────
 class HerdingEnvAt(HerdingEnv):
    """
    HerdingEnv with a per-sheep token observation for the attention policy.
    Everything else (dynamics, reward, curriculum interface) is inherited.
    """
    OBS_GLOBAL = 7
    OBS_SHEEP  = 6
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        obs_dim = self.OBS_GLOBAL + self.MAX_SHEEP * self.OBS_SHEEP
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32
        )
    def _obs(self) -> np.ndarray:
        S = self.FIELD
        D = 2.0 * self.FIELD
        pen_ref     = self.PEN_ENTRY if self.ENTRY_AWARE else self.PEN_CENTER
        active_mask = ~self.penned[:self.n_sheep]
        n_active    = int(active_mask.sum())
        global_feats = np.array([
            self.dog_pos[0] / S,
            self.dog_pos[1] / S,
            float(np.cos(self.dog_heading)),
            float(np.sin(self.dog_heading)),
            (pen_ref[0] - self.dog_pos[0]) / D,
            (pen_ref[1] - self.dog_pos[1]) / D,
            n_active / max(self.n_sheep, 1),
        ], dtype=np.float32)
        sheep_feats = np.zeros((self.MAX_SHEEP, self.OBS_SHEEP), dtype=np.float32)
        for i in range(self.n_sheep):
            pos = self.sheep_pos[i]
            sheep_feats[i] = [
                (pos[0] - self.dog_pos[0]) / D,
                (pos[1] - self.dog_pos[1]) / D,
                (pen_ref[0] - pos[0]) / D,
                (pen_ref[1] - pos[1]) / D,
                float(not self.penned[i]),
                1.0,   # is_valid: this sheep exists
            ]
        # i >= n_sheep: all zeros, is_valid=0 → masked out in attention
        return np.concatenate([global_feats, sheep_feats.ravel()])
 # ── Attention features extractor ──────────────────────────────────────────────
 class ShepherdAttentionExtractor(BaseFeaturesExtractor):
    """
    Multi-head self-attention over per-sheep tokens, mean-pooled over valid
    (non-padding) tokens and concatenated with global dog/pen features.
    After VecNormalize:
      real sheep  → is_valid_norm > 0   (normalised from 1.0)
      padding     → is_valid_norm ≤ 0   (normalised from 0.0)
    so threshold at 0 is always correct regardless of curriculum stage.
    """
    GLOBAL_DIM = HerdingEnvAt.OBS_GLOBAL   # 7
    SHEEP_DIM  = HerdingEnvAt.OBS_SHEEP    # 6
    MAX_SHEEP  = HerdingEnv.MAX_SHEEP      # 10
    VALID_IDX  = 5                          # index of is_valid within each token
    def __init__(self, observation_space, embed_dim: int = 64,
                 n_heads: int = 4, n_layers: int = 2, ff_dim: int = 128):
        super().__init__(observation_space,
                         features_dim=self.GLOBAL_DIM + embed_dim)
        self.sheep_embed = nn.Linear(self.SHEEP_DIM, embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim, nhead=n_heads, dim_feedforward=ff_dim,
            dropout=0.0, batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer,
                                                 num_layers=n_layers)
    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        B = obs.shape[0]
        global_feats = obs[:, :self.GLOBAL_DIM]                       # (B, 7)
        tokens = obs[:, self.GLOBAL_DIM:].view(
            B, self.MAX_SHEEP, self.SHEEP_DIM)                        # (B, 10, 6)
        # is_valid after VecNorm: real > 0, padding ≤ 0
        is_valid_norm    = tokens[:, :, self.VALID_IDX]               # (B, 10)
        key_padding_mask = is_valid_norm <= 0.0                       # True → ignore
        x = self.sheep_embed(tokens)                                  # (B, 10, E)
        x = self.transformer(x, src_key_padding_mask=key_padding_mask)
        valid_w = (is_valid_norm > 0.0).float().unsqueeze(-1)        # (B, 10, 1)
        pooled  = (x * valid_w).sum(1) / valid_w.sum(1).clamp(min=1.0)
        return torch.cat([global_feats, pooled], dim=1)               # (B, 7+E)
 # ── Environment factory ───────────────────────────────────────────────────────
 def make_env_at(n_sheep, seed, max_steps, reward_cfg=None):
    def _init():
        env = HerdingEnvAt(n_sheep=n_sheep, max_steps=max_steps,
                           reward_cfg=reward_cfg)
        env.reset(seed=seed)
        return env
    return _init
 # ── Evaluation ────────────────────────────────────────────────────────────────
 def evaluate_at(model, vn_template, n_sheep, n_episodes, max_steps,
                reward_cfg=None):
    raw = DummyVecEnv([make_env_at(n_sheep, 9999, max_steps, reward_cfg)])
    vn  = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
    vn.obs_rms = deepcopy(vn_template.obs_rms)
    vn.ret_rms = deepcopy(vn_template.ret_rms)
    successes = 0
    ep_lens, min_pen_list, action_mags = [], [], []
    failure_counts, rc_sums = {}, {}
    rc_n = 0
    for _ in range(n_episodes):
        obs  = vn.reset()
        done = False
        steps, min_pen = 0, float("inf")
        mags, ep_radii, ep_com_dists = [], [], []
        while not done:
            action, _ = model.predict(obs, deterministic=True)
            obs, _, dones, infos = vn.step(action)
            done  = dones[0]
            inner = vn.envs[0]
            com, radius, _ = inner._flock_stats()
            min_pen = min(min_pen,
                          float(np.linalg.norm(com - inner.PEN_CENTER)))
            mags.append(float(np.linalg.norm(action[0])))
            ep_radii.append(radius)
            ep_com_dists.append(float(np.linalg.norm(com - inner.PEN_CENTER)))
            steps += 1
            rc = infos[0].get("rcomps")
            if rc:
                for k, v in rc.items():
                    rc_sums[k] = rc_sums.get(k, 0.0) + v
                rc_n += 1
        n_penned = infos[0].get("n_penned", 0)
        successes += int(n_penned == n_sheep)
        ep_lens.append(steps)
        min_pen_list.append(min_pen)
        action_mags.extend(mags)
        mode = _classify(ep_radii, ep_com_dists, n_penned, n_sheep)
        failure_counts[mode] = failure_counts.get(mode, 0) + 1
    vn.close()
    result = {
        "sr":          successes / n_episodes,
        "mean_len":    float(np.mean(ep_lens)),
        "mean_min_pen": float(np.mean(min_pen_list)),
        "mean_act":    float(np.mean(action_mags)) if action_mags else 0.0,
        "failure_modes": failure_counts,
    }
    if rc_n > 0:
        result["reward_per_step"] = {k: v / rc_n for k, v in rc_sums.items()}
    return result
 # ── CLI ───────────────────────────────────────────────────────────────────────
 def parse_args():
    p = argparse.ArgumentParser(
        description="PPO + attention training for herding task")
    p.add_argument("--config",           type=str, default=None)
    p.add_argument("--max-sheep",        type=int, default=10)
    p.add_argument("--steps-per-stage",  type=int, default=1_500_000)
    p.add_argument("--n-envs",           type=int, default=8)
    p.add_argument("--max-steps",        type=int, default=2500)
    p.add_argument("--eval-episodes",    type=int, default=30)
    p.add_argument("--run-dir",          type=str, default=None)
    p.add_argument("--no-gif",           action="store_true")
    p.add_argument("--gif-fps",          type=int, default=20)
    p.add_argument("--gif-skip",         type=int, default=3)
    # Attention architecture
    p.add_argument("--embed-dim",        type=int, default=64,
                   help="Transformer embedding dimension (default 64)")
    p.add_argument("--n-heads",          type=int, default=4,
                   help="Number of attention heads (default 4)")
    p.add_argument("--n-layers",         type=int, default=2,
                   help="Number of transformer encoder layers (default 2)")
    p.add_argument("--ff-dim",           type=int, default=128,
                   help="Transformer feed-forward dim (default 128)")
    return p.parse_args()
 # ── Main ──────────────────────────────────────────────────────────────────────
 def main():
    args = parse_args()
    cfg = dict(DEFAULT_CONFIG)
    config_path = args.config
    if config_path is None and os.path.exists("config.json"):
        config_path = "config.json"
    if config_path:
        with open(config_path) as f:
            cfg.update(json.load(f))
        print(f"Config loaded from {config_path}")
    rcfg = {k: v for k, v in cfg.items() if hasattr(HerdingEnv, k)}
    run_dir  = args.run_dir or os.path.join(
        "runs", "at_" + time.strftime("%Y%m%d_%H%M%S"))
    eval_dir = os.path.join(run_dir, "eval")
    os.makedirs(eval_dir, exist_ok=True)
    with open(os.path.join(run_dir, "config.json"), "w") as f:
        json.dump(cfg, f, indent=2)
    print(f"Config:      {cfg}")
    print(f"Run dir:     {run_dir}")
    print(f"Curriculum:  1 → {args.max_sheep} sheep, "
          f"{args.steps_per_stage:,} steps/stage")
    print(f"Transformer: embed={args.embed_dim}  heads={args.n_heads}  "
          f"layers={args.n_layers}  ff={args.ff_dim}\n")
    train_env = SubprocVecEnv([
        make_env_at(1, seed=i, max_steps=args.max_steps, reward_cfg=rcfg)
        for i in range(args.n_envs)
    ])
    vn = VecNormalize(train_env, norm_obs=True, norm_reward=True, clip_obs=10.0)
    model = PPO(
        "MlpPolicy", vn,
        learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10,
        gamma=0.995, gae_lambda=0.95, clip_range=0.2,
        ent_coef=cfg.get("ent_coef", 0.02), vf_coef=0.5, max_grad_norm=0.5,
        policy_kwargs=dict(
            features_extractor_class=ShepherdAttentionExtractor,
            features_extractor_kwargs=dict(
                embed_dim=args.embed_dim,
                n_heads=args.n_heads,
                n_layers=args.n_layers,
                ff_dim=args.ff_dim,
            ),
            net_arch=[256, 256],
        ),
        device="cpu",
        verbose=0,
    )
    stage_results = []
    t0 = time.time()
    try:
        for n in range(1, args.max_sheep + 1):
            if n == 1:
                print(f"\n[Stage n_sheep=1] training {args.steps_per_stage:,} steps")
                model.learn(
                    total_timesteps=args.steps_per_stage,
                    reset_num_timesteps=True,
                    callback=ProgressCallback("1 sheep", freq=100_000),
                )
            else:
                half       = max(1, args.n_envs // 2)
                mix_steps  = args.steps_per_stage // 2
                full_steps = args.steps_per_stage - mix_steps
                for i in range(half):
                    vn.env_method("set_n_sheep", n - 1, indices=[i])
                for i in range(half, args.n_envs):
                    vn.env_method("set_n_sheep", n, indices=[i])
                print(f"\n[Stage n_sheep={n}] mixed ({n-1}/{n} sheep) "
                      f"{mix_steps:,} steps")
                model.learn(
                    total_timesteps=mix_steps,
                    reset_num_timesteps=False,
                    callback=ProgressCallback(f"{n-1}→{n} mix", freq=100_000),
                )
                vn.env_method("set_n_sheep", n)
                print(f"[Stage n_sheep={n}] full ({n} sheep) {full_steps:,} steps")
                model.learn(
                    total_timesteps=full_steps,
                    reset_num_timesteps=False,
                    callback=ProgressCallback(f"{n} sheep", freq=100_000),
                )
            print(f"[Stage n_sheep={n}] evaluating {args.eval_episodes} eps")
            r = evaluate_at(model, vn, n, args.eval_episodes,
                            args.max_steps, rcfg)
            print(f"[Stage n_sheep={n}] sr={r['sr']*100:.0f}%  "
                  f"mean_len={r['mean_len']:.0f}  "
                  f"mean_min_pen={r['mean_min_pen']:.1f}m  "
                  f"mean_act={r['mean_act']:.2f}")
            if r["failure_modes"]:
                modes = "  ".join(
                    f"{k}={v}" for k, v in sorted(
                        r["failure_modes"].items(), key=lambda x: -x[1]))
                print(f"  failure modes: {modes}")
            if "reward_per_step" in r:
                rps = r["reward_per_step"]
                print("  reward/step: " + "  ".join(
                    f"{k}={v:+.4f}" for k, v in rps.items()))
            hist = run_and_record(
                model, vn, n, args.max_steps, rcfg,
                seed=1000 + n, make_env_fn=make_env_at,
            )
            tag = "success" if hist["success"] else "fail"
            plot_trajectory(hist, os.path.join(eval_dir, f"traj_{n}s_{tag}.png"))
            plot_timeseries(hist, os.path.join(eval_dir, f"ts_{n}s_{tag}.png"))
            if not args.no_gif:
                save_episode_gif(
                    hist,
                    os.path.join(eval_dir, f"ep_{n}s_{tag}.gif"),
                    fps=args.gif_fps, skip=args.gif_skip)
            r["n_sheep"] = n
            stage_results.append(r)
        model.save(os.path.join(run_dir, "final_model"))
        vn.save(os.path.join(run_dir, "vecnorm.pkl"))
        with open(os.path.join(run_dir, "stage_results.json"), "w") as f:
            json.dump(stage_results, f, indent=2)
    finally:
        try:
            vn.close()
        except Exception:
            pass
    elapsed = (time.time() - t0) / 60
    print("\n" + "=" * 70)
    print("  TRAINING SUMMARY  (attention policy)")
    print("=" * 70)
    for r in stage_results:
        print(f"  n_sheep={r['n_sheep']}  sr={r['sr']*100:>3.0f}%  "
              f"len={r['mean_len']:>5.0f}  "
              f"min_pen={r['mean_min_pen']:>5.1f}m  "
              f"act={r['mean_act']:.2f}")
    print(f"\n  Total time: {elapsed:.1f} min")
    print(f"  Artefacts:  {run_dir}/")
    plot_success_rate(stage_results, os.path.join(run_dir, "success_rate.png"))
    print(f"  Plots:      {run_dir}/success_rate.png, {eval_dir}/")
 if __name__ == "__main__":
    main()
@@ -1,342 +0,0 @@
 """
 All visualization for the herding policy: trajectory plots, timeseries plots,
 success-rate bar chart, and animated GIFs.
 Used both by train.py (auto-rendered after each curriculum stage) and as a CLI
 to render a fresh episode against a saved model.
 CLI usage:
    python viz.py --run-dir runs/v1 --n-sheep 5
    python viz.py --run-dir runs/v1 --n-sheep 10 --no-gif
    python viz.py --model runs/v1/final_model.zip --vecnorm runs/v1/vecnorm.pkl \\
        --n-sheep 3 --out-dir vis_v1_3sheep
 """
 import argparse
 import os
 import json
 from copy import deepcopy
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
 import matplotlib.animation as animation
 from matplotlib.collections import LineCollection
 import numpy as np
 from stable_baselines3 import PPO
 from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
 from herding_env import HerdingEnv
 # ── Palette ──────────────────────────────────────────────────────────────────
 SHEEP_COLORS = [
    "#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00",
    "#a65628", "#f781bf", "#999999", "#66c2a5", "#fc8d62",
 ]
 DOG_COLOR = "#4e342e"
 # ── Common drawing primitives ────────────────────────────────────────────────
 def draw_field(ax):
    ax.set_xlim(-16, 16)
    ax.set_ylim(-16, 16)
    ax.set_aspect("equal")
    ax.set_facecolor("#dcedc8")
    ax.add_patch(mpatches.Rectangle(
        (-15, -15), 30, 30, fill=False, edgecolor="#795548", lw=2))
    ax.add_patch(mpatches.Rectangle(
        (10, -15), 3, 7, facecolor="#ffe082", edgecolor="#795548", lw=2))
    ax.text(11.5, -11.5, "pen", ha="center", va="center",
            fontsize=8, color="#795548")
 def faded_path(ax, xs, ys, color, lw=1.5, label=None):
    n = len(xs)
    if n < 2:
        return
    points = np.array([xs, ys]).T.reshape(-1, 1, 2)
    segs = np.concatenate([points[:-1], points[1:]], axis=1)
    alphas = np.linspace(0.15, 1.0, len(segs))
    colors = [(*matplotlib.colors.to_rgb(color), a) for a in alphas]
    ax.add_collection(LineCollection(segs, colors=colors, linewidth=lw))
    if label:
        ax.plot([], [], color=color, lw=lw, label=label)
 # ── Episode rollout ──────────────────────────────────────────────────────────
 def make_eval_env(n_sheep, seed, max_steps, reward_cfg=None):
    def _init():
        env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
                         reward_cfg=reward_cfg)
        env.reset(seed=seed)
        return env
    return _init
 def run_and_record(model, vn_template, n_sheep, max_steps,
                   reward_cfg=None, seed=42, make_env_fn=None):
    """Run one deterministic episode and return full trajectory history."""
    _factory = make_env_fn or make_eval_env
    raw = DummyVecEnv([_factory(n_sheep, seed, max_steps, reward_cfg)])
    vn = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
    vn.obs_rms = deepcopy(vn_template.obs_rms)
    vn.ret_rms = deepcopy(vn_template.ret_rms)
    obs = vn.reset()
    inner = vn.envs[0]
    done = False
    dog_xs, dog_ys = [], []
    sheep_xs = [[] for _ in range(n_sheep)]
    sheep_ys = [[] for _ in range(n_sheep)]
    sheep_penned = [[] for _ in range(n_sheep)]
    radii = []
    pen_dists = [[] for _ in range(n_sheep)]
    action_mags = []
    rewards = []
    penned_at = [None] * n_sheep
    step = 0
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, dones, infos = vn.step(action)
        done = dones[0]
        step += 1
        dog_xs.append(float(inner.dog_pos[0]))
        dog_ys.append(float(inner.dog_pos[1]))
        com, radius, _ = inner._flock_stats()
        radii.append(radius)
        rewards.append(float(reward[0]))
        action_mags.append(float(np.linalg.norm(action[0])))
        for i in range(n_sheep):
            sheep_xs[i].append(float(inner.sheep_pos[i][0]))
            sheep_ys[i].append(float(inner.sheep_pos[i][1]))
            sheep_penned[i].append(bool(inner.penned[i]))
            pen_dists[i].append(
                float(np.linalg.norm(inner.sheep_pos[i] - inner.PEN_CENTER)))
            if inner.penned[i] and penned_at[i] is None:
                penned_at[i] = step
    n_penned = infos[0].get("n_penned", 0)
    vn.close()
    return dict(
        dog_xs=dog_xs, dog_ys=dog_ys,
        sheep_xs=sheep_xs, sheep_ys=sheep_ys,
        sheep_penned=sheep_penned,
        radii=radii, pen_dists=pen_dists,
        action_mags=action_mags, rewards=rewards,
        penned_at=penned_at,
        n_penned=n_penned, n_sheep=n_sheep,
        success=n_penned == n_sheep, steps=step,
    )
 # ── Static plots ─────────────────────────────────────────────────────────────
 def plot_trajectory(hist, out_path):
    fig, ax = plt.subplots(figsize=(7, 7))
    draw_field(ax)
    for i in range(hist["n_sheep"]):
        c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
        xs, ys = hist["sheep_xs"][i], hist["sheep_ys"][i]
        faded_path(ax, xs, ys, c, lw=1.2, label=f"sheep {i+1}")
        ax.plot(xs[0], ys[0], "o", color=c, ms=7, zorder=4)
        end = hist["penned_at"][i] if hist["penned_at"][i] is not None else -1
        ax.plot(xs[end], ys[end], "*", color=c, ms=11, zorder=5)
    faded_path(ax, hist["dog_xs"], hist["dog_ys"], DOG_COLOR, lw=2.0,
               label="dog")
    ax.plot(hist["dog_xs"][0], hist["dog_ys"][0], "s", color=DOG_COLOR,
            ms=10, zorder=5)
    ax.plot(hist["dog_xs"][-1], hist["dog_ys"][-1], "D", color=DOG_COLOR,
            ms=10, zorder=5)
    result = ("SUCCESS" if hist["success"]
              else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})")
    ax.set_title(f"n={hist['n_sheep']}  {result}  {hist['steps']} steps",
                 fontsize=12)
    ax.legend(loc="upper left", fontsize=8)
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
 def plot_timeseries(hist, out_path):
    t = np.arange(hist["steps"])
    fig, axes = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
    axes[0].plot(t, hist["radii"], color="steelblue")
    axes[0].axhline(5.0, color="orange", ls="--", lw=1, label="compact (5m)")
    axes[0].set_ylabel("flock radius (m)")
    axes[0].legend(fontsize=8)
    axes[0].set_title("Flock radius")
    for i in range(hist["n_sheep"]):
        c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
        axes[1].plot(t, hist["pen_dists"][i], color=c, lw=1,
                     label=f"sheep {i+1}")
        if hist["penned_at"][i] is not None:
            axes[1].axvline(hist["penned_at"][i], color=c, ls=":", lw=1)
    axes[1].set_ylabel("dist to pen (m)")
    axes[1].legend(fontsize=7, ncol=min(hist["n_sheep"], 5))
    axes[1].set_title("Per-sheep distance to pen")
    axes[2].plot(t, hist["action_mags"], color="tomato", lw=1)
    axes[2].axhline(1.0, color="gray", ls="--", lw=1, label="max")
    axes[2].set_ylabel("action ||(vx,vy)||")
    axes[2].set_ylim(0, 1.5)
    axes[2].set_title("Dog action magnitude")
    axes[2].legend(fontsize=8)
    axes[3].plot(t, hist["rewards"], color="purple", lw=1, alpha=0.7)
    axes[3].axhline(0, color="black", lw=0.5)
    axes[3].set_ylabel("reward")
    axes[3].set_xlabel("step")
    axes[3].set_title("Reward per step")
    result = ("SUCCESS" if hist["success"]
              else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})")
    fig.suptitle(f"n_sheep={hist['n_sheep']}  {result}  {hist['steps']} steps",
                 fontsize=13)
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
 def plot_success_rate(stage_results, out_path):
    fig, ax = plt.subplots(figsize=(8, 4))
    ns = [r["n_sheep"] for r in stage_results]
    srs = [r["sr"] * 100 for r in stage_results]
    bars = ax.bar(ns, srs, color="steelblue", edgecolor="white")
    ax.set_xlabel("Sheep count")
    ax.set_ylabel("Success rate (%)")
    ax.set_ylim(0, 105)
    ax.axhline(90, color="orange", ls="--", lw=1, label="90% target")
    for bar, sr in zip(bars, srs):
        ax.text(bar.get_x() + bar.get_width() / 2,
                bar.get_height() + 1, f"{sr:.0f}%",
                ha="center", fontsize=9)
    ax.legend()
    ax.set_title("Evaluation success rate per sheep count")
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
 # ── Animated GIF ─────────────────────────────────────────────────────────────
 def save_episode_gif(hist, out_path, fps=20, skip=3):
    """Render hist as an animated GIF. `skip` keeps every Nth frame (smaller file)."""
    n_sheep = hist["n_sheep"]
    frames = list(range(0, hist["steps"], max(1, skip)))
    if frames[-1] != hist["steps"] - 1:
        frames.append(hist["steps"] - 1)
    fig, ax = plt.subplots(figsize=(6, 6))
    draw_field(ax)
    title = ax.text(0, 16.5, "", ha="center", fontsize=11)
    dog_marker, = ax.plot([], [], "s", color=DOG_COLOR, ms=12,
                          markeredgecolor="black", markeredgewidth=1.5,
                          zorder=5)
    sheep_markers = []
    for i in range(n_sheep):
        c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
        m, = ax.plot([], [], "o", color=c, ms=10,
                     markeredgecolor="#333", markeredgewidth=1, zorder=4)
        sheep_markers.append(m)
    dog_trail, = ax.plot([], [], color=DOG_COLOR, lw=1.0, alpha=0.5)
    def update(k):
        title.set_text(
            f"n={n_sheep}  step {k+1}/{hist['steps']}  "
            f"penned {sum(hist['sheep_penned'][i][k] for i in range(n_sheep))}/{n_sheep}")
        dog_marker.set_data([hist["dog_xs"][k]], [hist["dog_ys"][k]])
        dog_trail.set_data(hist["dog_xs"][:k+1], hist["dog_ys"][:k+1])
        for i, m in enumerate(sheep_markers):
            m.set_data([hist["sheep_xs"][i][k]], [hist["sheep_ys"][i][k]])
            penned = hist["sheep_penned"][i][k]
            m.set_color("deeppink" if penned else SHEEP_COLORS[i % len(SHEEP_COLORS)])
        return [title, dog_marker, dog_trail, *sheep_markers]
    anim = animation.FuncAnimation(
        fig, update, frames=frames, interval=1000 / fps, blit=False)
    anim.save(out_path, writer=animation.PillowWriter(fps=fps), dpi=80)
    plt.close(fig)
 # ── CLI ──────────────────────────────────────────────────────────────────────
 def _resolve_paths(args):
    if args.run_dir:
        model_path  = os.path.join(args.run_dir, "final_model.zip")
        vn_path     = os.path.join(args.run_dir, "vecnorm.pkl")
        cfg_path    = os.path.join(args.run_dir, "config.json")
    else:
        model_path  = args.model
        vn_path     = args.vecnorm
        cfg_path    = args.config
    return model_path, vn_path, cfg_path
 def main():
    p = argparse.ArgumentParser(
        description="Render trajectory + timeseries + GIF for a saved policy.")
    p.add_argument("--run-dir", type=str, default=None,
                   help="Run directory containing final_model.zip + vecnorm.pkl + config.json")
    p.add_argument("--model",   type=str, default=None)
    p.add_argument("--vecnorm", type=str, default=None)
    p.add_argument("--config",  type=str, default=None)
    p.add_argument("--n-sheep", type=int, default=3)
    p.add_argument("--seed",    type=int, default=42)
    p.add_argument("--max-steps", type=int, default=2500)
    p.add_argument("--out-dir", type=str, default=None)
    p.add_argument("--no-gif",  action="store_true",
                   help="Skip the animated GIF (PNG-only is faster).")
    p.add_argument("--gif-fps", type=int, default=20)
    p.add_argument("--gif-skip", type=int, default=3)
    args = p.parse_args()
    model_path, vn_path, cfg_path = _resolve_paths(args)
    if not (model_path and vn_path):
        p.error("either --run-dir or both --model and --vecnorm are required")
    rcfg = None
    if cfg_path and os.path.exists(cfg_path):
        with open(cfg_path) as f:
            cfg = json.load(f)
        rcfg = {k: v for k, v in cfg.items() if hasattr(HerdingEnv, k)}
    out_dir = args.out_dir or os.path.join(
        os.path.dirname(os.path.abspath(model_path)),
        f"vis_{args.n_sheep}s")
    os.makedirs(out_dir, exist_ok=True)
    print(f"Loading model:   {model_path}")
    print(f"Loading vecnorm: {vn_path}")
    model = PPO.load(model_path, device="cpu")
    raw = DummyVecEnv([make_eval_env(args.n_sheep, args.seed, args.max_steps, rcfg)])
    vn = VecNormalize.load(vn_path, raw)
    print(f"Rolling out n_sheep={args.n_sheep} (seed={args.seed})...")
    hist = run_and_record(model, vn, args.n_sheep, args.max_steps,
                          reward_cfg=rcfg, seed=args.seed)
    result = "SUCCESS" if hist["success"] else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})"
    print(f"  {result} in {hist['steps']} steps")
    plot_trajectory(hist, os.path.join(out_dir, "trajectory.png"))
    plot_timeseries(hist, os.path.join(out_dir, "timeseries.png"))
    print(f"  saved trajectory.png + timeseries.png to {out_dir}/")
    if not args.no_gif:
        gif_path = os.path.join(out_dir, "episode.gif")
        print(f"  rendering GIF (fps={args.gif_fps}, skip={args.gif_skip})...")
        save_episode_gif(hist, gif_path, fps=args.gif_fps, skip=args.gif_skip)
        print(f"  saved {gif_path}")
 if __name__ == "__main__":
    main()
@@ -1,9 +0,0 @@
 Webots Project File version R2025a
 perspectives: 000000ff00000000fd00000002000000010000011c00000298fc0200000001fb0000001400540065007800740045006400690074006f00720100000000000002980000003f00ffffff000000030000084300000238fc0100000001fb0000001a0043006f006e0073006f006c00650041006c006c0041006c006c0100000000000008430000006900ffffff000007250000029800000001000000020000000100000008fc00000000
 simulationViewPerspectives: 000000ff000000010000000200000100000006250100000002010000000100
 sceneTreePerspectives: 000000ff00000001000000030000001f000000c0000000000100000002010000000200
 maximizedDockId: -1
 centralWidgetVisible: 1
 orthographicViewHeight: 1
 textFiles: -1
 consoles: Console:All:All
@@ -10,7 +10,7 @@ EXTERNPROTO "../protos/Sheep.proto"
 # World
 WorldInfo {
  info [
-    "RL-Based Autonomous Shepherd Robot"
+    "Autonomous Shepherd Robot (Strömbom)"
    "Group G25"
  ]
  title "Shepherd Herding"
@@ -106,19 +106,26 @@ Solid { translation -2.5 -15 0.84 children [ Shape { appearance USE CAP geometry
 Solid { translation 14 -15 0.40 children [ Shape { appearance USE STONE_A geometry Box { size 2.0 0.16 0.80 } } ] boundingObject Box { size 2.0 0.16 0.80 } }
 Solid { translation 14 -15 0.84 children [ Shape { appearance USE CAP geometry Box { size 2.1 0.26 0.07 } } ] boundingObject Box { size 2.1 0.26 0.07 } }
 # Gate posts
-Solid { translation 10 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
+Solid { translation 10 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
-Solid { translation 13 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
+Solid { translation 13 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
-# Outer gate (wooden, slightly ajar, Z-brace)
+# Outer gate — fully open, hinged on the west gate post. Modeled as a swung-back
-Solid { translation 11.5 -15.08 0.55 rotation 0 0 1 0.25 children [
+# wooden gate parallel to the south wall, on the west side, so the 3m corridor
 # between gate posts (x=10..13, y=-15) is unobstructed.
 Solid { translation 8.6 -15.05 0.55 rotation 0 0 1 0 children [
  Shape { appearance USE WOOD geometry Box { size 2.80 0.05 1.00 } }
-  Transform { translation 0 0.02 0 rotation 0 1 0 0.34 children [ Shape { appearance DEF FPOST PBRAppearance { baseColor 0.35 0.22 0.10 roughness 0.90 } geometry Box { size 2.97 0.04 0.06 } } ] }
+  # FPOST appearance DEF lives here so the external pen below can USE it.
  Transform { translation 0 0.02 0 rotation 0 1 0 0.34 children [
    Shape { appearance DEF FPOST PBRAppearance { baseColor 0.35 0.22 0.10 roughness 0.90 } geometry Box { size 2.97 0.04 0.06 } }
  ] }
 ] boundingObject Box { size 2.80 0.08 1.00 } }
-# ==================== QUARANTINE PEN (wooden post-and-rail fence, inside field) ====================
+# ==================== EXTERNAL PEN (south of field, accessed through south-wall gate) ====================
-# Flow: main field → inner gate → quarantine area → outer gate → outside
+# Flow: main field → south-wall gate (x ∈ [10, 13], y = -15) → external pen
 # The pen is a wooden post-and-rail rectangle south of the field, x ∈ [10, 13],
 # y ∈ [-22, -15], open on the north side (the gate hole is the entrance).
-# West wall (x=10, ~7m along Y)
+# Pen west wall (x=10, y from -22 to -15, length 7m)
-Solid { translation 10 -11.46 0.55 children [
+Solid { translation 10 -18.5 0.55 children [
  Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
@@ -130,8 +137,8 @@ Solid { translation 10 -11.46 0.55 children [
  Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
 ] boundingObject Box { size 0.14 6.92 1.10 } }
-# East wall (x=13)
+# Pen east wall (x=13, y from -22 to -15, length 7m)
-Solid { translation 13 -11.46 0.55 children [
+Solid { translation 13 -18.5 0.55 children [
  Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
@@ -143,39 +150,50 @@ Solid { translation 13 -11.46 0.55 children [
  Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
 ] boundingObject Box { size 0.14 6.92 1.10 } }
-# North wall - open entrance (no wall, just corner posts)
+# Pen south wall (y=-22, x from 10 to 13, length 3m, closes the back of the pen)
-Solid { translation 10 -8 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] boundingObject Box { size 0.12 0.12 1.10 } }
+Solid { translation 11.5 -22 0.55 children [
-Solid { translation 13 -8 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] boundingObject Box { size 0.12 0.12 1.10 } }
+  Transform { translation -1.5 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation  0   0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation  1.5 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 0 -0.38 children [ Shape { appearance USE WOOD geometry Box { size 2.92 0.06 0.08 } } ] }
  Transform { translation 0 0 -0.05 children [ Shape { appearance USE WOOD geometry Box { size 2.92 0.06 0.08 } } ] }
  Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 2.92 0.06 0.08 } } ] }
  Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 2.92 0.14 0.04 } } ] }
 ] boundingObject Box { size 2.92 0.14 1.10 } }
 # Pen north corner posts at the gate opening (no wall — sheep enter here from the field)
 Solid { translation 10 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
 Solid { translation 13 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
 # Corner pillars
-Solid { translation  15  15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
+Solid { translation  15  15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
-Solid { translation  15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
+Solid { translation  15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
-Solid { translation -15  15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
+Solid { translation -15  15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
-Solid { translation -15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
+Solid { translation -15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
 # Mid-pillars every 5 m — East
-Solid { translation  15  10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation  15  10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation  15   5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation  15   5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation  15   0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation  15   0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation  15  -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation  15  -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation  15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation  15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 # West
-Solid { translation -15  10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation -15  10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation -15   5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation -15   5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation -15   0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation -15   0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation -15  -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation -15  -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation -15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation -15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 # North
-Solid { translation  10  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation  10  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation   5  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation   5  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation   0  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation   0  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation  -5  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation  -5  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation -10  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation -10  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 # South
-Solid { translation   5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation   5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation   0 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation   0 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation  -5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation  -5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
-Solid { translation -10 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation -10 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 # ==================== BARN 1 — Gambrel/Dutch style (NE, outside fence) ====================
 # Body 10×7×4, weathered gray-brown wood, gambrel roof, large double doors
@@ -503,28 +521,16 @@ ShepherdDog {
 }
 # ==================== SHEEP ====================
-Sheep {
+# Up to 10 sheep, scattered through the field's central/north zone. Comment
-  translation 3 2 0.5
+# out trailing slots to test smaller flock sizes; the dog policy is trained
-  name "sheep1"
+# to handle 1..10 sheep so any prefix works.
-  controller "sheep"
+Sheep { translation  3.0  2.0 0.5 name "sheep1"  controller "sheep" }
-}
+Sheep { translation  3.0 -2.0 0.5 name "sheep2"  controller "sheep" }
-Sheep {
+Sheep { translation  4.0  0.0 0.5 name "sheep3"  controller "sheep" }
-  translation 3 -2 0.5
+Sheep { translation -3.0  4.0 0.5 name "sheep4"  controller "sheep" }
-  name "sheep2"
+Sheep { translation -5.0 -2.0 0.5 name "sheep5"  controller "sheep" }
-  controller "sheep"
+Sheep { translation  6.0  5.0 0.5 name "sheep6"  controller "sheep" }
-}
+Sheep { translation -6.0  6.0 0.5 name "sheep7"  controller "sheep" }
-Sheep {
+Sheep { translation  0.0  8.0 0.5 name "sheep8"  controller "sheep" }
-  translation 4 0 0.5
+Sheep { translation -8.0  0.0 0.5 name "sheep9"  controller "sheep" }
-  name "sheep3"
+Sheep { translation  7.0 -4.0 0.5 name "sheep10" controller "sheep" }
  controller "sheep"
 }
 # Sheep {
 #   translation 3.5 1 0.5
 #   name "sheep4"
 #   controller "sheep"
 # }
 # Sheep {
 #   translation 3.5 -1 0.5
 #   name "sheep5"
 #   controller "sheep"
 # }
@@ -0,0 +1,537 @@
 #VRML_SIM R2025a utf8
 EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/objects/backgrounds/protos/TexturedBackground.proto"
 EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/objects/backgrounds/protos/TexturedBackgroundLight.proto"
 EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/objects/floors/protos/UnevenTerrain.proto"
 EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/appearances/protos/Grass.proto"
 EXTERNPROTO "../protos/ShepherdDog.proto"
 EXTERNPROTO "../protos/Sheep.proto"
 # World
 WorldInfo {
  info [
    "Autonomous Shepherd Robot (Strömbom)"
    "Group G25"
  ]
  title "Shepherd Herding (Round)"
  ERP 0.62
  basicTimeStep 16
  contactProperties [
    ContactProperties {
      coulombFriction [
        12
      ]
      softCFM 1e-05
    }
  ]
 }
 # Viewpoint
 DEF VIEWPOINT Viewpoint {
  position 4.34 -100.99 41.73
  orientation 0.199 -0.190 -0.961 4.624
  fieldOfView 0.785
 }
 # Background
 Background {
  skyColor [0.55 0.75 0.95]
 }
 # Single sun (diagonal from SW)
 DirectionalLight {
  ambientIntensity 1
  direction -0.3 0.5 -0.85
  color 1 0.98 0.92
  intensity 2.5
  castShadows TRUE
 }
 # Grass terrain
 UnevenTerrain {
  rotation 0 0 1 -1.5708
  size 100 100 0.3
  xDimension 50
  yDimension 50
  appearance Grass {
    colorOverride 0.78 0.88 0.68
    textureTransform TextureTransform {
      scale 100 100
    }
  }
  perlinNOctaves 4
 }
 # ==================== APPEARANCES ====================
 Transform {
  children [
    Shape { appearance DEF STONE_A    PBRAppearance { baseColor 0.48 0.45 0.40 roughness 0.95 metalness 0 } }
    Shape { appearance DEF STONE_B    PBRAppearance { baseColor 0.36 0.33 0.29 roughness 0.95 metalness 0 } }
    Shape { appearance DEF STONE_C    PBRAppearance { baseColor 0.58 0.55 0.50 roughness 0.90 metalness 0 } }
    Shape { appearance DEF CAP        PBRAppearance { baseColor 0.54 0.51 0.46 roughness 0.80 metalness 0 } }
    Shape { appearance DEF BARN_RED   PBRAppearance { baseColor 0.62 0.18 0.12 roughness 0.80 metalness 0 } }
    Shape { appearance DEF BARN_ROOF  PBRAppearance { baseColor 0.28 0.20 0.13 roughness 0.72 metalness 0 } }
    Shape { appearance DEF WOOD       PBRAppearance { baseColor 0.48 0.32 0.16 roughness 0.90 metalness 0 } }
    Shape { appearance DEF TRUNK      PBRAppearance { baseColor 0.38 0.24 0.11 roughness 0.90 metalness 0 } }
    Shape { appearance DEF LEAF_A     PBRAppearance { baseColor 0.22 0.52 0.16 roughness 0.85 metalness 0 } }
    Shape { appearance DEF LEAF_B     PBRAppearance { baseColor 0.16 0.42 0.10 roughness 0.85 metalness 0 } }
    Shape { appearance DEF LEAF_C     PBRAppearance { baseColor 0.30 0.60 0.20 roughness 0.80 metalness 0 } }
    Shape { appearance DEF STRAW      PBRAppearance { baseColor 0.85 0.75 0.35 roughness 0.95 metalness 0 } }
    Shape { appearance DEF HAT        PBRAppearance { baseColor 0.50 0.35 0.18 roughness 0.85 metalness 0 } }
    Shape { appearance DEF SHIRT      PBRAppearance { baseColor 0.60 0.30 0.30 roughness 0.80 metalness 0 } }
    Shape { appearance DEF PANTS      PBRAppearance { baseColor 0.25 0.25 0.30 roughness 0.80 metalness 0 } }
    Shape { appearance DEF DOOR_MAT   PBRAppearance { baseColor 0.55 0.38 0.20 roughness 0.72 metalness 0 } }
    Shape { appearance DEF GLASS      PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } }
    Shape { appearance DEF HAY        PBRAppearance { baseColor 0.82 0.72 0.32 roughness 0.95 metalness 0 } }
  ]
 }
 DEF TRIM       PBRAppearance { baseColor 0.90 0.88 0.82 roughness 0.70 metalness 0 }
 # ==================== CIRCULAR STONE WALL (R=15 m) ====================
 Solid { translation 15.00 0.00 0.40 rotation 0 0 1 -1.5708 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation 15.00 0.00 0.84 rotation 0 0 1 -1.5708 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation 14.10 5.13 0.40 rotation 0 0 1 -1.2217 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation 14.10 5.13 0.84 rotation 0 0 1 -1.2217 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation 11.49 9.64 0.40 rotation 0 0 1 -0.8727 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation 11.49 9.64 0.84 rotation 0 0 1 -0.8727 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation 7.50 12.99 0.40 rotation 0 0 1 -0.5236 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation 7.50 12.99 0.84 rotation 0 0 1 -0.5236 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation 2.60 14.77 0.40 rotation 0 0 1 -0.1745 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation 2.60 14.77 0.84 rotation 0 0 1 -0.1745 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation -2.60 14.77 0.40 rotation 0 0 1 0.1745 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation -2.60 14.77 0.84 rotation 0 0 1 0.1745 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation -7.50 12.99 0.40 rotation 0 0 1 0.5236 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation -7.50 12.99 0.84 rotation 0 0 1 0.5236 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation -11.49 9.64 0.40 rotation 0 0 1 0.8727 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation -11.49 9.64 0.84 rotation 0 0 1 0.8727 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation -14.10 5.13 0.40 rotation 0 0 1 1.2217 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation -14.10 5.13 0.84 rotation 0 0 1 1.2217 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation -15.00 0.00 0.40 rotation 0 0 1 1.5708 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation -15.00 0.00 0.84 rotation 0 0 1 1.5708 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation -14.10 -5.13 0.40 rotation 0 0 1 1.9199 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation -14.10 -5.13 0.84 rotation 0 0 1 1.9199 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation -11.49 -9.64 0.40 rotation 0 0 1 2.2689 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation -11.49 -9.64 0.84 rotation 0 0 1 2.2689 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation -7.50 -12.99 0.40 rotation 0 0 1 2.6180 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation -7.50 -12.99 0.84 rotation 0 0 1 2.6180 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation -3.37 -14.62 0.40 rotation 0 0 1 2.9671 children [ Shape { appearance USE STONE_A geometry Box { size 3.65 0.16 0.80 } } ] boundingObject Box { size 3.65 0.16 0.80 } }
 Solid { translation -3.37 -14.62 0.84 rotation 0 0 1 2.9671 children [ Shape { appearance USE CAP geometry Box { size 3.7 0.26 0.07 } } ] boundingObject Box { size 3.7 0.26 0.07 } }
 Solid { translation 3.37 -14.62 0.40 rotation 0 0 1 3.3161 children [ Shape { appearance USE STONE_A geometry Box { size 3.65 0.16 0.80 } } ] boundingObject Box { size 3.65 0.16 0.80 } }
 Solid { translation 3.37 -14.62 0.84 rotation 0 0 1 3.3161 children [ Shape { appearance USE CAP geometry Box { size 3.7 0.26 0.07 } } ] boundingObject Box { size 3.7 0.26 0.07 } }
 Solid { translation 7.50 -12.99 0.40 rotation 0 0 1 3.6652 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation 7.50 -12.99 0.84 rotation 0 0 1 3.6652 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation 11.49 -9.64 0.40 rotation 0 0 1 4.0143 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation 11.49 -9.64 0.84 rotation 0 0 1 4.0143 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 Solid { translation 14.10 -5.13 0.40 rotation 0 0 1 4.3633 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
 Solid { translation 14.10 -5.13 0.84 rotation 0 0 1 4.3633 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
 # Gate posts
 Solid { translation -1.57 -14.92 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
 Solid { translation 1.57 -14.92 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
 # Outer gate — swung-back beside west gate post
 Solid { translation -2.97 -14.92 0.55 rotation 0 0 1 0 children [
  Shape { appearance USE WOOD geometry Box { size 2.80 0.05 1.00 } }
  Transform { translation 0 0.02 0 rotation 0 1 0 0.34 children [
    Shape { appearance DEF FPOST PBRAppearance { baseColor 0.35 0.22 0.10 roughness 0.90 } geometry Box { size 2.97 0.04 0.06 } }
  ] }
 ] boundingObject Box { size 2.80 0.08 1.00 } }
 # Pillars between wall sections
 Solid { translation 14.97 2.64 0.53 rotation 0 0 1 0.9599 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation 13.16 7.60 0.53 rotation 0 0 1 1.3090 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation 9.77 11.64 0.53 rotation 0 0 1 1.6581 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation 5.20 14.28 0.53 rotation 0 0 1 2.0071 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation 0.00 15.20 0.53 rotation 0 0 1 2.3562 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation -5.20 14.28 0.53 rotation 0 0 1 2.7053 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation -9.77 11.64 0.53 rotation 0 0 1 3.0543 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation -13.16 7.60 0.53 rotation 0 0 1 3.4034 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation -14.97 2.64 0.53 rotation 0 0 1 3.7525 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation -14.97 -2.64 0.53 rotation 0 0 1 4.1015 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation -13.16 -7.60 0.53 rotation 0 0 1 4.4506 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation -9.77 -11.64 0.53 rotation 0 0 1 4.7997 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation -5.20 -14.28 0.53 rotation 0 0 1 5.1487 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation 5.20 -14.28 0.53 rotation 0 0 1 5.8469 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation 9.77 -11.64 0.53 rotation 0 0 1 6.1959 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation 13.16 -7.60 0.53 rotation 0 0 1 6.5450 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 Solid { translation 14.97 -2.64 0.53 rotation 0 0 1 6.8941 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 # ==================== EXTERNAL PEN (south of round field gate) ====================
 # Pen west wall
 Solid { translation -1.57 -18.5 0.55 children [
  Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 0 -0.38 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
  Transform { translation 0 0 -0.05 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
  Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
  Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
 ] boundingObject Box { size 0.14 6.92 1.10 } }
 # Pen east wall
 Solid { translation 1.57 -18.5 0.55 children [
  Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 0 -0.38 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
  Transform { translation 0 0 -0.05 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
  Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
  Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
 ] boundingObject Box { size 0.14 6.92 1.10 } }
 # Pen south wall
 Solid { translation 0.00 -22 0.55 children [
  Transform { translation -1.52 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation  0   0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation  1.52 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 0 -0.38 children [ Shape { appearance USE WOOD geometry Box { size 3.16 0.06 0.08 } } ] }
  Transform { translation 0 0 -0.05 children [ Shape { appearance USE WOOD geometry Box { size 3.16 0.06 0.08 } } ] }
  Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 3.16 0.06 0.08 } } ] }
  Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 3.16 0.14 0.04 } } ] }
 ] boundingObject Box { size 3.16 0.14 1.10 } }
 # Pen north corner posts at the gate opening
 Solid { translation -1.57 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
 Solid { translation 1.57 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
 # Gate width: 3.14 m (pen x: [-1.57, 1.57])
 # ==================== BARN 1 — Gambrel/Dutch style (NE, outside fence) ====================
 # Body 10×7×4, weathered gray-brown wood, gambrel roof, large double doors
 Solid {
  translation 18.5 25.49 2
  children [
    Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 10 7 4 } }
    # Gambrel roof
    Transform { translation -3.5 0 3.05 rotation 0 1 0 -0.611 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 3.9 7.2 0.18 } } ] }
    Transform { translation 3.5 0 3.05 rotation 0 1 0 0.611 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 3.9 7.2 0.18 } } ] }
    Transform { translation -1.0 0 4.55 rotation 0 1 0 -0.422 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 2.5 7.2 0.18 } } ] }
    Transform { translation 1.0 0 4.55 rotation 0 1 0 0.422 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 2.5 7.2 0.18 } } ] }
    Transform { translation 0 0 5.04 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 1.6 7.2 0.22 } } ] }
    # South gable fill
    Transform { translation 0 -3.57 2.40 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 8.8 0.16 0.80 } } ] }
    Transform { translation 0 -3.57 3.10 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 6.8 0.16 0.70 } } ] }
    Transform { translation 0 -3.57 3.70 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 5.1 0.16 0.60 } } ] }
    Transform { translation 0 -3.57 4.10 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 4.0 0.16 0.40 } } ] }
    Transform { translation 0 -3.57 4.42 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 2.7 0.16 0.60 } } ] }
    Transform { translation 0 -3.57 4.84 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 0.9 0.16 0.36 } } ] }
    # North gable fill
    Transform { translation 0  3.57 2.40 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 8.8 0.16 0.80 } } ] }
    Transform { translation 0  3.57 3.10 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 6.8 0.16 0.70 } } ] }
    Transform { translation 0  3.57 3.70 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 5.1 0.16 0.60 } } ] }
    Transform { translation 0  3.57 4.10 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 4.0 0.16 0.40 } } ] }
    Transform { translation 0  3.57 4.42 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 2.7 0.16 0.60 } } ] }
    Transform { translation 0  3.57 4.84 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 0.9 0.16 0.36 } } ] }
    # Double barn doors (south face)
    Transform {
      translation 0 -3.51 -0.50
      children [
        Shape { appearance PBRAppearance { baseColor 0.44 0.30 0.14 roughness 0.88 metalness 0 } geometry Box { size 2.8 0.10 3.0 } }
        Transform { rotation 0 0 1  0.83 children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 0.10 0.12 3.75 } } ] }
        Transform { rotation 0 0 1 -0.83 children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 0.10 0.12 3.75 } } ] }
        Transform { translation -1.45 0  0    children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 0.12 0.14 3.24 } } ] }
        Transform { translation  1.45 0  0    children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 0.12 0.14 3.24 } } ] }
        Transform { translation  0    0  1.62 children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 3.04 0.14 0.14 } } ] }
      ]
    }
    # Windows
    Transform { translation -3.6 -3.52 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } geometry Box { size 1.40 0.12 1.10 } } ] }
    Transform { translation  3.6 -3.52 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } geometry Box { size 1.40 0.12 1.10 } } ] }
    Transform { translation 5.06  2.0 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } geometry Box { size 0.12 1.20 1.0 } } ] }
    Transform { translation 5.06 -2.0 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } geometry Box { size 0.12 1.20 1.0 } } ] }
    Transform { translation 0 -3.52 3.90 children [ Shape { appearance PBRAppearance { baseColor 0.44 0.30 0.14 roughness 0.88 metalness 0 } geometry Box { size 1.30 0.12 1.00 } } ] }
  ]
  boundingObject Box { size 10 7 7 }
 }
 # ==================== BARN 3 — Red barn (NE, outside fence, gate facing fence) ====================
 # Body 7×9×3.5, red walls, steep dark roof
 Solid {
  translation 29.76 9.52 1.75
  rotation 0 0 1 -1.5708
  children [
    Shape { appearance USE BARN_RED geometry Box { size 7 9 3.5 } }
    # Roof
    Transform { translation -2.0 0 3.0 rotation 0 1 0 -0.70 children [ Shape { appearance USE BARN_ROOF geometry Box { size 4.2 9.2 0.20 } } ] }
    Transform { translation 2.0 0 3.0 rotation 0 1 0 0.70 children [ Shape { appearance USE BARN_ROOF geometry Box { size 4.2 9.2 0.20 } } ] }
    Transform { translation 0 0 4.28 children [ Shape { appearance USE BARN_ROOF geometry Box { size 2.0 9.2 0.24 } } ] }
    # South gable fill
    Transform { translation 0 -4.52 2.05 children [ Shape { appearance USE BARN_RED geometry Box { size 6.2 0.16 0.60 } } ] }
    Transform { translation 0 -4.52 2.65 children [ Shape { appearance USE BARN_RED geometry Box { size 4.5 0.16 0.60 } } ] }
    Transform { translation 0 -4.52 3.25 children [ Shape { appearance USE BARN_RED geometry Box { size 2.9 0.16 0.60 } } ] }
    Transform { translation 0 -4.52 3.85 children [ Shape { appearance USE BARN_RED geometry Box { size 1.2 0.16 0.60 } } ] }
    # North gable fill
    Transform { translation 0  4.52 2.05 children [ Shape { appearance USE BARN_RED geometry Box { size 6.2 0.16 0.60 } } ] }
    Transform { translation 0  4.52 2.65 children [ Shape { appearance USE BARN_RED geometry Box { size 4.5 0.16 0.60 } } ] }
    Transform { translation 0  4.52 3.25 children [ Shape { appearance USE BARN_RED geometry Box { size 2.9 0.16 0.60 } } ] }
    Transform { translation 0  4.52 3.85 children [ Shape { appearance USE BARN_RED geometry Box { size 1.2 0.16 0.60 } } ] }
    # Door
    Transform {
      translation 0 -4.52 -0.62
      children [
        Shape { appearance USE DOOR_MAT geometry Box { size 1.70 0.14 2.26 } }
        Transform { translation 0 0 1.22 children [ Shape { appearance USE WOOD geometry Box { size 2.10 0.18 0.26 } } ] }
        Transform { translation -0.90 0  0 children [ Shape { appearance USE WOOD geometry Box { size 0.24 0.18 2.52 } } ] }
        Transform { translation  0.90 0  0 children [ Shape { appearance USE WOOD geometry Box { size 0.24 0.18 2.52 } } ] }
        Transform { translation 0 0 -0.68 children [ Shape { appearance USE WOOD geometry Box { size 1.60 0.12 0.12 } } ] }
        Transform { translation 0 0  0.30 children [ Shape { appearance USE WOOD geometry Box { size 1.60 0.12 0.12 } } ] }
      ]
    }
    # Windows — south face
    Transform { translation -2.2 -4.53 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.80 0.14 0.70 } } ] }
    Transform { translation  2.2 -4.53 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.80 0.14 0.70 } } ] }
    # East-face windows
    Transform { translation 3.52  3.0 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.14 0.80 0.70 } } ] }
    Transform { translation 3.52  0.0 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.14 0.80 0.70 } } ] }
    Transform { translation 3.52 -3.0 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.14 0.80 0.70 } } ] }
  ]
  boundingObject Box { size 7 9 6 }
 }
 # ==================== TREES (outside fence) ====================
 # Tree A — large oak, SE
 Solid {
  translation 20 -18 0
  children [
    Transform { translation 0 0 2.0 children [ Shape { appearance USE TRUNK geometry Cylinder { height 4.0 radius 0.30 subdivision 10 } } ] }
    Transform { translation  0.0  0.0 4.6 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 2.6 subdivision 4 } } ] }
    Transform { translation  1.2  0.6 5.6 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.9 subdivision 4 } } ] }
    Transform { translation -1.0  0.9 5.3 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.7 subdivision 4 } } ] }
    Transform { translation  0.4 -1.1 5.1 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.5 subdivision 4 } } ] }
    Transform { translation -0.5 -0.4 6.2 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.0 subdivision 4 } } ] }
  ]
 }
 # Tree B — medium, NE near barn
 Solid {
  translation -8 26 0
  children [
    Transform { translation 0 0 1.7 children [ Shape { appearance USE TRUNK geometry Cylinder { height 3.4 radius 0.25 subdivision 10 } } ] }
    Transform { translation  0.0  0.0 3.8 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 2.2 subdivision 4 } } ] }
    Transform { translation  0.9 -0.7 4.7 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.6 subdivision 4 } } ] }
    Transform { translation -0.6  0.8 4.4 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.4 subdivision 4 } } ] }
  ]
 }
 # Tree C — large, NW
 Solid {
  translation -23 20 0
  children [
    Transform { translation 0 0 2.3 children [ Shape { appearance USE TRUNK geometry Cylinder { height 4.6 radius 0.36 subdivision 10 } } ] }
    Transform { translation  0.0  0.0 5.2 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 2.9 subdivision 4 } } ] }
    Transform { translation  1.3  0.9 6.3 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 2.1 subdivision 4 } } ] }
    Transform { translation -1.1  1.1 6.0 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.9 subdivision 4 } } ] }
    Transform { translation  0.6 -1.3 5.8 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.6 subdivision 4 } } ] }
  ]
 }
 # Tree D — small, SW
 Solid {
  translation -20 -23 0
  children [
    Transform { translation 0 0 1.4 children [ Shape { appearance USE TRUNK geometry Cylinder { height 2.8 radius 0.20 subdivision 10 } } ] }
    Transform { translation  0.0  0.0 3.2 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.9 subdivision 4 } } ] }
    Transform { translation -0.7  0.6 4.0 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.4 subdivision 4 } } ] }
    Transform { translation  0.6 -0.5 3.8 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.2 subdivision 4 } } ] }
  ]
 }
 # Tree E — north cluster
 Solid {
  translation 7 23 0
  children [
    Transform { translation 0 0 1.9 children [ Shape { appearance USE TRUNK geometry Cylinder { height 3.8 radius 0.27 subdivision 10 } } ] }
    Transform { translation  0.0  0.0 4.1 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 2.3 subdivision 4 } } ] }
    Transform { translation  1.0  0.5 5.0 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.7 subdivision 4 } } ] }
    Transform { translation -0.6 -0.9 4.8 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.4 subdivision 4 } } ] }
  ]
 }
 # Tree F — SW
 Solid {
  translation -2.98 -22.8 0
  children [
    Transform { translation 0 0 1.3 children [ Shape { appearance USE TRUNK geometry Cylinder { height 2.6 radius 0.19 subdivision 10 } } ] }
    Transform { translation  0.0  0.0 2.9 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.7 subdivision 4 } } ] }
    Transform { translation  0.6  0.4 3.7 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.2 subdivision 4 } } ] }
  ]
 }
 # Tree G — west side
 Solid {
  translation -23 -5 0
  children [
    Transform { translation 0 0 2.0 children [ Shape { appearance USE TRUNK geometry Cylinder { height 4.0 radius 0.29 subdivision 10 } } ] }
    Transform { translation  0.0  0.0 4.4 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 2.4 subdivision 4 } } ] }
    Transform { translation -1.0  0.8 5.3 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.8 subdivision 4 } } ] }
    Transform { translation  0.9 -0.7 5.0 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.6 subdivision 4 } } ] }
  ]
 }
 # Tree H — east side
 Solid {
  translation 21.35 -1.05 0
  children [
    Transform { translation 0 0 1.5 children [ Shape { appearance USE TRUNK geometry Cylinder { height 3.0 radius 0.22 subdivision 10 } } ] }
    Transform { translation  0.0  0.0 3.4 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 2.0 subdivision 4 } } ] }
    Transform { translation  0.7  0.6 4.2 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.4 subdivision 4 } } ] }
    Transform { translation -0.5 -0.8 4.0 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.2 subdivision 4 } } ] }
  ]
 }
 # ==================== SCARECROW (east side, outside fence) ====================
 Solid {
  translation 20 -10 0
  rotation 0 0 1 2.61799
  children [
    Transform { translation 0 0 1.22 children [ Shape { appearance USE TRUNK geometry Cylinder { height 2.44 radius 0.045 subdivision 8 } } ] }
    Transform { translation 0 0 2.02 rotation 1 0 0 1.5708 children [ Shape { appearance USE TRUNK geometry Cylinder { height 1.60 radius 0.032 subdivision 8 } } ] }
    Transform {
      translation 0 0 2.44
      children [
        Shape { appearance USE STRAW geometry Sphere { radius 0.17 subdivision 3 } }
        Transform { translation 0.13  0.05 0.06 children [ Shape { appearance PBRAppearance { baseColor 0.06 0.06 0.06 } geometry Sphere { radius 0.028 subdivision 2 } } ] }
        Transform { translation 0.13 -0.05 0.06 children [ Shape { appearance PBRAppearance { baseColor 0.06 0.06 0.06 } geometry Sphere { radius 0.028 subdivision 2 } } ] }
        Transform { translation 0.16 0 -0.02 rotation 0 1 0 1.5708 children [ Shape { appearance PBRAppearance { baseColor 0.75 0.50 0.30 } geometry Cone { height 0.07 bottomRadius 0.032 subdivision 6 } } ] }
        Transform { translation 0.14  0.04 -0.06 children [ Shape { appearance PBRAppearance { baseColor 0.18 0.08 0.08 } geometry Box { size 0.01 0.04 0.01 } } ] }
        Transform { translation 0.14 -0.04 -0.06 children [ Shape { appearance PBRAppearance { baseColor 0.18 0.08 0.08 } geometry Box { size 0.01 0.04 0.01 } } ] }
      ]
    }
    Transform { translation 0 0 2.62 children [ Shape { appearance USE HAT geometry Cylinder { height 0.04 radius 0.28 subdivision 12 } } ] }
    Transform { translation 0 0 2.80 children [ Shape { appearance USE HAT geometry Cylinder { height 0.30 radius 0.17 subdivision 10 } } ] }
    Transform { translation 0 0 1.60 children [ Shape { appearance USE SHIRT geometry Box { size 0.20 0.40 0.46 } } ] }
    Transform { translation 0 0 1.14 children [ Shape { appearance USE PANTS geometry Box { size 0.17 0.32 0.34 } } ] }
    Transform { translation 0  0.68 2.03 rotation 0 0  1 0.25 children [ Shape { appearance USE STRAW geometry Box { size 0.03 0.24 0.03 } } ] }
    Transform { translation 0 -0.68 2.03 rotation 0 0 -1 0.25 children [ Shape { appearance USE STRAW geometry Box { size 0.03 0.24 0.03 } } ] }
    Transform { translation 0.10  0.08 1.82 children [ Shape { appearance USE STRAW geometry Box { size 0.03 0.03 0.14 } } ] }
    Transform { translation 0.10 -0.08 1.82 children [ Shape { appearance USE STRAW geometry Box { size 0.03 0.03 0.14 } } ] }
  ]
 }
 # ==================== HAY BALES (near barn) ====================
 Solid { translation 25.75 13.76 0.62 children [ Transform { rotation 1 0 0 1.5708 children [ Shape { appearance USE HAY geometry Cylinder { height 1.30 radius 0.62 subdivision 14 } } ] } ] boundingObject Box { size 1.30 1.24 1.24 } }
 Solid { translation 24.34 12.32 0.62 rotation -1 0 0 1.5708 children [ Transform { rotation 1 0 0 1.5708 children [ Shape { appearance USE HAY geometry Cylinder { height 1.30 radius 0.62 subdivision 14 } } ] } ] boundingObject Box { size 1.30 1.24 1.24 } }
 Solid { translation 24.28 13.79 0.62 children [ Transform { rotation 1 0 0 1.5708 children [ Shape { appearance USE HAY geometry Cylinder { height 1.30 radius 0.62 subdivision 14 } } ] } ] boundingObject Box { size 1.30 1.24 1.24 } }
 # ==================== TRACTOR (near barn) ====================
 Solid {
  translation 17 19 0.18
  rotation 0 0 1 1.9
  children [
    # Chassis
    Transform { translation 0 0 0.35 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.20 0.20 roughness 0.6 metalness 0.3 } geometry Box { size 2.0 0.90 0.12 } } ] }
    # Engine hood
    Transform { translation 0.60 0 0.60 children [ Shape { appearance PBRAppearance { baseColor 0.15 0.50 0.12 roughness 0.7 metalness 0.1 } geometry Box { size 0.65 0.80 0.45 } } ] }
    # Main body
    Transform { translation -0.15 0 0.60 children [ Shape { appearance PBRAppearance { baseColor 0.15 0.50 0.12 roughness 0.7 metalness 0.1 } geometry Box { size 0.80 0.85 0.45 } } ] }
    # Cabin
    Transform { translation -0.20 0 0.95 children [ Shape { appearance PBRAppearance { baseColor 0.15 0.50 0.12 roughness 0.7 metalness 0.1 } geometry Box { size 0.75 0.80 0.45 } } ] }
    # Cabin roof
    Transform { translation -0.20 0 1.22 children [ Shape { appearance PBRAppearance { baseColor 0.12 0.40 0.10 roughness 0.75 metalness 0.1 } geometry Box { size 0.85 0.90 0.06 } } ] }
    # Windshield
    Transform { translation 0.12 0 0.95 children [ Shape { appearance USE GLASS geometry Box { size 0.02 0.55 0.35 } } ] }
    # Rear window
    Transform { translation -0.58 0 0.95 children [ Shape { appearance USE GLASS geometry Box { size 0.02 0.55 0.35 } } ] }
    # Side windows
    Transform { translation -0.20  0.40 0.95 children [ Shape { appearance USE GLASS geometry Box { size 0.55 0.02 0.30 } } ] }
    Transform { translation -0.20 -0.40 0.95 children [ Shape { appearance USE GLASS geometry Box { size 0.55 0.02 0.30 } } ] }
    # Seat
    Transform { translation -0.25 0 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.12 0.12 0.12 roughness 0.9 } geometry Box { size 0.30 0.35 0.06 } } ] }
    # Exhaust stack
    Transform { translation 0.50 0.30 0.60 children [
      Shape { appearance PBRAppearance { baseColor 0.25 0.25 0.25 roughness 0.4 metalness 0.6 } geometry Cylinder { height 0.90 radius 0.03 subdivision 6 } }
      Transform { translation 0 0 0.50 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.20 0.20 roughness 0.4 metalness 0.6 } geometry Cylinder { height 0.04 radius 0.045 subdivision 6 } } ] }
    ] }
    # Rear axle
    Transform { translation -0.45 0 0.40 children [ Shape { appearance PBRAppearance { baseColor 0.25 0.25 0.25 roughness 0.5 metalness 0.5 } geometry Box { size 0.08 1.15 0.08 } } ] }
    # Front axle
    Transform { translation 0.60 0 0.25 children [ Shape { appearance PBRAppearance { baseColor 0.25 0.25 0.25 roughness 0.5 metalness 0.5 } geometry Box { size 0.08 0.90 0.08 } } ] }
    # Rear left wheel
    Transform { translation -0.45  0.60 0.40 rotation 1 0 0 1.5708 children [
      Shape { appearance PBRAppearance { baseColor 0.08 0.08 0.08 roughness 0.95 } geometry Cylinder { height 0.22 radius 0.40 subdivision 20 } }
      Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 metalness 0.5 } geometry Cylinder { height 0.24 radius 0.14 subdivision 10 } }
    ] }
    # Rear right wheel
    Transform { translation -0.45 -0.60 0.40 rotation 1 0 0 1.5708 children [
      Shape { appearance PBRAppearance { baseColor 0.08 0.08 0.08 roughness 0.95 } geometry Cylinder { height 0.22 radius 0.40 subdivision 20 } }
      Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 metalness 0.5 } geometry Cylinder { height 0.24 radius 0.14 subdivision 10 } }
    ] }
    # Front left wheel
    Transform { translation 0.60  0.45 0.25 rotation 1 0 0 1.5708 children [
      Shape { appearance PBRAppearance { baseColor 0.08 0.08 0.08 roughness 0.95 } geometry Cylinder { height 0.16 radius 0.25 subdivision 16 } }
      Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 metalness 0.5 } geometry Cylinder { height 0.18 radius 0.09 subdivision 8 } }
    ] }
    # Front right wheel
    Transform { translation 0.60 -0.45 0.25 rotation 1 0 0 1.5708 children [
      Shape { appearance PBRAppearance { baseColor 0.08 0.08 0.08 roughness 0.95 } geometry Cylinder { height 0.16 radius 0.25 subdivision 16 } }
      Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 metalness 0.5 } geometry Cylinder { height 0.18 radius 0.09 subdivision 8 } }
    ] }
    # Rear fenders
    Transform { translation -0.45  0.50 0.72 children [ Shape { appearance PBRAppearance { baseColor 0.12 0.40 0.10 roughness 0.75 metalness 0.1 } geometry Box { size 0.50 0.12 0.20 } } ] }
    Transform { translation -0.45 -0.50 0.72 children [ Shape { appearance PBRAppearance { baseColor 0.12 0.40 0.10 roughness 0.75 metalness 0.1 } geometry Box { size 0.50 0.12 0.20 } } ] }
    # Front bumper
    Transform { translation 0.95 0 0.35 children [ Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 roughness 0.7 metalness 0.3 } geometry Box { size 0.12 0.75 0.30 } } ] }
    # Headlights
    Transform { translation 0.97  0.25 0.45 children [ Shape { appearance PBRAppearance { baseColor 0.95 0.92 0.70 roughness 0.3 } geometry Sphere { radius 0.05 subdivision 3 } } ] }
    Transform { translation 0.97 -0.25 0.45 children [ Shape { appearance PBRAppearance { baseColor 0.95 0.92 0.70 roughness 0.3 } geometry Sphere { radius 0.05 subdivision 3 } } ] }
    # Taillights
    Transform { translation -0.58  0.25 0.45 children [ Shape { appearance PBRAppearance { baseColor 0.80 0.10 0.10 roughness 0.4 } geometry Box { size 0.04 0.08 0.06 } } ] }
    Transform { translation -0.58 -0.25 0.45 children [ Shape { appearance PBRAppearance { baseColor 0.80 0.10 0.10 roughness 0.4 } geometry Box { size 0.04 0.08 0.06 } } ] }
    # Drawbar hitch
    Transform { translation -0.95 0 0.20 children [ Shape { appearance PBRAppearance { baseColor 0.25 0.25 0.25 roughness 0.5 metalness 0.5 } geometry Box { size 0.12 0.06 0.06 } } ] }
  ]
  boundingObject Box { size 2.2 1.4 1.3 }
 }
 # ==================== GRASS PATCHES (inside field, decorative) ====================
 Solid { translation -8 6 0.15 children [
  Transform { translation  0.10  0.00 0 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.30 } } ] }
  Transform { translation -0.05  0.12 0 rotation 0 0 1 0.4 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.26 } } ] }
  Transform { translation  0.08 -0.10 0 rotation 0 0 1 -0.3 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.28 } } ] }
  Transform { translation -0.12  0.04 0 rotation 0 0 1 0.2 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.24 } } ] }
 ] }
 Solid { translation 6 -9 0.15 children [
  Transform { translation  0.08  0.06 0 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.28 } } ] }
  Transform { translation -0.10  0.00 0 rotation 0 0 1 -0.3 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.32 } } ] }
  Transform { translation  0.02 -0.12 0 rotation 0 0 1 0.35 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.26 } } ] }
  Transform { translation -0.06  0.10 0 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.22 } } ] }
 ] }
 Solid { translation -3 11 0.15 children [
  Transform { translation  0.06 -0.06 0 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.26 } } ] }
  Transform { translation -0.08  0.08 0 rotation 0 0 1 0.3 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.30 } } ] }
  Transform { translation  0.12  0.02 0 rotation 0 0 1 -0.25 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.28 } } ] }
 ] }
 Solid { translation 10 8 0.15 children [
  Transform { translation -0.07  0.05 0 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.24 } } ] }
  Transform { translation  0.09 -0.07 0 rotation 0 0 1 0.4 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.28 } } ] }
  Transform { translation  0.00  0.11 0 rotation 0 0 1 -0.2 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.26 } } ] }
 ] }
 Solid { translation -11 -7 0.15 children [
  Transform { translation  0.05  0.08 0 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.30 } } ] }
  Transform { translation -0.09 -0.04 0 rotation 0 0 1 0.35 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.28 } } ] }
  Transform { translation  0.10 -0.09 0 rotation 0 0 1 -0.3 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.24 } } ] }
  Transform { translation -0.03  0.12 0 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.26 } } ] }
 ] }
 # ==================== SHEPHERD DOG ====================
 ShepherdDog {
  translation 0 0 0.5
  rotation 0 0 1 0
  controller "shepherd_dog"
 }
 # ==================== SHEEP ====================
 # Up to 10 sheep, scattered through the field's central/north zone. Comment
 # out trailing slots to test smaller flock sizes; the dog policy is trained
 # to handle 1..10 sheep so any prefix works.
 Sheep { translation  3.0  2.0 0.5 name "sheep1"  controller "sheep" }
 Sheep { translation  3.0 -2.0 0.5 name "sheep2"  controller "sheep" }
 Sheep { translation  4.0  0.0 0.5 name "sheep3"  controller "sheep" }
 Sheep { translation -3.0  4.0 0.5 name "sheep4"  controller "sheep" }
 Sheep { translation -5.0 -2.0 0.5 name "sheep5"  controller "sheep" }
 Sheep { translation  6.0  5.0 0.5 name "sheep6"  controller "sheep" }
 Sheep { translation -6.0  6.0 0.5 name "sheep7"  controller "sheep" }
 Sheep { translation  0.0  8.0 0.5 name "sheep8"  controller "sheep" }
 Sheep { translation -8.0  0.0 0.5 name "sheep9"  controller "sheep" }
 Sheep { translation  7.0 -4.0 0.5 name "sheep10" controller "sheep" }
Author	SHA1	Message	Date
Johnny Fernandes	be58ad2054	Results from last checkpoinr	2026-05-13 07:49:17 +00:00
Johnny Fernandes	5c2ee4bba5	Checkpoint 8	2026-05-12 22:41:03 +01:00
Johnny Fernandes	a01a5c9cef	Checkpoint 7	2026-05-11 12:21:51 +01:00
Johnny Fernandes	fce0e0c786	Checkpoint 6	2026-05-11 10:35:48 +01:00
Johnny Fernandes	b457155538	Checkpoint 5 - incomplete	2026-05-11 10:35:39 +01:00
Johnny Fernandes	6688325d89	Checkpoint 4	2026-05-11 00:42:52 +01:00
Johnny Fernandes	2a6db038df	Checkpoint 3	2026-05-10 12:46:14 +01:00
Johnny Fernandes	1bb9415414	Checkpoint 2	2026-05-07 22:00:10 +01:00
Johnny Fernandes	90aa3bbcb4	Checkpoint 1	2026-05-07 21:59:58 +01:00
Johnny Fernandes	80a314b9e9	Trying attention method	2026-04-26 22:32:13 +01:00