Checkpoint 10

Results from last checkpoint
2026-05-13 23:22:17 +01:00 · 2026-05-13 23:14:16 +01:00 · 2026-05-13 20:26:18 +00:00 · 2026-05-13 13:46:50 +01:00 · 2026-05-13 07:49:17 +00:00 · 2026-05-12 22:41:03 +01:00
68 changed files with 34277 additions and 982 deletions
@@ -1,2 +1,15 @@
-# Stuff
-_example/
+# Python
+__pycache__/
+
+# Training artefacts: ignore all run outputs except deployable policies
+training/runs/**
+!training/runs/
+!training/runs/.gitkeep
+!training/runs/*/
+!training/runs/*/policy.zip
+
+# Webots launcher scratch
+worlds/**
+!worlds/field.wbt
+!worlds/field_round.wbt
+herding_runtime.cfg
@@ -0,0 +1,285 @@
+# Training pipeline for the shepherd-dog herding project.
+# Stages chain via output files in training/.
+#
+# Usage:
+#   make            # full pipeline: bc_demos -> bc -> rl -> eval
+#   make bc_demos   # generate sim demos
+#   make bc         # behaviour clone (rebuilds bc_demos if missing)
+#   make rl         # KL-PPO fine-tune (rebuilds bc if missing)
+#   make eval       # 10-seed env eval of rl
+#   make test       # pytest suite
+#   make webots N=10 MODE=rl   # launch Webots in the chosen mode
+#   WEBOTS_HEADLESS=1 make webots   # no 3D view, fast mode (still needs DISPLAY or xvfb-run)
+#   make clean      # delete bc_demos and run artefacts
+#   make clean_all  # delete artefacts for all combinations
+#   make help       # print the target table
+#
+# Override any hyperparameter on the command line, for example:
+#   make rl PPO_STEPS=2000000 KL=0.02
+#   make eval EVAL_SEEDS=20
+#
+# Drive mode selects the locomotion model:
+#   make DRIVE=differential       2-wheel diff-drive (default)
+#   make DRIVE=mecanum             4-wheel omnidirectional
+#
+# World shape:
+#   make WORLD=field              rectangular (default)
+#   make WORLD=field_round        circular fence
+#
+# To train all 4 combinations:
+#   make train_all
+
+
+PY               := python
+
+# Drive mode and world shape — each combination gets its own artefacts.
+DRIVE            ?= differential
+WORLD            ?= field
+
+# Derived tag and paths.
+TAG               = $(DRIVE)_$(WORLD)
+BC_DEMOS          = training/bc/demos_$(TAG).npz
+BC_DIR            = training/runs/bc_$(TAG)
+RL_DIR            = training/runs/rl_$(TAG)
+# Stage-2 "speed pass": continue PPO from RL_DIR with TIME_W < 0 so the
+# policy keeps Stage-1's success rate but cuts time-to-pen.  Output is a
+# separate run dir so Stage-1 stays comparable.
+RL_FAST_DIR       = training/runs/rl_fast_$(TAG)
+BC_POLICY         = $(BC_DIR)/policy.zip
+RL_POLICY         = $(RL_DIR)/policy.zip
+RL_FAST_POLICY    = $(RL_FAST_DIR)/policy.zip
+
+# --- Demo collection ---
+TEACHER          ?= universal
+# Round field is fundamentally harder (narrow gate at south of a circle).
+# Default to more demos there to give BC a fair shot at 60%+.
+ifeq ($(WORLD),field_round)
+SEEDS_PER_N      ?= 60
+else
+SEEDS_PER_N      ?= 25
+endif
+SUBSAMPLE        ?= 3
+FRAME_STACK      ?= 4
+DEMO_MAX_STEPS   ?= 100000
+
+# --- Behaviour cloning ---
+ifeq ($(WORLD),field_round)
+BC_EPOCHS        ?= 150
+else
+BC_EPOCHS        ?= 60
+endif
+BC_NET_ARCH      ?= 512,512
+
+# --- KL-PPO fine-tune ---
+# Round field: longer training, looser KL, no time penalty (success
+# must be learned before speed is rewarded).
+ifeq ($(WORLD),field_round)
+PPO_STEPS        ?= 4000000
+KL               ?= 0.02
+else
+PPO_STEPS        ?= 2000000
+KL               ?= 0.05
+endif
+# Time penalty is 0 until success rate is high. Earlier runs showed
+# TIME_W=-0.05 traded ~10 pts of success for speed on hard combos —
+# learn to succeed first, optimize speed in a later pass.
+TIME_W           ?= 0.0
+IMITATE          ?= 0.0
+# PPO rollouts at full difficulty so the training distribution matches
+# eval (deployment).  Anything lower causes a train/eval mismatch that
+# can make RL eval worse than BC.
+DIFFICULTY       ?= 1.0
+
+# --- Stage-2 "speed pass" (rl_fast) ---
+# Continues from RL_DIR with a negative TIME_W. Tighter KL keeps the
+# policy near the Stage-1 success rate while step-count drops.
+RL_FAST_STEPS    ?= 1000000
+RL_FAST_KL       ?= 0.05
+RL_FAST_TIME_W   ?= -0.05
+
+# --- Evaluation ---
+EVAL_SEEDS       ?= 10
+EVAL_MAX_STEPS   ?= 15000
+
+# --- Webots launcher ---
+N                ?= 10
+MODE             ?= rl
+
+
+.PHONY: all bc_demos bc rl rl_fast eval eval_fast eval_all eval_all_fast \
+        test webots clean clean_all help \
+        train_all train_diff_rect train_diff_round \
+        train_mec_rect train_mec_round \
+        train_all_fast train_diff_rect_fast train_diff_round_fast \
+        train_mec_rect_fast train_mec_round_fast \
+        remote_full
+
+all: eval
+
+# Export HERDING_WORLD so that geometry.py picks it up at import time.
+export HERDING_WORLD = $(WORLD)
+# Force Python stdout/stderr unbuffered so progress is visible live when
+# the build is run under tee / nohup / tmux pipes.
+export PYTHONUNBUFFERED = 1
+
+# Stage 0: run the teacher in the sim and save the demos archive.
+# `bc_demos` is the phony alias; the .npz file is the real target, so the
+# recipe only runs while that file is missing.
+bc_demos: $(BC_DEMOS)
+$(BC_DEMOS):
+	$(PY) -m training.bc.collect \
+		--teacher $(TEACHER) \
+		--out $@ \
+		--seeds-per-n $(SEEDS_PER_N) \
+		--subsample $(SUBSAMPLE) \
+		--frame-stack $(FRAME_STACK) \
+		--drive-mode $(DRIVE) \
+		--world $(WORLD) \
+		--max-steps $(DEMO_MAX_STEPS)
+
+# Stage 1: behaviour cloning from the demos archive ($< is $(BC_DEMOS)).
+# The recipe is expected to leave policy.zip inside $(BC_DIR).
+bc: $(BC_POLICY)
+$(BC_POLICY): $(BC_DEMOS)
+	$(PY) -m training.bc.pretrain \
+		--demos $< \
+		--out $(BC_DIR) \
+		--epochs $(BC_EPOCHS) \
+		--net-arch $(BC_NET_ARCH)
+
+# Stage 2: KL-PPO fine-tune, started from the BC run dir and written to the
+# RL run dir. Depends on the BC policy file, so `make rl` builds BC first
+# when it is missing or out of date.
+rl: $(RL_POLICY)
+$(RL_POLICY): $(BC_POLICY)
+	$(PY) -m training.rl.train \
+		--bc $(BC_DIR) \
+		--out $(RL_DIR) \
+		--total-timesteps $(PPO_STEPS) \
+		--kl-coef $(KL) \
+		--imitate-weight $(IMITATE) \
+		--time-weight $(TIME_W) \
+		--difficulty $(DIFFICULTY) \
+		--drive-mode $(DRIVE) \
+		--world $(WORLD)
+
+# Evaluate the Stage-1 RL policy for the current DRIVE+WORLD. Phony, so it
+# re-runs on every invocation once the RL policy is up to date.
+eval: $(RL_POLICY)
+	$(PY) -m training.eval \
+		--policy $(RL_DIR) \
+		--max-flock 10 \
+		--max-steps $(EVAL_MAX_STEPS) \
+		--n-seeds $(EVAL_SEEDS) \
+		--drive-mode $(DRIVE) \
+		--world $(WORLD)
+
+# --- Stage-2 speed pass ---
+# Second PPO pass: starts from the Stage-1 policy in $(RL_DIR) (handed over
+# through --bc) and applies the per-step time penalty RL_FAST_TIME_W, aiming
+# to keep Stage-1's success rate while cutting mean steps-to-pen. Run
+# `make rl_fast` once Stage-1 RL has converged (success ≥ teacher).
+rl_fast: $(RL_FAST_POLICY)
+$(RL_FAST_POLICY): $(RL_POLICY)
+	$(PY) -m training.rl.train \
+		--bc $(RL_DIR) \
+		--out $(RL_FAST_DIR) \
+		--total-timesteps $(RL_FAST_STEPS) \
+		--kl-coef $(RL_FAST_KL) \
+		--imitate-weight $(IMITATE) \
+		--time-weight $(RL_FAST_TIME_W) \
+		--difficulty $(DIFFICULTY) \
+		--drive-mode $(DRIVE) \
+		--world $(WORLD)
+
+# Evaluate the Stage-2 (rl_fast) policy with the same settings as `eval`.
+eval_fast: $(RL_FAST_POLICY)
+	$(PY) -m training.eval \
+		--policy $(RL_FAST_DIR) \
+		--max-flock 10 \
+		--max-steps $(EVAL_MAX_STEPS) \
+		--n-seeds $(EVAL_SEEDS) \
+		--drive-mode $(DRIVE) \
+		--world $(WORLD)
+
+# Run the pytest suite under tests/. No prerequisites: it does not build or
+# require any training artefact.
+test:
+	$(PY) -m pytest tests/
+
+# Launch Webots through the helper script. Arguments are positional:
+# flock size N, dog MODE, DRIVE model, WORLD shape. No prerequisites, so a
+# learned MODE relies on a policy that has already been trained.
+webots:
+	tools/run_webots.sh $(N) $(MODE) $(DRIVE) $(WORLD)
+
+# Remove every artefact of the current DRIVE+WORLD combination: the demos
+# archive and the BC, RL and Stage-2 rl_fast run dirs. Including
+# $(RL_FAST_DIR) keeps `clean` in line with what clean_all removes.
+clean:
+	rm -f $(BC_DEMOS)
+	rm -rf $(BC_DIR) $(RL_DIR) $(RL_FAST_DIR)
+
+# Remove the artefacts of all combinations. The rl_* glob also matches the
+# rl_fast_* run dirs.
+clean_all:
+	rm -f training/bc/demos_*.npz
+	rm -rf training/runs/bc_* training/runs/rl_*
+
+# --- Train all 4 combinations ---
+# Each wrapper re-invokes make with a fixed DRIVE/WORLD pair and no explicit
+# goal, so the sub-make builds the default goal `all` (i.e. `eval`) for that
+# combination. A sub-make is used because the per-world defaults in this
+# file are selected by parse-time `ifeq ($(WORLD),...)` conditionals.
+train_diff_rect:
+	$(MAKE) DRIVE=differential WORLD=field
+
+train_diff_round:
+	$(MAKE) DRIVE=differential WORLD=field_round
+
+train_mec_rect:
+	$(MAKE) DRIVE=mecanum WORLD=field
+
+train_mec_round:
+	$(MAKE) DRIVE=mecanum WORLD=field_round
+
+# Aggregate target: runs the four wrappers above.
+train_all: train_diff_rect train_diff_round train_mec_rect train_mec_round
+
+# Gym eval sweep over all 4 combos. Use after train_all / train_all_fast.
+# For each DRIVE x WORLD pair this evaluates the BC run dir, then the RL run
+# dir. Paths are spelled out because $$d / $$w are shell loop variables, not
+# make variables. There are no prerequisites: missing policies are not built.
+#
+# HERDING_WORLD is exported file-wide from $(WORLD), which is fixed for the
+# whole make invocation; it is overridden per command here so the
+# environment always agrees with the --world flag of the combo being run.
+#
+# Commands are joined with `;`, so one failing eval does not stop the sweep
+# and only the last eval's exit status reaches make.
+eval_all:
+	@for d in differential mecanum; do \
+	  for w in field field_round; do \
+	    echo ""; \
+	    echo "=== BC  $$d / $$w ==="; \
+	    HERDING_WORLD=$$w $(PY) -m training.eval --policy training/runs/bc_$${d}_$${w} \
+	      --max-flock 10 --max-steps $(EVAL_MAX_STEPS) --n-seeds $(EVAL_SEEDS) \
+	      --drive-mode $$d --world $$w; \
+	    echo ""; \
+	    echo "=== RL  $$d / $$w ==="; \
+	    HERDING_WORLD=$$w $(PY) -m training.eval --policy training/runs/rl_$${d}_$${w} \
+	      --max-flock 10 --max-steps $(EVAL_MAX_STEPS) --n-seeds $(EVAL_SEEDS) \
+	      --drive-mode $$d --world $$w; \
+	  done; \
+	done
+
+# One-shot remote runbook: clean → Stage-1 train → Stage-1 eval → Stage-2
+# train → Stage-2 eval. Each step pipes to its own log file in the repo
+# root so the run is fully unattended.
+#
+# Every logged step is a pipeline into `tee`. A plain sh pipeline reports
+# only tee's exit status, so a failed stage would be masked and the next
+# stage would start on missing or stale artefacts. This recipe therefore
+# runs under bash with `pipefail`, so the first failing stage aborts the
+# runbook.
+remote_full: SHELL := bash
+remote_full:
+	$(MAKE) clean_all
+	set -o pipefail; $(MAKE) train_all 2>&1 | tee stage1_train.log
+	set -o pipefail; $(MAKE) eval_all 2>&1 | tee stage1_eval.log
+	set -o pipefail; $(MAKE) train_all_fast 2>&1 | tee stage2_train.log
+	set -o pipefail; $(MAKE) eval_all_fast 2>&1 | tee stage2_eval.log
+	@echo ""
+	@echo "===================================================="
+	@echo "  Done. Logs: stage1_train.log stage1_eval.log"
+	@echo "              stage2_train.log stage2_eval.log"
+	@echo "===================================================="
+
+# Gym eval sweep of the Stage-2 (rl_fast) policies over all 4 combos.
+# HERDING_WORLD is overridden per command so the environment agrees with
+# the --world flag of the combo being run; the file-wide export follows
+# $(WORLD), which is fixed for the whole invocation. Commands are joined
+# with `;`, so only the last eval's exit status reaches make.
+eval_all_fast:
+	@for d in differential mecanum; do \
+	  for w in field field_round; do \
+	    echo ""; \
+	    echo "=== RL_FAST  $$d / $$w ==="; \
+	    HERDING_WORLD=$$w $(PY) -m training.eval --policy training/runs/rl_fast_$${d}_$${w} \
+	      --max-flock 10 --max-steps $(EVAL_MAX_STEPS) --n-seeds $(EVAL_SEEDS) \
+	      --drive-mode $$d --world $$w; \
+	  done; \
+	done
+
+# --- Stage-2 sweep ---
+# Each wrapper re-invokes make for one DRIVE/WORLD pair with the explicit
+# goal `rl_fast`. That goal depends on the Stage-1 RL policy, so Stage 1 is
+# built first when it is missing or out of date.
+train_diff_rect_fast:
+	$(MAKE) DRIVE=differential WORLD=field rl_fast
+
+train_diff_round_fast:
+	$(MAKE) DRIVE=differential WORLD=field_round rl_fast
+
+train_mec_rect_fast:
+	$(MAKE) DRIVE=mecanum WORLD=field rl_fast
+
+train_mec_round_fast:
+	$(MAKE) DRIVE=mecanum WORLD=field_round rl_fast
+
+# Aggregate target: runs the four Stage-2 wrappers above.
+train_all_fast: train_diff_rect_fast train_diff_round_fast \
+                train_mec_rect_fast train_mec_round_fast
+
+# Print the target table and the overridable knobs with their current
+# values. The $(VAR) references are expanded by make, so overrides given on
+# the command line show up in the output.
+help:
+	@echo "Targets:"
+	@echo "  make              full pipeline (bc_demos -> bc -> rl -> eval)"
+	@echo "  make bc_demos     sim demos via the '$(TEACHER)' teacher"
+	@echo "  make bc           train BC (rebuilds bc_demos if missing)"
+	@echo "  make rl           KL-PPO fine-tune (rebuilds bc if missing)"
+	@echo "  make eval         $(EVAL_SEEDS)-seed env eval of rl"
+	@echo "  make rl_fast      Stage-2 speed pass from rl (rebuilds rl if missing)"
+	@echo "  make eval_fast    $(EVAL_SEEDS)-seed env eval of rl_fast"
+	@echo "  make test         pytest suite"
+	@echo "  make webots [N=$(N)] [MODE=$(MODE)] [DRIVE=$(DRIVE)] [WORLD=$(WORLD)]"
+	@echo "                    launch Webots in the chosen mode"
+	@echo "  WEBOTS_HEADLESS=1 make webots …   no 3D view + fast + --batch"
+	@echo "  make clean        delete artefacts for current DRIVE+WORLD"
+	@echo "  make clean_all    delete artefacts for all combinations"
+	@echo "  make remote_full  clean_all, then train + eval Stage 1 and Stage 2 for all 4"
+	@echo ""
+	@echo "Combinations:"
+	@echo "  make DRIVE=differential WORLD=field       diff + rectangular (default)"
+	@echo "  make DRIVE=differential WORLD=field_round  diff + circular"
+	@echo "  make DRIVE=mecanum     WORLD=field         mecanum + rectangular"
+	@echo "  make DRIVE=mecanum     WORLD=field_round   mecanum + circular"
+	@echo "  make train_all                            all 4 in sequence"
+	@echo "  make train_all_fast                       Stage-2 speed pass for all 4"
+	@echo "  make eval_all                             eval bc + rl for all 4"
+	@echo "  make eval_all_fast                        eval rl_fast for all 4"
+	@echo ""
+	@echo "Hyperparameter overrides (showing defaults):"
+	@echo "  TEACHER=$(TEACHER) SEEDS_PER_N=$(SEEDS_PER_N) SUBSAMPLE=$(SUBSAMPLE) FRAME_STACK=$(FRAME_STACK) DEMO_MAX_STEPS=$(DEMO_MAX_STEPS)"
+	@echo "  BC_EPOCHS=$(BC_EPOCHS) BC_NET_ARCH=$(BC_NET_ARCH)"
+	@echo "  PPO_STEPS=$(PPO_STEPS) KL=$(KL) IMITATE=$(IMITATE) TIME_W=$(TIME_W) DIFFICULTY=$(DIFFICULTY)"
+	@echo "  RL_FAST_STEPS=$(RL_FAST_STEPS) RL_FAST_KL=$(RL_FAST_KL) RL_FAST_TIME_W=$(RL_FAST_TIME_W)"
+	@echo "  EVAL_SEEDS=$(EVAL_SEEDS) EVAL_MAX_STEPS=$(EVAL_MAX_STEPS)"
@@ -0,0 +1,199 @@
+# Autonomous Shepherd-Dog Herding (Webots + RL)
+
+Group G25 — *Diogo Costa, Johnny Fernandes, Nelson Neto*
+
+A differential-drive shepherd dog that herds 1–10 sheep through a 3 m
+gate into an external pen. The dog has three deployable modes:
+
+| Mode | Source | Role |
+|---|---|---|
+| `strombom` | Strömbom et al. (2014) collect/drive heuristic | Analytic baseline |
+| `bc` | Behaviour cloning of the Strömbom teacher | Imitation learning result |
+| `rl` | KL-regularised PPO fine-tune of `bc` | Reward-driven refinement |
+
+`sequential` (single-target pin-and-push) is kept as an alternative
+analytic baseline.
+
+## Perception
+
+The dog perceives sheep **only through its front-mounted 140° LiDAR**
+(180 rays, 12 m max range — see `protos/ShepherdDog.proto`). Each
+control step:
+
+1. Read `lidar.getRangeImage()`,
+2. Cluster returns into world-frame `(x, y)` estimates
+   (`herding/perception/lidar_perception.py`),
+3. Fold them into a multi-target tracker that maintains last-seen
+   positions for sheep currently outside the FOV
+   (`herding/perception/sheep_tracker.py`).
+
+**LiDAR validation** (intermediate-goal item v from `docs/project.md`):
+during development a diagnostic-dump controller captured 80 real
+Webots scans plus the ground-truth sheep positions. Comparing
+detections against GT showed clustered centroids match GT positions
+within 0.15 m after the +SHEEP_RADIUS surface-to-centre correction —
+i.e. the LiDAR pipeline produces correct sheep-position estimates
+from the real Webots scan, validating the sensor for the herding
+task.
+
+The tracker outputs a `{name: (x, y)}` dict shaped exactly like the
+prior receiver-based one, so Strömbom, Sequential, and the BC obs
+builder all run unchanged on top of it. The 2D Gymnasium env
+(`herding/perception/lidar_sim.py`) raycasts sheep discs at training time, so
+demos collected in the env match the perception the deployed
+controller sees in Webots.
+
+Privileged ground-truth perception is available for ablation —
+`HerdingEnv(use_lidar=False)`.
+
+## Quick start
+
+```bash
+# 1. Set up the Python env (any venv with PyTorch + SB3)
+pip install -r training/requirements.txt
+
+# 2. Smoke test (70 pytest cases, < 1 s)
+make test
+
+# 3. Reproduce the full pipeline (~30–60 min CPU)
+make            # demos -> bc -> rl -> eval
+
+# Individual stages (each rebuilds upstream artefacts if missing):
+make bc_demos   # sim demos
+make bc         # behaviour clone
+make rl         # KL-PPO fine-tune
+make eval       # 10-seed env eval of rl
+
+# 4. Run in Webots
+make webots N=10 MODE=bc          # behaviour-cloned MLP
+make webots N=10 MODE=rl          # KL-PPO fine-tune
+make webots N=10 MODE=strombom    # analytic baseline
+# (or invoke directly: tools/run_webots.sh 10 rl)
+```
+
+`make help` lists every target and the overridable hyperparameters
+(e.g. `make rl PPO_STEPS=2000000 KL=0.02`).
+
+## Documentation map
+
+- This README is the project overview: architecture, quick start, and
+  headline results.
+- `training/README.md` has the command-level training and evaluation
+  details for demo collection, BC, PPO fine-tuning, and policy artifacts.
+- `docs/project.md` is the original course proposal/goals document, kept
+  for traceability rather than as run instructions.
+
+## Layout
+
+```
+herding/                  — perception / control / world primitives
+  world/                  — environment-side physics & geometry
+    geometry.py             field/pen constants, robot specs
+    diffdrive.py            differential-drive kinematics
+    flocking_sim.py         Reynolds + Strömbom 2014 sheep dynamics
+  perception/             — LiDAR → tracked-sheep pipeline
+    lidar_sim.py            fast 2D raycast for the env
+    lidar_perception.py     scan → world-frame cluster centroids + filters
+    sheep_tracker.py        multi-target NN tracker with FOV memory
+    obs.py                  32-D order-invariant observation builder
+  control/                — every dog mode's action source
+    strombom.py             canonical CoM collect/drive heuristic
+    sequential.py           single-target "pin-and-push" alternative
+    active_scan.py          wraps a base teacher with opening rotation +
+                            walk-to-centre fallback
+    modulation.py           shared near-sheep speed-modulation helper
+
+controllers/
+  sheep/sheep.py          — Webots sheep controller (uses herding.world.flocking_sim)
+  shepherd_dog/
+    shepherd_dog.py       — Webots dog controller, mode-switched
+    policy_loader.py      — lazy SB3 policy loader (auto-detects frame stack)
+
+training/
+  herding_env.py          — Gymnasium env (LiDAR + tracker by default)
+  bc/collect.py           — sim demos via the active-scan teacher
+  bc/pretrain.py          — supervised BC of (obs, action) demos into MLP
+  rl/train.py             — KL-regularised PPO fine-tune of BC
+  eval.py                 — analytic + learned policy comparison harness
+  bc/demos_<drive>_<world>.npz — collected demonstrations, one file per combination (gitignored)
+  runs/                   — checkpoints (whitelisted in .gitignore)
+  requirements.txt
+
+tests/
+  conftest.py             — pytest setup (adds project root to sys.path)
+  test_geometry.py        — geometric predicates + constants
+  test_diffdrive.py       — kinematics and (vx, vy) → wheel-speed map
+  test_obs.py             — observation builder (shape, normalisation, order)
+  test_control.py         — speed modulation + analytic teachers + active scan
+  test_perception.py      — LiDAR sim + clustering + tracker
+  test_env.py             — Gymnasium contract + determinism + reward
+
+tools/
+  run_webots.sh           — launch Webots with N sheep + chosen mode
+
+Makefile                  — pipeline orchestrator (make / make rl / make test / …)
+
+worlds/
+  field.wbt               — main world (3 m gate, external pen)
+  field_round.wbt         — circular-fence variant (`WORLD=field_round`)
+
+protos/                   — Sheep / ShepherdDog robot definitions
+docs/project.md           — original course proposal/goals
+```
+
+## Shared low-level control
+
+Every dog mode (Strömbom, Sequential, BC, RL) routes its action
+through `herding/control/modulation.py:modulate_speed_near_sheep`,
+which scales action magnitude down when within ~2.5 m of the nearest
+tracked sheep. This stops the dog from charging in at full speed and
+scattering the flock. Direction (intent) is preserved.
+
+All modes also share the same EMA action smoother in
+`controllers/shepherd_dog/shepherd_dog.py:ACTION_SMOOTH = 0.55`.
+
+## Results — env eval, 10 seeds × n=1..10
+
+`max_steps=15000`, full-field spawn distribution. Success rate per
+flock size, then mean steps over successful seeds.
+
+### Success rate (%)
+
+| n  | Strömbom | `bc` | `rl` |
+|---:|---:|---:|---:|
+|  1 |  30 |  80 | **90** |
+|  2 |  90 |  50 | **90** |
+|  3 |  60 |  90 | **90** |
+|  4 |  40 |  80 | **90** |
+|  5 |  60 |  70 | **100** |
+|  6 |  30 |  80 | 80 |
+|  7 |  70 |  80 | **100** |
+|  8 |  30 | 100 | **100** |
+|  9 |  40 |  90 | **100** |
+| 10 |  50 | 100 | **100** |
+
+### Mean penned per episode (out of n)
+
+| n  | Strömbom | `bc` | `rl` |
+|---:|---:|---:|---:|
+|  1 | 0.30 | 0.80 | **0.90** |
+|  5 | 3.90 | 4.10 | **5.00** |
+|  8 | 4.20 | 8.00 | **8.00** |
+| 10 | 7.40 | 10.00 | **10.00** |
+
+### Takeaways
+
+- **BC clearly beats Strömbom** under realistic LiDAR conditions (full
+  field, partial observability). Strömbom struggles on small flocks
+  where a single sheep can spawn beyond the LiDAR's 12 m range; BC
+  learned active perception from the demos.
+- **RL refines BC** without regressing on any cell. Ties or beats BC
+  at every flock size; biggest gains at n=2 (+40 points) and n=5
+  (+30 points), where BC's imitation of Strömbom's drive heuristic
+  was sub-optimal.
+- **Aggressive reward shaping doesn't help** — a more aggressive
+  variant (β=0.02, W_TIME=-0.1, W_IMITATE=0, 3 M steps) trained as
+  an ablation was strictly worse than the conservative tune shipped
+  here (β=0.05, W_IMITATE=0.5, 1 M steps).
+
+## License
+
+Educational project for the *Topics in Intelligent Robotics* course.
@@ -0,0 +1,30 @@
+"""Backwards-compat shim — flocking logic now lives in ``herding.world.flocking_sim``.
+
+Kept so any external reference still resolves.
+"""
+
+import os
+import sys
+
+_HERE = os.path.dirname(os.path.abspath(__file__))
+_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
+if _PROJECT_ROOT not in sys.path:
+    sys.path.insert(0, _PROJECT_ROOT)
+
+from herding.world.flocking_sim import (  # noqa: F401
+    MAX_SPEED, FLEE_SPEED, WANDER_SPEED,
+    WALL_MARGIN, WALL_HARD_MARGIN, WALL_HARD_GAIN,
+    FLEE_DIST, SEPARATION_DIST, COHESION_DIST,
+    PEN_MARGIN,
+    compute_heading_speed,
+)
+from herding.world.geometry import (  # noqa: F401
+    FIELD_X, FIELD_Y, PEN_X, PEN_Y,
+    in_pen,
+)
+
+# Original module-level names retained for any code still importing them.
+X_MIN, X_MAX = FIELD_X
+Y_MIN, Y_MAX = FIELD_Y
+PEN_X_MIN, PEN_X_MAX = PEN_X
+PEN_Y_MIN, PEN_Y_MAX = PEN_Y
@@ -1,213 +1,131 @@
-"""
-Sheep flocking controller (Webots, Reynolds boids variant).
+"""Sheep flocking controller (Webots).

-Each sheep broadcasts its GPS position every 3 steps on channel 1 and
-listens for the dog and peer sheep positions.  Peers are keyed by robot
-name so each neighbour has exactly one current entry in the dict.
+Each sheep emits its GPS position every 3 steps and listens for the
+dog's position and peer-sheep positions. The behavioural step is
+delegated to :func:`herding.world.flocking_sim.compute_heading_speed`
+so the env and Webots use identical sheep dynamics.

-Force stack each step (summed then converted to a heading + speed):
-    flee       — away from dog, quadratic ramp, dominant when close
-    cohesion   — toward flock centre, halved while fleeing
-    separation — inverse-distance push, prevents physical overlap
-    walls      — linear repulsion from field boundary
-    wander     — small persistent drift for natural idle motion
-
-Pen behaviour: on first entry into the quarantine pen the sheep latches
-permanently — it turns pink (via the exposed woolColor PROTO field) and
-the normal force stack is replaced by pen-confinement forces only.
+A sheep latches penned the first time it crosses the gate plane south;
+the wool turns pink (via the exposed ``woolColor`` PROTO field) and
+the dynamics switch to in-pen containment.
 """

-import random
 import math
+import os
+import random
+import sys
+
+# --- Make the shared herding/ package importable from this controller dir ---
+_HERE = os.path.dirname(os.path.abspath(__file__))
+_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
+if _PROJECT_ROOT not in sys.path:
+    sys.path.insert(0, _PROJECT_ROOT)
+
 from controller import Supervisor

-# ---------------------------------------------------------------------------
-# Tuning constants
-# ---------------------------------------------------------------------------
+from herding.world.diffdrive import heading_speed_to_wheels
+from herding.world.flocking_sim import MAX_SPEED, compute_heading_speed
+from herding.world.geometry import (
+    SHEEP_MAX_WHEEL_OMEGA,
+    is_penned_position,
+)

-MAX_SPEED    = 22.0   # rad/s hard clamp on both motors
-FLEE_SPEED   = 20.0   # rad/s upper bound while panicking
-WANDER_SPEED =  3.0   # rad/s lower bound during calm wandering

-X_MIN, X_MAX = -14.5, 14.5   # stone wall inner edges (metres)
-Y_MIN, Y_MAX = -14.5, 14.5
-WALL_MARGIN  =  3.5           # avoidance starts this far from the wall
-
-FLEE_DIST       = 7.0   # dog within this radius triggers flee (metres)
-SEPARATION_DIST = 2.5   # inverse-distance push active inside this radius
-COHESION_DIST   = 8.0   # pull toward flock centre active inside this radius
-
-PEN_X_MIN, PEN_X_MAX = 10.0, 13.0   # quarantine pen extents (metres)
-PEN_Y_MIN, PEN_Y_MAX = -15.0, -8.0  # open entrance at y=-8, gate at y=-15
-PEN_MARGIN = 0.8                     # confinement force starts this far from pen wall
-
-# ---------------------------------------------------------------------------
-# Device setup
-# ---------------------------------------------------------------------------
-
-robot    = Supervisor()
+# --- Devices ---
+robot = Supervisor()
 timestep = int(robot.getBasicTimeStep())
-name     = robot.getName()
+name = robot.getName()
 self_node = robot.getSelf()

-left_motor  = robot.getDevice("left wheel motor")
+left_motor = robot.getDevice("left wheel motor")
 right_motor = robot.getDevice("right wheel motor")
 left_motor.setPosition(float("inf"))
 right_motor.setPosition(float("inf"))
 left_motor.setVelocity(0.0)
 right_motor.setVelocity(0.0)
+MOTOR_MAX = min(left_motor.getMaxVelocity(), SHEEP_MAX_WHEEL_OMEGA)

-gps      = robot.getDevice("gps");      gps.enable(timestep)
-compass  = robot.getDevice("compass");  compass.enable(timestep)
+gps = robot.getDevice("gps");           gps.enable(timestep)
+compass = robot.getDevice("compass");   compass.enable(timestep)
 receiver = robot.getDevice("receiver"); receiver.enable(timestep)
-emitter  = robot.getDevice("emitter")
+emitter = robot.getDevice("emitter")

-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-def norm_angle(a):
-    return math.atan2(math.sin(a), math.cos(a))

+# --- Helpers ---

 def bearing():
-    # Compass returns north direction in sensor frame; for this Z-up world
-    # with north = +Y, atan2(n[0], n[1]) gives the standard math angle
-    # (0 = east, π/2 = north) matching atan2(fy, fx) used for heading.
+    """World-frame heading (0 = east, π/2 = north)."""
    n = compass.getValues()
    return math.atan2(n[0], n[1])


-def drive(heading, speed):
-    err = norm_angle(heading - bearing())
-    # Scale forward component by cos(err): at 90° error fwd→0 so the robot
-    # spins in place to realign rather than driving sideways at full speed.
-    fwd = speed * max(0.0, math.cos(err))
-    k = 4.0
-    left_motor.setVelocity( max(-MAX_SPEED, min(MAX_SPEED, fwd - k * err)))
-    right_motor.setVelocity(max(-MAX_SPEED, min(MAX_SPEED, fwd + k * err)))
+def drive(heading, speed_motor):
+    """Steer toward ``heading`` by commanding both wheel motors.
+
+    The requested speed is capped at ``MAX_SPEED``, then passed together
+    with the current compass bearing and the ``MOTOR_MAX`` limit to
+    ``heading_speed_to_wheels``. The returned (left, right) pair is written
+    straight to the two motors.
+
+    Args:
+        heading: desired heading, in the same frame as ``bearing()``.
+        speed_motor: requested speed; values above ``MAX_SPEED`` are clipped.
+    """
+    left_w, right_w = heading_speed_to_wheels(
+        heading, min(speed_motor, MAX_SPEED), bearing(), MOTOR_MAX, k_turn=4.0
+    )
+    left_motor.setVelocity(left_w)
+    right_motor.setVelocity(right_w)


 def paint_pink():
-    # woolColor is declared as a PROTO field with IS binding to the DEF WOOL
-    # PBRAppearance baseColor.  Changing it here propagates to every USE WOOL
-    # shape on the body.  Direct field access avoids PROTO-internal opacity.
+    """Switch the sheep's wool to pink via the exposed PROTO field."""
    self_node.getField("woolColor").setSFColor([1.0, 0.55, 0.72])

-# ---------------------------------------------------------------------------
-# State
-# ---------------------------------------------------------------------------

+# --- State ---
 wander_angle = random.uniform(-math.pi, math.pi)
-step   = 0
-dog_x  = None
-dog_y  = None
-peers  = {}   # name → (x, y), one entry per neighbour, cleared every 30 steps
+step_count = 0
+dog_x, dog_y = None, None
+peers = {}                       # name → (x, y); periodically pruned
 penned = False

-# ---------------------------------------------------------------------------
-# Main loop
-# ---------------------------------------------------------------------------
+# Safety net for differential-drive sheep pinned against a wall.
+_prev_x, _prev_y = None, None
+_stuck_count = 0
+STUCK_STEPS = 20
+STUCK_DIST = 0.05

+
+# --- Main loop ---
 while robot.step(timestep) != -1:
-    step += 1
+    step_count += 1
    pos = gps.getValues()
    x, y = pos[0], pos[1]

-    # Pen entry: one-way latch, never unset
-    if not penned and PEN_X_MIN < x < PEN_X_MAX and PEN_Y_MIN < y < PEN_Y_MAX:
+    if not penned and is_penned_position(x, y):
        penned = True
        paint_pink()

-    # Refresh peer table (clear before receiving so fresh data is never lost)
-    if step % 30 == 0:
+    # Stale peers get dropped periodically so a peer that's gone silent
+    # doesn't permanently distort the local CoM.
+    if step_count % 30 == 0:
        peers.clear()
    while receiver.getQueueLength() > 0:
        msg = receiver.getString()
        receiver.nextPacket()
-        p = msg.split(":")
-        if p[0] == "dog" and len(p) >= 3:
-            dog_x, dog_y = float(p[1]), float(p[2])
-        elif p[0] == "sheep" and len(p) >= 4 and p[1] != name:
-            peers[p[1]] = (float(p[2]), float(p[3]))
+        parts = msg.split(":")
+        if parts[0] == "dog" and len(parts) >= 3:
+            dog_x, dog_y = float(parts[1]), float(parts[2])
+        elif parts[0] == "sheep" and len(parts) >= 4 and parts[1] != name:
+            peers[parts[1]] = (float(parts[2]), float(parts[3]))

-    fx, fy = 0.0, 0.0
+    dog_xy = (dog_x, dog_y) if dog_x is not None and dog_y is not None else None
+    heading, speed, wander_angle = compute_heading_speed(
+        x=x, y=y, penned=penned, dog_xy=dog_xy, peers=peers,
+        wander_angle=wander_angle,
+    )

-    if penned:
-        # Inside pen: wander freely, strong boundary forces prevent exit,
-        # separation still active to avoid collisions with other penned sheep.
+    # Stuck-against-wall recovery: drive toward the field centre.
+    # _stuck_count counts consecutive control steps whose displacement since
+    # the previous step is below STUCK_DIST; any larger move resets it to 0.
+    if _prev_x is not None:
+        moved = math.hypot(x - _prev_x, y - _prev_y)
+        _stuck_count = _stuck_count + 1 if moved < STUCK_DIST else 0
+    # After STUCK_STEPS such steps, override this step's command: head from
+    # (x, y) toward the origin at MAX_SPEED, then start counting again.
+    # NOTE(review): this override is not gated on `penned`, so it can also
+    # replace the in-pen heading returned by compute_heading_speed — confirm
+    # that is intended.
+    if _stuck_count >= STUCK_STEPS:
+        heading = math.atan2(-y, -x)
+        speed = MAX_SPEED
+        _stuck_count = 0
+    # Remember this position for the next step's displacement check.
+    _prev_x, _prev_y = x, y

-        pm = PEN_MARGIN
-        if x < PEN_X_MIN + pm: fx += ((PEN_X_MIN + pm - x) / pm) * 15.0
-        if x > PEN_X_MAX - pm: fx -= ((x - (PEN_X_MAX - pm)) / pm) * 15.0
-        if y < PEN_Y_MIN + pm: fy += ((PEN_Y_MIN + pm - y) / pm) * 15.0
-        if y > PEN_Y_MAX - pm: fy -= ((y - (PEN_Y_MAX - pm)) / pm) * 15.0
-
-        for px, py in peers.values():
-            dx, dy = px - x, py - y
-            d = math.hypot(dx, dy)
-            if 0.05 < d < SEPARATION_DIST:
-                push = (SEPARATION_DIST - d) / d
-                fx -= (dx / d) * push * 2.5
-                fy -= (dy / d) * push * 2.5
-
-        if random.random() < 0.02:
-            wander_angle += random.uniform(-0.6, 0.6)
-        fx += math.cos(wander_angle) * 0.5
-        fy += math.sin(wander_angle) * 0.5
-
-    else:
-        fleeing = False
-
-        # Flee — quadratic ramp so force grows rapidly as the dog closes in
-        if dog_x is not None:
-            dx   = dog_x - x
-            dy   = dog_y - y
-            dist = math.hypot(dx, dy)
-            if 0.01 < dist < FLEE_DIST:
-                fleeing = True
-                t = 1.0 - dist / FLEE_DIST
-                s = t * t * 20.0
-                fx -= (dx / dist) * s
-                fy -= (dy / dist) * s
-
-        # Cohesion — halved while fleeing to reduce mid-panic collisions
-        cx, cy, cn = 0.0, 0.0, 0
-        for px, py in peers.values():
-            d = math.hypot(px - x, py - y)
-            if 0.3 < d < COHESION_DIST:
-                cx += px; cy += py; cn += 1
-        if cn > 0:
-            w = 0.08 if fleeing else 0.15
-            fx += (cx / cn - x) * w
-            fy += (cy / cn - y) * w
-
-        # Separation — inverse-distance: huge when nearly overlapping, fades quickly
-        for px, py in peers.values():
-            dx, dy = px - x, py - y
-            d = math.hypot(dx, dy)
-            if 0.05 < d < SEPARATION_DIST:
-                push = (SEPARATION_DIST - d) / d
-                fx -= (dx / d) * push * 2.5
-                fy -= (dy / d) * push * 2.5
-
-        # Walls
-        if x < X_MIN + WALL_MARGIN: fx += ((X_MIN + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
-        if x > X_MAX - WALL_MARGIN: fx -= ((x - (X_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
-        if y < Y_MIN + WALL_MARGIN: fy += ((Y_MIN + WALL_MARGIN - y) / WALL_MARGIN) * 6.0
-        if y > Y_MAX - WALL_MARGIN: fy -= ((y - (Y_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
-
-        # Wander — suppressed while fleeing so drift cannot deflect the flee heading
-        if not fleeing:
-            if random.random() < 0.02:
-                wander_angle += random.uniform(-0.6, 0.6)
-            fx += math.cos(wander_angle) * 0.5
-            fy += math.sin(wander_angle) * 0.5
-
-    heading = math.atan2(fy, fx)
-    mag     = math.hypot(fx, fy)
-    speed   = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))
    drive(heading, speed)

-    if step % 3 == 0:
+    if step_count % 3 == 0:
        emitter.send(f"sheep:{name}:{x:.4f}:{y:.4f}")
@@ -0,0 +1,90 @@
+"""Lazy SB3 policy loader for the dog controller.
+
+SB3 is imported only when a learned policy is actually requested,
+so the analytic modes can run on installs without stable-baselines3
+or torch.
+
+The handle auto-detects frame stacking from the policy's expected
+observation dimension: if it's a multiple of the single-frame
+``OBS_DIM``, an internal buffer of the last K frames is maintained
+and concatenated on each ``predict`` call.
+"""
+
+import os
+from pathlib import Path
+
+
+class PolicyHandle:
+    """Wrap a loaded policy (+ optional VecNormalize) for ``predict(obs)``."""
+
+    def __init__(self, model, vecnorm):
+        self.model = model
+        self.vecnorm = vecnorm
+        from herding.perception.obs import OBS_DIM
+        policy_dim = int(model.observation_space.shape[0])
+        if policy_dim % OBS_DIM == 0 and policy_dim // OBS_DIM >= 1:
+            self.frame_stack = policy_dim // OBS_DIM
+        else:
+            self.frame_stack = 1
+        self._buffer: list = []
+        self._single_dim = OBS_DIM
+
+    def predict(self, obs):
+        import numpy as np
+        single = np.asarray(obs, dtype=np.float32).reshape(-1)
+        if single.shape[0] != self._single_dim:
+            # Caller passed an already-stacked obs.
+            stacked = single
+        elif self.frame_stack > 1:
+            if not self._buffer:
+                self._buffer = [single.copy() for _ in range(self.frame_stack)]
+            else:
+                self._buffer.append(single)
+                if len(self._buffer) > self.frame_stack:
+                    self._buffer = self._buffer[-self.frame_stack:]
+            stacked = np.concatenate(self._buffer, axis=0)
+        else:
+            stacked = single
+
+        obs_b = stacked.reshape(1, -1)
+        if self.vecnorm is not None:
+            obs_b = self.vecnorm.normalize_obs(obs_b)
+        action, _ = self.model.predict(obs_b, deterministic=True)
+        return action[0]
+
+
+def load(model_path: str, vecnorm_path: str | None = None) -> PolicyHandle:
+    """Load a policy zip (+ optional VecNormalize pickle) from disk.
+
+    ``model_path`` may be a ``.zip`` file or a directory; in the
+    latter case ``policy.zip`` is preferred, with ``final.zip`` as
+    a fallback for partially-completed RL runs.
+    """
+    p = Path(model_path)
+    if p.is_dir():
+        zip_candidates = [p / "policy.zip", p / "final.zip"]
+        zip_path = next((z for z in zip_candidates if z.exists()), None)
+        if zip_path is None:
+            raise FileNotFoundError(
+                f"No policy zip in {p} (looked for policy.zip, final.zip)"
+            )
+        if vecnorm_path is None:
+            vn = p / "vecnormalize.pkl"
+            if vn.exists():
+                vecnorm_path = str(vn)
+    else:
+        zip_path = p
+
+    # Deferred imports so the analytic path doesn't require SB3.
+    from stable_baselines3 import PPO
+    from stable_baselines3.common.vec_env import VecNormalize  # noqa: F401
+
+    model = PPO.load(str(zip_path), device="auto")
+    vecnorm = None
+    if vecnorm_path and os.path.exists(vecnorm_path):
+        import pickle
+        with open(vecnorm_path, "rb") as f:
+            vecnorm = pickle.load(f)
+        vecnorm.training = False
+        vecnorm.norm_reward = False
+    return PolicyHandle(model=model, vecnorm=vecnorm)
@@ -1,88 +1,408 @@
-"""
-Shepherd Dog controller (Webots, manual keyboard control).
+"""Shepherd Dog controller (Webots).

-WASD / arrow keys drive the robot.  +/- adjust speed in 10 % increments.
-GPS position is broadcast every step on channel 1 so sheep controllers
-can compute flee forces.  Ears wag continuously via sinusoidal position
-targets — purely cosmetic.
+Mode is selected by ``HERDING_MODE`` (env var, or via the
+``herding_runtime.cfg`` file the launcher writes since Webots strips
+env vars on some setups):
+
+    strombom    → canonical Strömbom (2014) collect/drive heuristic
+                  wrapped in ActiveScanTeacher (opening rotation +
+                  walk-to-centre when the tracker briefly empties).
+    sequential  → single-target "pin-and-push", same wrapper.
+    bc          → behaviour-cloned MLP, trained on Strömbom demos.
+                  Default policy: training/runs/bc/policy.zip.
+    rl          → KL-regularised PPO fine-tune of bc. Same obs/action
+                  space as bc; refines time-to-pen via reward while
+                  staying anchored to bc.
+                  Default policy: training/runs/rl/policy.zip.
+
+Sheep perception
+----------------
+The dog perceives sheep through its **front-mounted 140° LiDAR**
+(``protos/ShepherdDog.proto``: 180 rays, 12 m max range). Each step:
+
+    1. Reads ``lidar.getRangeImage()``.
+    2. Runs ``herding.perception.lidar_perception.detections_from_scan``
+       to cluster returns into world-frame ``(x, y)`` sheep estimates.
+    3. Folds those into a ``SheepTracker`` which maintains last-seen
+       positions for sheep currently out of FOV and latches "penned"
+       once a track crosses the gate plane south.
+
+Sheep ``emitter`` messages are read **for diagnostic logging only**
+(GT_penned counter + auto-finish sentinel); they are never used to
+drive the policy. Perception for control comes entirely from LiDAR.
+
+Auto-finish
+-----------
+When the dog observes (via GT, read off the receiver) that all sheep
+are penned, it writes ``training/.run_done`` and the launcher
+(``tools/run_webots.sh``) detects it and closes Webots. This keeps
+batch evaluation runs bounded.
 """

 import math
-from controller import Robot, Keyboard
+import os
+import sys

-robot    = Robot()
+# --- Make the shared herding/ package importable from this controller dir ---
+_HERE = os.path.dirname(os.path.abspath(__file__))
+_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
+if _PROJECT_ROOT not in sys.path:
+    sys.path.insert(0, _PROJECT_ROOT)
+
+# --- Read runtime cfg early so env vars are set before geometry import ---
+def _load_runtime_config():
+    cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
+    if not os.path.exists(cfg_path):
+        return {}
+    out = {}
+    try:
+        with open(cfg_path) as f:
+            for line in f:
+                line = line.strip()
+                if not line or line.startswith("#") or "=" not in line:
+                    continue
+                k, _, v = line.partition("=")
+                out[k.strip().upper()] = v.strip()
+    except OSError:
+        return {}
+    return out
+
+_runtime_cfg = _load_runtime_config()
+# Seed env vars from runtime cfg so downstream modules (geometry.py) see them.
+for _rk, _rv in _runtime_cfg.items():
+    if _rk.startswith("HERDING_") and _rk not in os.environ:
+        os.environ[_rk] = _rv
+
+import numpy as np
+
+from controller import Robot
+
+from herding.control.active_scan import ActiveScanTeacher
+from herding.control.modulation import modulate_speed_near_sheep
+from herding.control.sequential import compute_action as sequential_action
+from herding.control.strombom import compute_action as strombom_action
+from herding.control.universal import compute_action as universal_action
+from herding.perception.obs import build_obs
+from herding.perception.lidar_perception import detections_from_scan
+from herding.perception.sheep_tracker import SheepTracker
+from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels
+from herding.world.geometry import (
+    DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
+    DOG_SOUTH_LIMIT, DOG_WHEEL_BASE, DOG_WHEEL_BASE_X,
+    DOG_WHEEL_BASE_Y, DOG_WHEEL_RADIUS,
+    PEN_ENTRY, is_penned_position,
+)
+
+
+# ---------------------------------------------------------------------------
+# Mode + policy resolution (cfg already loaded above)
+# ---------------------------------------------------------------------------
+
+MODE = (os.environ.get("HERDING_MODE")
+        or _runtime_cfg.get("HERDING_MODE")
+        or "bc").lower()
+
+
+def _resolve_policy_dir(mode: str) -> str:
+    """Where to look for the trained policy for the given mode.
+
+    Priority:
+      1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points
+         to a real directory.
+      2. Drive-mode-specific default:
+            bc → training/runs/bc_differential (or bc_mecanum)
+            rl → training/runs/rl_differential (or rl_mecanum)
+      3. Legacy path (no drive suffix):
+            bc → training/runs/bc
+            rl → training/runs/rl
+    """
+    env_dir = (os.environ.get("HERDING_POLICY_DIR")
+               or _runtime_cfg.get("HERDING_POLICY_DIR"))
+    if env_dir and os.path.isdir(env_dir):
+        return env_dir
+    drive = DRIVE_MODE
+    mode_default = {
+        "bc": os.path.join(_PROJECT_ROOT, "training", "runs",
+                           f"bc_{drive}"),
+        "rl": os.path.join(_PROJECT_ROOT, "training", "runs",
+                           f"rl_{drive}"),
+    }
+    primary = mode_default.get(mode, mode_default["bc"])
+    if os.path.isdir(primary):
+        return primary
+    # Fallback: legacy paths without drive suffix.
+    legacy = {
+        "bc": os.path.join(_PROJECT_ROOT, "training", "runs", "bc"),
+        "rl": os.path.join(_PROJECT_ROOT, "training", "runs", "rl"),
+    }
+    fallback = legacy.get(mode, legacy["bc"])
+    if os.path.isdir(fallback):
+        return fallback
+    return env_dir or primary
+
+
+_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal")
+if MODE not in _VALID_MODES:
+    print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
+    MODE = "strombom"
+
+POLICY_DIR = _resolve_policy_dir(MODE)
+policy_handle = None
+if MODE in ("bc", "rl"):
+    print(f"[dog] resolved POLICY_DIR={POLICY_DIR}  exists={os.path.isdir(POLICY_DIR)}")
+    try:
+        from policy_loader import load as _load_policy
+        policy_handle = _load_policy(POLICY_DIR)
+        print(f"[dog] policy loaded from {POLICY_DIR}")
+    except Exception as exc:
+        print(f"[dog] policy load failed ({exc!r}); falling back to strombom.")
+        MODE = "strombom"
+print(f"[dog] running in mode={MODE}")
+
+# Drive mode: "differential" (2-wheel) or "mecanum" (4-wheel omnidirectional).
+DRIVE_MODE = (os.environ.get("HERDING_DRIVE")
+              or _runtime_cfg.get("HERDING_DRIVE")
+              or "differential").lower()
+if DRIVE_MODE not in ("differential", "mecanum"):
+    print(f"[dog] unknown HERDING_DRIVE={DRIVE_MODE!r}; defaulting to differential.")
+    DRIVE_MODE = "differential"
+print(f"[dog] drive mode={DRIVE_MODE}")
+
+
+# ---------------------------------------------------------------------------
+# Control parameters
+# ---------------------------------------------------------------------------
+
+ACTION_SMOOTH = 0.55           # EMA on (vx, vy) — kills frame-to-frame jitter
+RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done")
+
+
+def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
+    """If the dog is near the south barrier and the action would push it
+    further south, override with a northward action. Hard invariant: the
+    dog never enters the pen."""
+    if dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
+        return (0.0, 1.0)
+    if dog_y < DOG_SOUTH_LIMIT + 0.5 and vy < -0.2:
+        return (vx * 0.5, max(0.0, vy + 0.5))
+    return (vx, vy)
+
+
+def drive_diff(vx: float, vy: float, left_motor, right_motor,
+               compass, motor_max: float):
+    if math.hypot(vx, vy) < 1e-3:
+        left_motor.setVelocity(0.0)
+        right_motor.setVelocity(0.0)
+        return
+    n = compass.getValues()
+    h = math.atan2(n[0], n[1])
+    left, right = velocity_to_wheels(
+        vx, vy, h,
+        max_linear=DOG_MAX_LINEAR,
+        wheel_radius=DOG_WHEEL_RADIUS,
+        max_wheel_omega=motor_max,
+        k_turn=4.0,
+    )
+    left_motor.setVelocity(left)
+    right_motor.setVelocity(right)
+
+
+def drive_mecanum(vx: float, vy: float, omega: float,
+                  fl_motor, fr_motor, rl_motor, rr_motor,
+                  compass, motor_max: float):
+    if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
+        fl_motor.setVelocity(0.0)
+        fr_motor.setVelocity(0.0)
+        rl_motor.setVelocity(0.0)
+        rr_motor.setVelocity(0.0)
+        return
+    n = compass.getValues()
+    h = math.atan2(n[0], n[1])
+    w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
+        vx, vy, omega, h,
+        max_linear=DOG_MAX_LINEAR,
+        wheel_radius=DOG_WHEEL_RADIUS,
+        lx=DOG_WHEEL_BASE_X / 2.0, ly=DOG_WHEEL_BASE_Y / 2.0,
+        max_wheel_omega=motor_max,
+        k_turn=4.0,
+        wheel_base=DOG_WHEEL_BASE,
+    )
+    fl_motor.setVelocity(w_fl)
+    fr_motor.setVelocity(w_fr)
+    rl_motor.setVelocity(w_rl)
+    rr_motor.setVelocity(w_rr)
+
+
+# ---------------------------------------------------------------------------
+# Webots devices
+# ---------------------------------------------------------------------------
+
+robot = Robot()
 timestep = int(robot.getBasicTimeStep())

-left_motor  = robot.getDevice("left wheel motor")
-right_motor = robot.getDevice("right wheel motor")
-left_motor.setPosition(float("inf"))
-right_motor.setPosition(float("inf"))
-left_motor.setVelocity(0.0)
-right_motor.setVelocity(0.0)
+if DRIVE_MODE == "mecanum":
+    fl_motor = robot.getDevice("front left wheel motor")
+    fr_motor = robot.getDevice("front right wheel motor")
+    rl_motor = robot.getDevice("rear left wheel motor")
+    rr_motor = robot.getDevice("rear right wheel motor")
+    for m in (fl_motor, fr_motor, rl_motor, rr_motor):
+        m.setPosition(float("inf"))
+        m.setVelocity(0.0)
+    MOTOR_MAX = min(fl_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
+else:
+    left_motor = robot.getDevice("left wheel motor")
+    right_motor = robot.getDevice("right wheel motor")
+    left_motor.setPosition(float("inf"))
+    right_motor.setPosition(float("inf"))
+    left_motor.setVelocity(0.0)
+    right_motor.setVelocity(0.0)
+    MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)

-lidar = robot.getDevice("lidar")
-lidar.enable(timestep)
-lidar.enablePointCloud()
-
-gps     = robot.getDevice("gps");     gps.enable(timestep)
-compass = robot.getDevice("compass"); compass.enable(timestep)
-emitter = robot.getDevice("emitter")
+gps = robot.getDevice("gps");           gps.enable(timestep)
+compass = robot.getDevice("compass");   compass.enable(timestep)
 receiver = robot.getDevice("receiver"); receiver.enable(timestep)
+emitter = robot.getDevice("emitter")
+lidar = robot.getDevice("lidar");       lidar.enable(timestep)

-left_ear  = robot.getDevice("left ear motor")
+tracker = SheepTracker()
+
+# Cosmetic ear motors — animated; not used by control.
+left_ear = robot.getDevice("left ear motor")
 right_ear = robot.getDevice("right ear motor")
 left_ear.setPosition(float("inf"))
 right_ear.setPosition(float("inf"))
 left_ear.setVelocity(0.0)
 right_ear.setVelocity(0.0)
+ear_phase = 0.0
+EAR_AMPLITUDE = 0.35
+EAR_RATE = 8.0

-keyboard = robot.getKeyboard()
-keyboard.enable(timestep)

-MOTOR_MAX   = left_motor.getMaxVelocity()
-speed_level = 0.5   # fraction of MOTOR_MAX; adjusted by +/-
+# ---------------------------------------------------------------------------
+# Main loop
+# ---------------------------------------------------------------------------

-EAR_AMPLITUDE = 0.35   # rad, peak ear deflection
-EAR_RATE      = 8.0    # rad/s, how fast the ears are driven
-ear_phase     = 0.0
+# Analytic-teacher wrapper (instantiated lazily so RL/BC modes don't pay
+# the import-time cost). Each gets the same ActiveScanTeacher treatment:
+# rotate-on-empty, walk-to-centre, near-sheep speed modulation.
+analytic_teacher = None
+if MODE in ("strombom", "sequential"):
+    base_fn = strombom_action if MODE == "strombom" else sequential_action
+    analytic_teacher = ActiveScanTeacher(base_fn)
+elif MODE == "universal":
+    analytic_teacher = ActiveScanTeacher(universal_action)
+
+# GT positions from sheep emitters — used **only** for the auto-finish
+# sentinel and the GT_penned diagnostic line. Never fed into control.
+_gt_sheep: dict = {}
+_run_done = False
+
+prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0)
+step_count = 0

 while robot.step(timestep) != -1:
-    speed = MOTOR_MAX * speed_level
-    turn  = speed * 0.6   # differential turn radius
+    step_count += 1

-    left_vel  = 0.0
-    right_vel = 0.0
-    key = keyboard.getKey()
-    while key > 0:
-        if   key in (ord('W'), Keyboard.UP):
-            left_vel  = speed
-            right_vel = speed
-        elif key in (ord('S'), Keyboard.DOWN):
-            left_vel  = -speed
-            right_vel = -speed
-        elif key in (ord('A'), Keyboard.LEFT):
-            left_vel  = -turn
-            right_vel =  turn
-        elif key in (ord('D'), Keyboard.RIGHT):
-            left_vel  =  turn
-            right_vel = -turn
-        elif key in (ord('+'), ord('=')):
-            speed_level = min(1.0, speed_level + 0.1)
-            print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
-        elif key in (ord('-'), ord('_')):
-            speed_level = max(0.1, speed_level - 0.1)
-            print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
-        key = keyboard.getKey()
-
-    left_motor.setVelocity(left_vel)
-    right_motor.setVelocity(right_vel)
+    # Drain sheep emitter messages → GT (diagnostic only).
+    while receiver.getQueueLength() > 0:
+        msg = receiver.getString()
+        receiver.nextPacket()
+        parts = msg.split(":")
+        if len(parts) == 4 and parts[0] == "sheep":
+            try:
+                _gt_sheep[parts[1]] = (float(parts[2]), float(parts[3]))
+            except ValueError:
+                pass

    pos = gps.getValues()
-    emitter.send(f"dog:{pos[0]}:{pos[1]}")
+    dog_xy = (pos[0], pos[1])
+    n = compass.getValues()
+    dog_heading = math.atan2(n[0], n[1])

+    # ---- LiDAR perception → tracker → active sheep positions ----
+    ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32)
+    detections = detections_from_scan(ranges, dog_xy[0], dog_xy[1], dog_heading)
+    sheep_positions = tracker.update(detections)
+
+    sheep_xy_list = list(sheep_positions.values())
+    sheep_penned_list = [False] * len(sheep_xy_list)
+    single_obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)
+
+    # ---- Action selection ----
+    # omega stays 0.0 unless a mecanum policy or a 4-tuple teacher sets it.
+    omega = 0.0
+    if MODE in ("bc", "rl") and policy_handle is not None:
+        action = policy_handle.predict(single_obs)
+        vx, vy = float(action[0]), float(action[1])
+        # A third action component is only read as yaw rate in mecanum mode.
+        if DRIVE_MODE == "mecanum" and len(action) >= 3:
+            omega = float(action[2])
+    else:
+        result = analytic_teacher(
+            dog_xy, dog_heading, sheep_positions, PEN_ENTRY,
+            DRIVE_MODE,
+        )
+        # Accept both (vx, vy, omega, mode) and (vx, vy, mode) results.
+        if len(result) == 4:
+            vx, vy, omega, _mode_str = result
+        else:
+            vx, vy, _mode_str = result
+
+    # Near-sheep speed modulation (shared by every mode).
+    # NOTE(review): ActiveScanTeacher.__call__ already runs
+    # modulate_speed_near_sheep on the base teacher's output, so in the
+    # analytic modes the slow-down is applied twice here — confirm intended.
+    vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
+
+    # EMA smoothing — kills frame-to-frame action jitter.
+    if DRIVE_MODE == "mecanum":
+        vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
+        vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
+        omega = ACTION_SMOOTH * prev_action[2] + (1.0 - ACTION_SMOOTH) * omega
+    else:
+        vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
+        vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
+
+    # Safety: dog must never enter the pen.
+    vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
+    prev_action = (vx, vy, omega) if DRIVE_MODE == "mecanum" else (vx, vy)
+
+    if DRIVE_MODE == "mecanum":
+        drive_mecanum(vx, vy, omega, fl_motor, fr_motor, rl_motor, rr_motor,
+                      compass, MOTOR_MAX)
+    else:
+        drive_diff(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
+    emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")
+
+    # Cosmetic ear wiggle.
    ear_phase += 0.12
    ear_pos = EAR_AMPLITUDE * math.sin(ear_phase)
    left_ear.setVelocity(EAR_RATE)
    right_ear.setVelocity(EAR_RATE)
-    left_ear.setPosition( ear_pos)
+    left_ear.setPosition(ear_pos)
    right_ear.setPosition(-ear_pos)
+
+    # Auto-finish: when all GT sheep are penned, write the sentinel.
+    # The launcher polls for it and closes Webots so batch evals don't
+    # hang after the task is done. Bounded by `_gt_sheep` so we don't
+    # fire during the first few steps while the receiver fills.
+    if _gt_sheep and not _run_done:
+        gt_active = sum(1 for x, y in _gt_sheep.values()
+                        if not is_penned_position(x, y))
+        if gt_active == 0:
+            os.makedirs(os.path.dirname(RUN_DONE_FILE), exist_ok=True)
+            open(RUN_DONE_FILE, "w").close()
+            _run_done = True
+            print(f"[dog] all {len(_gt_sheep)} sheep penned at step "
+                  f"{step_count} — wrote sentinel, launcher will close Webots")
+
+    if step_count % 200 == 0:
+        gt_penned = sum(1 for x, y in _gt_sheep.values()
+                        if is_penned_position(x, y))
+        gt_total = len(_gt_sheep)
+        print(f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
+              f"GT_penned={gt_penned}/{gt_total} "
+              f"tracks_active={tracker.n_active()} "
+              f"tracks_penned={tracker.n_penned()} "
+              f"detections={len(detections)} "
+              f"action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})"
+              if DRIVE_MODE == "mecanum" else
+              f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
+              f"GT_penned={gt_penned}/{gt_total} "
+              f"tracks_active={tracker.n_active()} "
+              f"tracks_penned={tracker.n_penned()} "
+              f"detections={len(detections)} action=({vx:+.2f}, {vy:+.2f})")
@@ -1,33 +1,37 @@
 # Group G25 - Formal & Title & Goals

+This is the original course proposal/goals document. For current setup,
+training, evaluation, and Webots run instructions, see `../README.md`
+and `../training/README.md`.
+
 ## Team members
 - Diogo Costa <up202502576@up.pt>
 - Johnny Fernandes <up202402612@up.pt>
 - Nelson Neto <up202108117@up.pt>

 ## (i) Title and General objectives
-**RL-Based Autonomous Shepherd Robot for Livestock Herding**
+**Autonomous Shepherd Robot for Livestock Herding (Strömbom)**

 - Implement effective herding behaviors through proximity and movement strategies
 - Build a 3D environment with realistic robot dynamics and LIDAR-based perception
- Develop a mobile robot capable of autonomously guiding a flock of sheep into a designated target area using Reinforcement Learning
+- Develop a mobile robot capable of autonomously guiding a flock of sheep into a designated target area using the Strömbom heuristic approach


 # Group G25 - (ii) Intermediate Goals

 ## Intermediate goals
 - Set up the Webots simulation environment with an open field and target zone
- Implement lightweight Gymnasium-based 2D herding environment
+- Implement a lightweight 2D herding environment for algorithm evaluation
 - Design a Sheep and Dog robot
- Implement a sheep flocking model for fast RL iteration
+- Implement a sheep flocking model for fast Strömbom iteration
 - Validate LiDAR sensor feedback for sheep detection and distance estimation


 # Group G25 - Course Project (Final) Goals

 ## (iii) Main goals
- State-of-the-art survey on shepherding algorithms and multi-agent RL herding
- Train the robot using PPO to successfully herd a single sheep into the goal
+- State-of-the-art survey on shepherding algorithms with focus on Strömbom herding
+- Implement and tune a Strömbom controller to successfully herd a single sheep into the goal
 - Achieve fully autonomous herding of multiple sheep and a full flock into the target area
 - Optimize robot trajectory to minimize the time required to group the flock
 - Ensure zero collisions between the robot and the sheep during the task
@@ -35,7 +39,7 @@
 - Article, demo video, and final presentation

 ## (iv) Extra Merit
- Curriculum Learning (scaling from 1 sheep to a flock)
+- Progressive evaluation (scaling from 1 sheep to a flock)
 - Comparison of performance between Differential Drive and Mecanum wheels
 - Robustness testing under sensor noise or varying sheep speeds, configurations and parameters
 - Multi-shepherd cooperative mode: 2 dogs learn role specialization (collector vs. driver)
@@ -46,11 +50,10 @@

 ## (v) Tools
 - Webots for 3D physics simulation with ROS2 integration via `webots_ros2` package
- Stable-Baselines3 for the PPO algorithm implementation
- Gymnasium (OpenAI) for the RL environment wrapper (lightweight 2D herding env for fast RL training)
+- Gymnasium (OpenAI) for the simulation wrapper and evaluation tooling
 - Python as the primary programming language (sheep flocking model, reward shaping, evaluation)

 ## (vi) Limitations
- Computational Power: Training time might be high for complex flock behaviors
+- Computational Power: Large batch evaluation and parameter sweeps can still be time-consuming
 - Sim-to-Real Gap: No real-world validation of the herding controller; project is simulation-only (2D + Webots 3D)
 - Model Complexity: Simplified sheep behavior (scripted) may not account for all biological livestock nuances
@@ -0,0 +1,8 @@
+"""Shared core for the shepherd herding project.
+
+This package is the single source of truth for world geometry, sheep
+flocking dynamics, differential-drive kinematics, observation building,
+and the Strömbom heuristic. It is imported both by the Webots
+controllers (for inference) and by the Gymnasium training environment
+(for fast PPO rollouts), so the two paths cannot drift apart.
+"""
@@ -0,0 +1,122 @@
+"""Active-perception wrapper for the analytic shepherd teachers.
+
+Under partial-observability LiDAR perception the tracker starts empty
+— a naive analytic teacher returns ``(0, 0, "idle")`` and the dog
+stops. This wrapper interleaves the underlying teacher with two
+exploration behaviours:
+
+* opening in-place rotation for the first ``INITIAL_SCAN_STEPS``,
+  guaranteeing the LiDAR sweeps a full circle before driving;
+* walk-to-centre when the tracker has been empty for at least
+  ``EMPTY_DEBOUNCE_STEPS`` consecutive frames (corners can sit
+  beyond the 12 m LiDAR range).
+
+When the tracker has detections the base teacher's action is used,
+post-processed by ``modulate_speed_near_sheep`` so the dog doesn't
+charge the flock.
+"""
+
+from __future__ import annotations
+
+import math
+
+from herding.control.modulation import modulate_speed_near_sheep
+
+
+INITIAL_SCAN_STEPS = 80         # ≈1.3 s — covers one full rotation
+EXPLORE_SPEED = 0.7             # action norm while walking blind
+EMPTY_DEBOUNCE_STEPS = 8        # consecutive empty frames before exploring
+
+
+class ActiveScanTeacher:
+    """Stateful wrapper. Construct one per episode (or call ``reset``).
+
+    Call signature::
+
+        vx, vy, omega, mode = teacher(dog_xy, dog_heading, sheep_positions,
+                                      pen_target, drive_mode="differential")
+
+    ``omega`` is the yaw-rate intent (mecanum only); 0.0 for differential
+    drive and during blind exploration phases.
+    """
+
+    def __init__(self, base_action_fn, initial_scan_steps: int = INITIAL_SCAN_STEPS):
+        """Wrap ``base_action_fn`` and start with fresh per-episode state.
+
+        ``initial_scan_steps`` is the number of leading calls that return
+        the opening-rotation action.
+        """
+        self.base = base_action_fn
+        self.initial_scan = int(initial_scan_steps)
+        self.reset()
+
+    def reset(self) -> None:
+        """Clear the call counter, the empty-frame streak and the held action."""
+        self.step = 0
+        self.empty_streak = 0
+        self.last_action: tuple[float, float] = (0.0, 0.0)
+
+    @staticmethod
+    def _scan_action(dog_heading: float) -> tuple[float, float]:
+        """Unit vector pointing opposite to ``dog_heading``."""
+        # Target opposite to current heading; velocity_to_wheels'
+        # cos(err) clamp drives forward speed to ~0 → in-place rotation.
+        target = dog_heading + math.pi
+        return math.cos(target), math.sin(target)
+
+    @staticmethod
+    def _explore_action(dog_xy) -> tuple[float, float]:
+        """Walk toward (0, 0) while the LiDAR keeps sweeping."""
+        dx, dy = -dog_xy[0], -dog_xy[1]
+        d = math.hypot(dx, dy)
+        # Within 0.5 of the origin: signal "arrived" with a zero action.
+        if d < 0.5:
+            return 0.0, 0.0
+        return EXPLORE_SPEED * dx / d, EXPLORE_SPEED * dy / d
+
+    def __call__(self, dog_xy, dog_heading, sheep_positions, pen_target,
+                 drive_mode="differential"):
+        """Return ``(vx, vy, omega, mode)`` for the current step.
+
+        ``mode`` is "scan_initial", "explore", "scan_at_centre", "hold",
+        or whatever mode string the base teacher returns.
+        """
+        self.step += 1
+        n_visible = len(sheep_positions)
+
+        # The streak also advances during the opening rotation.
+        if n_visible == 0:
+            self.empty_streak += 1
+        else:
+            self.empty_streak = 0
+
+        # Phase 1: opening rotation.
+        if self.step <= self.initial_scan:
+            vx, vy = self._scan_action(dog_heading)
+            self.last_action = (vx, vy)
+            return vx, vy, 0.0, "scan_initial"
+
+        # Phase 2: walk-to-centre after a sustained empty tracker.
+        if self.empty_streak >= EMPTY_DEBOUNCE_STEPS:
+            ex, ey = self._explore_action(dog_xy)
+            # A zero explore action means the dog is at the centre: rotate.
+            if ex == 0.0 and ey == 0.0:
+                vx, vy = self._scan_action(dog_heading)
+                mode = "scan_at_centre"
+            else:
+                vx, vy = ex, ey
+                mode = "explore"
+            self.last_action = (vx, vy)
+            return vx, vy, 0.0, mode
+
+        # Phase 2b: brief tracker blink — hold the previous action.
+        if n_visible == 0:
+            vx, vy = self.last_action
+            return vx, vy, 0.0, "hold"
+
+        # Phase 3: hand off to the underlying analytic teacher, then
+        # apply the shared near-sheep speed modulation.
+        # Handle both old-style (dog_xy, sheep, pen) and new-style
+        # (dog_xy, heading, sheep, pen, drive_mode) teachers.
+        # NOTE(review): the signature is probed by catching TypeError, so a
+        # TypeError raised inside a teacher's own body also triggers the
+        # fallback calls and the original error is lost — confirm acceptable.
+        try:
+            result = self.base(dog_xy, dog_heading, sheep_positions,
+                               pen_target, drive_mode)
+        except TypeError:
+            try:
+                result = self.base(dog_xy, dog_heading, sheep_positions,
+                                   pen_target)
+            except TypeError:
+                result = self.base(dog_xy, sheep_positions, pen_target)
+        # Three-element results carry no yaw rate; default it to 0.0.
+        if len(result) == 4:
+            vx, vy, omega, mode = result
+        else:
+            vx, vy, mode = result
+            omega = 0.0
+        vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
+        self.last_action = (vx, vy)
+        return vx, vy, omega, mode
@@ -0,0 +1,42 @@
+"""Shared action post-processing.
+
+Every dog mode routes its action through ``modulate_speed_near_sheep``
+so the magnitude is reduced near sheep — direction (intent) is
+preserved.
+"""
+
+from __future__ import annotations
+
+import math
+
+
+SLOW_NEAR_SHEEP = 2.5  # m — distance below which action norm is scaled down
+MIN_SPEED = 0.30       # action norm at zero distance
+
+
+def modulate_speed_near_sheep(
+    vx: float, vy: float,
+    dog_xy: tuple[float, float],
+    sheep_positions,
+    slow_dist: float = SLOW_NEAR_SHEEP,
+    min_scale: float = MIN_SPEED,
+) -> tuple[float, float]:
+    """Linearly ramp action magnitude from ``min_scale`` at distance 0
+    to 1.0 at ``slow_dist``. ``sheep_positions`` may be a
+    ``{name: (x, y)}`` dict or an iterable of ``(x, y)`` tuples.
+    """
+    if not sheep_positions:
+        return vx, vy
+    if hasattr(sheep_positions, "values"):
+        positions = sheep_positions.values()
+    else:
+        positions = sheep_positions
+    nearest = float("inf")
+    for sx, sy in positions:
+        d = math.hypot(sx - dog_xy[0], sy - dog_xy[1])
+        if d < nearest:
+            nearest = d
+    if nearest >= slow_dist or nearest == float("inf"):
+        return vx, vy
+    scale = min_scale + (1.0 - min_scale) * (nearest / slow_dist)
+    return vx * scale, vy * scale
@@ -0,0 +1,74 @@
+"""Sequential "pin-and-push" shepherd-dog controller.
+
+Single-target alternative to Strömbom: each step, target the sheep
+closest to the pen, park behind it, drive it through; once it latches
+penned the next-closest sheep becomes the target. Naturally queues
+the flock through a narrow gate.
+"""
+
+import math
+
+from herding.world.geometry import GATE_Y, PEN_ENTRY, in_pen
+
+
+DELTA_DRIVE = 1.5     # standoff behind the target sheep
+APPROACH_GAIN = 1.0   # action magnitude scale (1 = full speed)
+
+
+def _unit(x, y):
+    """Normalise ``(x, y)``; vectors shorter than 1e-6 become ``(0.0, 0.0)``."""
+    norm = math.hypot(x, y)
+    return (0.0, 0.0) if norm < 1e-6 else (x / norm, y / norm)
+
+
+def _is_active(x, y) -> bool:
+    """True when ``(x, y)`` is outside the pen and has ``y`` above ``GATE_Y``."""
+    if in_pen(x, y):
+        return False
+    return y > GATE_Y
+
+
+def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
+    """Head for the standoff point behind the active sheep nearest the pen.
+
+    Returns ``(vx, vy, mode)`` — same call signature as Strömbom.
+    ``mode`` is ``"idle"`` when no sheep is active, otherwise
+    ``"drive:<name>"`` naming the targeted sheep.
+    """
+    candidates = []
+    for sheep_name, (px, py) in sheep_positions.items():
+        if _is_active(px, py):
+            candidates.append((sheep_name, px, py))
+    if len(candidates) == 0:
+        return 0.0, 0.0, "idle"
+
+    def _pen_distance(entry):
+        return math.hypot(entry[1] - pen_target[0], entry[2] - pen_target[1])
+
+    name, sx, sy = min(candidates, key=_pen_distance)
+
+    # Unit vector from the pen target out through the sheep; the dog
+    # parks DELTA_DRIVE further along it, i.e. behind the sheep.
+    away_x, away_y = _unit(sx - pen_target[0], sy - pen_target[1])
+    goal_x = sx + DELTA_DRIVE * away_x
+    goal_y = sy + DELTA_DRIVE * away_y
+
+    head_x, head_y = _unit(goal_x - dog_xy[0], goal_y - dog_xy[1])
+    return APPROACH_GAIN * head_x, APPROACH_GAIN * head_y, f"drive:{name}"
+
+
+def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
+    """Same decision as ``compute_action``, plus a dict describing it.
+
+    Returns ``(vx, vy, mode, debug)``.  ``debug`` carries the active
+    sheep count, the chosen target's name and position, and the drive
+    point the dog is steering to (the dog's own position when idle).
+    """
+    candidates = []
+    for sheep_name, (px, py) in sheep_positions.items():
+        if _is_active(px, py):
+            candidates.append((sheep_name, px, py))
+
+    if len(candidates) == 0:
+        idle_debug = {
+            "n_active": 0, "target_name": "",
+            "target_x": 0.0, "target_y": 0.0,
+            "drive_x": dog_xy[0], "drive_y": dog_xy[1],
+        }
+        return 0.0, 0.0, "idle", idle_debug
+
+    def _pen_distance(entry):
+        return math.hypot(entry[1] - pen_target[0], entry[2] - pen_target[1])
+
+    name, sx, sy = min(candidates, key=_pen_distance)
+
+    away_x, away_y = _unit(sx - pen_target[0], sy - pen_target[1])
+    goal_x = sx + DELTA_DRIVE * away_x
+    goal_y = sy + DELTA_DRIVE * away_y
+    head_x, head_y = _unit(goal_x - dog_xy[0], goal_y - dog_xy[1])
+
+    debug = {
+        "n_active": len(candidates), "target_name": name,
+        "target_x": sx, "target_y": sy,
+        "drive_x": goal_x, "drive_y": goal_y,
+    }
+    return APPROACH_GAIN * head_x, APPROACH_GAIN * head_y, f"drive:{name}", debug
@@ -0,0 +1,95 @@
+"""Strömbom (2014) collect/drive heuristic for the shepherd dog.
+
+When the flock is scattered (max radius > F_FACTOR · √n) the dog moves
+to a point behind the furthest sheep and pushes it back toward the
+flock CoM. Otherwise it drives, parking behind the CoM relative to
+the pen target. Returns a unit-vector intent ``(vx, vy, mode)``.
+
+Reference: Strömbom et al. 2014, "Solving the shepherding problem."
+"""
+
+import math
+
+from herding.world.geometry import PEN_ENTRY, GATE_Y, in_pen
+
+F_FACTOR = 4.0       # collect/drive threshold scaled by √n
+DELTA_COLLECT = 1.5  # drive-position offset behind the furthest sheep
+DELTA_DRIVE = 2.0    # drive-position offset behind the flock CoM
+
+
+def _unit(x, y):
+    """Return ``(x, y)`` scaled to length 1, or ``(0.0, 0.0)`` below 1e-6."""
+    length = math.hypot(x, y)
+    if length < 1e-6:
+        unit = (0.0, 0.0)
+    else:
+        unit = (x / length, y / length)
+    return unit
+
+
+def _is_active(x, y) -> bool:
+    """Whether a sheep at ``(x, y)`` is still in play.
+
+    A sheep counts while it is not in the pen and its ``y`` is above
+    ``GATE_Y``; one south of the gate does not.
+    """
+    if in_pen(x, y):
+        return False
+    return y > GATE_Y
+
+
+def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
+    """Choose the Strömbom collect or drive point and head straight for it.
+
+    Returns ``(vx, vy, mode)`` where ``(vx, vy)`` is a unit vector (or
+    zero) and ``mode`` is one of ``"idle"``, ``"collect"``, ``"drive"``.
+    """
+    flock = []
+    for px, py in sheep_positions.values():
+        if _is_active(px, py):
+            flock.append((px, py))
+    count = len(flock)
+    if count == 0:
+        return 0.0, 0.0, "idle"
+
+    com_x = sum(pos[0] for pos in flock) / count
+    com_y = sum(pos[1] for pos in flock) / count
+    spread = [math.hypot(pos[0] - com_x, pos[1] - com_y) for pos in flock]
+
+    if max(spread) > F_FACTOR * math.sqrt(count):
+        # Collect: stand behind the furthest sheep, on the side away from the CoM.
+        far = max(range(count), key=spread.__getitem__)
+        anchor_x, anchor_y = flock[far]
+        away_x, away_y = _unit(anchor_x - com_x, anchor_y - com_y)
+        standoff, mode = DELTA_COLLECT, "collect"
+    else:
+        # Drive: stand behind the CoM, on the side away from the pen.
+        anchor_x, anchor_y = com_x, com_y
+        away_x, away_y = _unit(com_x - pen_target[0], com_y - pen_target[1])
+        standoff, mode = DELTA_DRIVE, "drive"
+
+    goal_x = anchor_x + standoff * away_x
+    goal_y = anchor_y + standoff * away_y
+    head_x, head_y = _unit(goal_x - dog_xy[0], goal_y - dog_xy[1])
+    return head_x, head_y, mode
+
+
+def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
+    """Same decision as ``compute_action``, plus a dict describing it.
+
+    Returns ``(vx, vy, mode, debug)``.  ``debug`` reports the active
+    count, flock radius, collect/drive threshold, CoM and the point the
+    dog is steering to (the dog's own position when idle).
+    """
+    flock = []
+    for px, py in sheep_positions.values():
+        if _is_active(px, py):
+            flock.append((px, py))
+    count = len(flock)
+    if count == 0:
+        idle_debug = {
+            "n_active": 0, "radius": 0.0, "threshold": 0.0,
+            "com_x": 0.0, "com_y": 0.0,
+            "target_x": dog_xy[0], "target_y": dog_xy[1],
+        }
+        return 0.0, 0.0, "idle", idle_debug
+
+    com_x = sum(pos[0] for pos in flock) / count
+    com_y = sum(pos[1] for pos in flock) / count
+    spread = [math.hypot(pos[0] - com_x, pos[1] - com_y) for pos in flock]
+    radius = max(spread)
+    threshold = F_FACTOR * math.sqrt(count)
+
+    if radius > threshold:
+        far = max(range(count), key=spread.__getitem__)
+        anchor_x, anchor_y = flock[far]
+        away_x, away_y = _unit(anchor_x - com_x, anchor_y - com_y)
+        standoff, mode = DELTA_COLLECT, "collect"
+    else:
+        anchor_x, anchor_y = com_x, com_y
+        away_x, away_y = _unit(com_x - pen_target[0], com_y - pen_target[1])
+        standoff, mode = DELTA_DRIVE, "drive"
+
+    goal_x = anchor_x + standoff * away_x
+    goal_y = anchor_y + standoff * away_y
+    head_x, head_y = _unit(goal_x - dog_xy[0], goal_y - dog_xy[1])
+    return head_x, head_y, mode, {
+        "n_active": count, "radius": radius, "threshold": threshold,
+        "com_x": com_x, "com_y": com_y,
+        "target_x": goal_x, "target_y": goal_y,
+    }
@@ -0,0 +1,200 @@
+"""Universal shepherd teacher — Strömbom core + mecanum omega + straggler recovery.
+
+The core collect/drive logic is **identical** to :mod:`strombom` (same
+``F_FACTOR``, ``DELTA_COLLECT``, ``DELTA_DRIVE`` thresholds and target
+computation) so it inherits the proven ~100 % success rate at n ≤ 8.
+Two additions make it useful as a universal teacher:
+
+1. **Omega for mecanum.**  When ``drive_mode="mecanum"``, the teacher
+   outputs a non-zero ``omega`` channel so the dog **faces the
+   direction of travel**.  During collect the dog faces the target
+   sheep; during drive it faces the pen.  This gives the BC student a
+   real rotation signal to learn from.
+
+2. **Small-flock recovery.**  When at most ``RECOVERY_MAX_N`` sheep remain
+   active and the one nearest the gate is within ``RECOVERY_GATE_DIST`` of
+   it, the dog positions itself behind that sheep (opposite the gate) and
+   pushes it straight through.  This handles the edge case where the last
+   few sheep circle the gate posts.
+
+Call signature::
+
+    vx, vy, omega, mode = compute_action(
+        dog_xy, dog_heading, sheep_positions, pen_target,
+        drive_mode="differential",
+    )
+
+For differential drive ``omega`` is always 0.0 and can be ignored.
+"""
+
+import math
+
+from herding.world.geometry import (
+    PEN_ENTRY, GATE_X, GATE_Y, in_pen,
+)
+
+# ---------------------------------------------------------------------------
+# Tuning constants — match Strömbom exactly for proven success rates.
+# ---------------------------------------------------------------------------
+
+F_FACTOR = 4.0          # collect/drive threshold scaled by √n
+DELTA_COLLECT = 1.5      # standoff behind the furthest sheep
+DELTA_DRIVE = 2.0        # standoff behind flock CoM
+
+# Omega gain for mecanum (how strongly the dog turns to face target)
+OMEGA_GAIN = 0.6
+
+# Recovery: push small flocks (≤ RECOVERY_MAX_N) through the gate one
+# sheep at a time. n=1 alone is not enough — at n=2..3 on the round
+# field the flock is too small to self-cohere through the 3 m gate but
+# the standard collect/drive standoff just orbits them. Push the sheep
+# nearest the gate first; once it pens, the rule re-applies to the next.
+RECOVERY_MAX_N = 3
+RECOVERY_GATE_DIST = 8.0   # only when target sheep is this close to gate
+RECOVERY_PUSH_DIST = 1.2   # stand-off behind sheep, away from gate
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _unit(x, y):
+    """Unit vector along ``(x, y)``; ``(0.0, 0.0)`` when shorter than 1e-6."""
+    magnitude = math.hypot(x, y)
+    degenerate = magnitude < 1e-6
+    return (0.0, 0.0) if degenerate else (x / magnitude, y / magnitude)
+
+
+def _is_active(x, y) -> bool:
+    """True when ``(x, y)`` is outside the pen and has ``y`` above ``GATE_Y``."""
+    if in_pen(x, y):
+        return False
+    return y > GATE_Y
+
+
+def _angle_diff(a, b):
+    """Signed shortest rotation from ``b`` to ``a`` (i.e. a - b), in [-π, π]."""
+    delta = a - b
+    # atan2 of (sin, cos) wraps the raw difference back into [-π, π].
+    return math.atan2(math.sin(delta), math.cos(delta))
+
+
+def _gate_center():
+    """Midpoint of the gate opening: mean of the ``GATE_X`` ends, at ``GATE_Y``."""
+    x0, x1 = GATE_X[0], GATE_X[1]
+    return (0.5 * (x0 + x1), GATE_Y)
+
+
+# ---------------------------------------------------------------------------
+# Core teacher
+# ---------------------------------------------------------------------------
+
+def _facing_omega(dog_xy, dog_heading, face_target, drive_mode):
+    """Yaw intent turning the dog toward ``face_target`` (0.0 unless mecanum)."""
+    if drive_mode != "mecanum":
+        return 0.0
+    bearing = math.atan2(
+        face_target[1] - dog_xy[1],
+        face_target[0] - dog_xy[0],
+    )
+    err = _angle_diff(bearing, dog_heading)
+    return max(-1.0, min(1.0, OMEGA_GAIN * err / math.pi))
+
+
+def compute_action(dog_xy, dog_heading, sheep_positions,
+                   pen_target=PEN_ENTRY, drive_mode="differential"):
+    """Return ``(vx, vy, omega, mode)``.
+
+    Parameters
+    ----------
+    dog_xy : (float, float)
+        Dog position in world frame.
+    dog_heading : float
+        Dog heading in world frame (rad), 0 = +x axis.
+    sheep_positions : dict[str, (float, float)]
+        Visible sheep positions.
+    pen_target : (float, float)
+        Point the flock is driven toward (defaults to geometry.PEN_ENTRY).
+    drive_mode : str
+        ``"differential"`` or ``"mecanum"``.
+
+    Returns
+    -------
+    vx, vy : float
+        Unit-vector velocity intent (zero when idle or already on target).
+    omega : float
+        Yaw intent in [-1, 1]; always 0.0 unless ``drive_mode`` is
+        ``"mecanum"``.
+    mode : str
+        Phase label: ``"idle"``, ``"collect"``, ``"drive"``, ``"recovery"``.
+    """
+    flock = []
+    for px, py in sheep_positions.values():
+        if _is_active(px, py):
+            flock.append((px, py))
+    count = len(flock)
+    if count == 0:
+        return 0.0, 0.0, 0.0, "idle"
+
+    # ---- Small-flock recovery (push sheep through the gate one by one) ----
+    # Only for flocks of at most RECOVERY_MAX_N, and only once the sheep
+    # nearest the gate is within RECOVERY_GATE_DIST of it; otherwise
+    # fall through to the standard collect/drive logic below.
+    if count <= RECOVERY_MAX_N:
+        gate_x, gate_y = _gate_center()
+        gate_dists = [math.hypot(pos[0] - gate_x, pos[1] - gate_y)
+                      for pos in flock]
+        pick = min(range(count), key=gate_dists.__getitem__)
+        sx, sy = flock[pick]
+        gap = gate_dists[pick]
+        if gap < RECOVERY_GATE_DIST:
+            if gap > 0.3:
+                ux, uy = (sx - gate_x) / gap, (sy - gate_y) / gap
+            else:
+                # Sheep is essentially on the gate centre: push along +y.
+                ux, uy = 0.0, 1.0
+            tx = sx + RECOVERY_PUSH_DIST * ux
+            ty = sy + RECOVERY_PUSH_DIST * uy
+            ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
+            omega = _facing_omega(dog_xy, dog_heading, (sx, sy), drive_mode)
+            return ax, ay, omega, "recovery"
+
+    # ---- Standard Strömbom collect/drive (proven core) ----
+    com_x = sum(pos[0] for pos in flock) / count
+    com_y = sum(pos[1] for pos in flock) / count
+    spread = [math.hypot(pos[0] - com_x, pos[1] - com_y) for pos in flock]
+
+    if max(spread) > F_FACTOR * math.sqrt(count):
+        # Collect: aim behind the furthest sheep, opposite the CoM,
+        # while facing that sheep.
+        far = max(range(count), key=spread.__getitem__)
+        sx, sy = flock[far]
+        ux, uy = _unit(sx - com_x, sy - com_y)
+        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
+        mode = "collect"
+        face_target = (sx, sy)
+    else:
+        # Drive: aim behind the CoM, opposite the pen, while facing the pen.
+        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
+        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
+        mode = "drive"
+        face_target = pen_target
+
+    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
+    omega = _facing_omega(dog_xy, dog_heading, face_target, drive_mode)
+    return ax, ay, omega, mode
+
+
+def compute_action_diff(dog_xy, dog_heading, sheep_positions,
+                        pen_target=PEN_ENTRY):
+    """Differential-drive shim returning ``(vx, vy, mode)`` — same as Strömbom.
+
+    Calls ``compute_action`` with ``drive_mode="differential"`` and
+    drops the omega channel, for callers that don't expect it.
+    """
+    outcome = compute_action(
+        dog_xy, dog_heading, sheep_positions, pen_target,
+        drive_mode="differential",
+    )
+    vx, vy, _unused_omega, mode = outcome
+    return vx, vy, mode
@@ -0,0 +1,208 @@
+"""Cluster a 2D LiDAR scan into world-frame sheep position estimates.
+
+Pipeline:
+
+    ranges (N,) → hit mask → world-frame points
+                                │
+                                ▼
+                         adjacency clustering (gap > GAP_THRESHOLD
+                         starts a new cluster, walking rays in
+                         angular order)
+                                │
+                                ▼
+                         centroid + span + region + structure filters
+                                │
+                                ▼
+                         list of (x, y) detections
+
+The downstream tracker handles association across frames.
+"""
+
+from __future__ import annotations
+
+import math
+
+import numpy as np
+
+from herding.world.geometry import (
+    FIELD_SHAPE, FIELD_ROUND_R,
+    FIELD_X, FIELD_Y, GATE_X, GATE_Y,
+    PEN_X, PEN_Y,
+)
+from herding.perception.lidar_sim import (
+    LIDAR_FOV, LIDAR_MAX_RANGE, LIDAR_N_RAYS, SHEEP_RADIUS, POST_RADIUS,
+    ray_angles,
+)
+
+
+GAP_THRESHOLD = 0.6      # m — adjacent ray-points farther apart start a new cluster
+MAX_CLUSTER_SPAN = 1.5   # m — wider clusters are walls / structures
+RANGE_HIT_EPS = 0.05     # m — hit if range < max_range - eps
+WALL_REJECT = 0.5        # m — drop detections this close to a known wall line
+
+# Multi-peak splitting: within a single cluster, if the range profile
+# has a local dip (i.e. the range increases then decreases) deeper than
+# SPLIT_RANGE_GAP, the cluster is split into two detections.
+SPLIT_RANGE_GAP = 0.20   # m — range increase that triggers a split
+
+# Sheep-sized static features. A cluster centred within STATIC_REJECT of
+# any of these is never a sheep.
+_STATIC_FEATURES_RECT = (
+    ( 10.0, -15.0), ( 13.0, -15.0),                   # gate posts
+    ( 15.0,  15.0), ( 15.0, -15.0),
+    (-15.0,  15.0), (-15.0, -15.0),                   # field corners
+)
+
+_STATIC_FEATURES_ROUND = (
+    (GATE_X[0], GATE_Y),
+    (GATE_X[1], GATE_Y),
+)
+
+STATIC_REJECT = 0.8
+
+
+def _get_static_features():
+    """Pick the static-feature table that matches ``FIELD_SHAPE``."""
+    round_field = FIELD_SHAPE == "field_round"
+    return _STATIC_FEATURES_ROUND if round_field else _STATIC_FEATURES_RECT
+
+
+_STATIC_FEATURES = _get_static_features()
+
+
+def _in_field_region(cx: float, cy: float) -> bool:
+    """True when ``(cx, cy)`` lies inside the field, allowing a 0.2 m margin."""
+    margin = 0.2
+    if FIELD_SHAPE != "field_round":
+        x_ok = FIELD_X[0] - margin < cx < FIELD_X[1] + margin
+        y_ok = FIELD_Y[0] - margin < cy < FIELD_Y[1] + margin
+        return x_ok and y_ok
+    # Round field: compare the distance from the origin with the radius.
+    return math.hypot(cx, cy) < FIELD_ROUND_R + margin
+
+
+def _near_wall(cx: float, cy: float) -> bool:
+    """True when ``(cx, cy)`` is within ``WALL_REJECT`` of a field wall.
+
+    On the rectangular field the low-y wall is ignored for x inside the
+    ``PEN_X`` span, so detections in front of the pen opening survive.
+    """
+    if FIELD_SHAPE == "field_round":
+        return math.hypot(cx, cy) > FIELD_ROUND_R - WALL_REJECT
+    if cx > FIELD_X[1] - WALL_REJECT:
+        return True
+    if cx < FIELD_X[0] + WALL_REJECT:
+        return True
+    if cy > FIELD_Y[1] - WALL_REJECT:
+        return True
+    if cy < FIELD_Y[0] + WALL_REJECT:
+        return not (PEN_X[0] <= cx <= PEN_X[1])
+    return False
+
+
+def _split_cluster_by_range(
+    points: list[tuple[float, float]],
+    range_vals: list[float],
+) -> list[list[tuple[float, float]]]:
+    """Recursively cut a cluster at its farthest-range ray.
+
+    Two sheep standing close together show up as one arc whose range
+    profile peaks where a ray slips between the two discs.  The cluster
+    is cut at the ray with the largest range (that ray is dropped) and
+    each side is examined again.  No cut is made when the cluster has
+    fewer than 4 points, when its range spread is under
+    ``SPLIT_RANGE_GAP``, or when the peak sits within two rays of
+    either end.
+    """
+    whole = [points]
+    if len(points) < 4:
+        return whole
+    farthest = max(range_vals)
+    if farthest - min(range_vals) < SPLIT_RANGE_GAP:
+        return whole
+    cut = range_vals.index(farthest)
+    # The peak must leave at least two points on each side.
+    if not (2 <= cut <= len(points) - 3):
+        return whole
+    pieces = []
+    pieces.extend(_split_cluster_by_range(points[:cut], range_vals[:cut]))
+    pieces.extend(
+        _split_cluster_by_range(points[cut + 1:], range_vals[cut + 1:])
+    )
+    return pieces
+
+
+def detections_from_scan(
+    ranges: np.ndarray,
+    dog_x: float, dog_y: float, dog_heading: float,
+    max_range: float = LIDAR_MAX_RANGE,
+) -> list[tuple[float, float]]:
+    """Return list of (x, y) world-frame sheep position estimates.
+
+    ``ranges`` is one scan in ray order (see ``ray_angles``); entries
+    within ``RANGE_HIT_EPS`` of ``max_range`` count as no-hit.  Hit
+    points are grouped into clusters, optionally split at range peaks,
+    and each surviving centroid is pushed ``SHEEP_RADIUS`` away from the
+    dog before the region / static-feature / wall filters run.
+
+    When ``LIDAR_FOV`` covers a full circle the first and last rays
+    point in the same direction, so an object straddling that seam
+    shows up as one cluster at each end of the scan.  Those two
+    clusters are stitched together so the object yields one detection
+    instead of two.
+    """
+    ranges = np.asarray(ranges, dtype=np.float32)
+    n_rays = ranges.shape[0]
+    if n_rays == 0:
+        return []
+    angles = ray_angles(n_rays, LIDAR_FOV)
+    hit = ranges < max_range - RANGE_HIT_EPS
+
+    world_a = dog_heading + angles
+    px = dog_x + ranges * np.cos(world_a)
+    py = dog_y + ranges * np.sin(world_a)
+
+    # Walk rays in angular order; a large jump between consecutive
+    # world-frame hit points closes the current cluster.
+    # Store (x, y, range) per hit ray for multi-peak splitting.
+    clusters: list[list[tuple[float, float, float]]] = []
+    current: list[tuple[float, float, float]] = []
+    prev_xy: tuple[float, float] | None = None
+    for i in range(n_rays):
+        if not bool(hit[i]):
+            if current:
+                clusters.append(current)
+                current = []
+            prev_xy = None
+            continue
+        pt = (float(px[i]), float(py[i]), float(ranges[i]))
+        if prev_xy is not None and math.hypot(pt[0] - prev_xy[0], pt[1] - prev_xy[1]) > GAP_THRESHOLD:
+            clusters.append(current)
+            current = []
+        current.append(pt)
+        prev_xy = (pt[0], pt[1])
+    if current:
+        clusters.append(current)
+
+    # Full-circle scans wrap around: join the trailing cluster onto the
+    # leading one when both touch the seam and their seam-side points
+    # are within the normal adjacency gap.  Prepending the tail keeps
+    # the merged points in angular order for the range-peak splitter.
+    if (len(clusters) >= 2 and LIDAR_FOV >= 2.0 * math.pi - 1e-9
+            and bool(hit[0]) and bool(hit[n_rays - 1])):
+        tail_pt, head_pt = clusters[-1][-1], clusters[0][0]
+        if math.hypot(head_pt[0] - tail_pt[0],
+                      head_pt[1] - tail_pt[1]) <= GAP_THRESHOLD:
+            clusters[0] = clusters.pop() + clusters[0]
+
+    detections: list[tuple[float, float]] = []
+    for cluster in clusters:
+        points_xy = [(p[0], p[1]) for p in cluster]
+        range_vals = [p[2] for p in cluster]
+
+        # Multi-peak splitting.
+        if len(cluster) >= 4:
+            sub_clusters = _split_cluster_by_range(points_xy, range_vals)
+        else:
+            sub_clusters = [points_xy]
+
+        for sub in sub_clusters:
+            if len(sub) < 1:
+                continue
+            xs = [p[0] for p in sub]
+            ys = [p[1] for p in sub]
+            cx, cy = sum(xs) / len(xs), sum(ys) / len(ys)
+            # Diagonal of the bounding box; wide clusters are structures.
+            span = math.hypot(max(xs) - min(xs), max(ys) - min(ys))
+            if span > MAX_CLUSTER_SPAN:
+                continue
+            # Rays hit the front edge of the sheep; offset outward by
+            # SHEEP_RADIUS along the dog→cluster direction.
+            dx, dy = cx - dog_x, cy - dog_y
+            d = math.hypot(dx, dy)
+            if d > 1e-3:
+                cx += SHEEP_RADIUS * dx / d
+                cy += SHEEP_RADIUS * dy / d
+            in_main = _in_field_region(cx, cy)
+            # Narrow strip just below the gate line, spanning the pen width.
+            in_gate_strip = (PEN_X[0] - 0.2 < cx < PEN_X[1] + 0.2 and
+                             GATE_Y - 1.0 < cy < GATE_Y + 0.2)
+            if not (in_main or in_gate_strip):
+                continue
+            if any(math.hypot(cx - fx, cy - fy) < STATIC_REJECT
+                   for fx, fy in _STATIC_FEATURES):
+                continue
+            if _near_wall(cx, cy):
+                continue
+            detections.append((cx, cy))
+    return detections
@@ -0,0 +1,235 @@
+"""Fast 2D LiDAR simulator for the Gymnasium env.
+
+Raycasts against sheep (discs) and static world geometry. For rectangular
+fields this is axis-aligned walls + gate posts; for round fields it is a
+circular wall + gate posts. The env reproduces the false-positive cluster
+distribution Webots produces from real 3D geometry.
+
+Returns a range array modelled on the Webots Lidar device but extended
+to full-circle coverage: 360 rays, 360° FOV, 12 m max range, 5 mm noise
+(the original Webots device is 180 rays / 140° FOV).
+See ``protos/ShepherdDog.proto``.
+"""
+
+from __future__ import annotations
+
+import math
+
+import numpy as np
+
+from herding.world.geometry import (
+    FIELD_SHAPE, FIELD_ROUND_R,
+    FIELD_X, FIELD_Y,
+    GATE_X, GATE_Y,
+    PEN_X, PEN_Y,
+)
+
+
+# Match protos/ShepherdDog.proto Lidar device — extended to 360° for
+# full situational awareness. The original Webots device is 140° FOV /
+# 180 rays; we use 360 rays for full-circle coverage.
+LIDAR_N_RAYS = 360
+LIDAR_FOV = 2.0 * math.pi  # 360° full circle
+LIDAR_MAX_RANGE = 12.0
+LIDAR_NOISE = 0.005    # m, gaussian std
+
+# Sheep cross-section in the LiDAR plane (horizontal cylinder approx).
+SHEEP_RADIUS = 0.30
+POST_RADIUS = 0.25
+
+
+# ---------------------------------------------------------------------------
+# Rectangular-field static geometry
+# ---------------------------------------------------------------------------
+_VERTICAL_WALLS_RECT = (
+    ( 15.0, -15.0,  15.0),  # field east
+    (-15.0, -15.0,  15.0),  # field west
+    ( 10.0, -22.0, -15.0),  # pen west
+    ( 13.0, -22.0, -15.0),  # pen east
+)
+
+_HORIZONTAL_WALLS_RECT = (
+    ( 15.0, -15.0,  15.0),  # field north
+    (-15.0, -15.0,  10.0),  # field south-west of gate
+    (-15.0,  13.0,  15.0),  # field south-east of gate
+    (-22.0,  10.0,  13.0),  # pen south
+)
+
+_POSTS_RECT = np.array([
+    ( 10.0, -15.0), ( 13.0, -15.0),
+    ( 15.0,  15.0), ( 15.0, -15.0),
+    (-15.0,  15.0), (-15.0, -15.0),
+], dtype=np.float64)
+
+
+# ---------------------------------------------------------------------------
+# Round-field static geometry
+# ---------------------------------------------------------------------------
+# Circular wall with gate gap. Gate posts at the edges of the gate gap.
+_gate_cx = 0.5 * (GATE_X[0] + GATE_X[1])
+_POSTS_ROUND = np.array([
+    (GATE_X[0], GATE_Y),
+    (GATE_X[1], GATE_Y),
+], dtype=np.float64)
+
+# Pen walls for round field
+_VERTICAL_WALLS_ROUND = (
+    (GATE_X[0], PEN_Y[0], GATE_Y),   # pen west
+    (GATE_X[1], PEN_Y[0], GATE_Y),   # pen east
+)
+_HORIZONTAL_WALLS_ROUND = (
+    (PEN_Y[0], GATE_X[0], GATE_X[1]),  # pen south
+)
+
+
+def _build_static_geometry():
+    """Return ``(vertical walls, horizontal walls, posts, circle radius)``.
+
+    The tables are chosen by ``FIELD_SHAPE``; the radius is ``None``
+    when the field has no circular wall.
+    """
+    if FIELD_SHAPE != "field_round":
+        # Rectangular field: straight walls only, hence no radius.
+        return (
+            _VERTICAL_WALLS_RECT,
+            _HORIZONTAL_WALLS_RECT,
+            _POSTS_RECT,
+            None,
+        )
+    return (
+        _VERTICAL_WALLS_ROUND,
+        _HORIZONTAL_WALLS_ROUND,
+        _POSTS_ROUND,
+        FIELD_ROUND_R,
+    )
+
+
+_VERTS, _HORIZS, _POSTS, _CIRC_R = _build_static_geometry()
+
+
+# ---------------------------------------------------------------------------
+# Ray helpers
+# ---------------------------------------------------------------------------
+def ray_angles(n: int = LIDAR_N_RAYS, fov: float = LIDAR_FOV) -> np.ndarray:
+    """``n`` evenly spaced local-frame ray angles from +fov/2 down to -fov/2.
+
+    Angles are CCW from forward; both end angles are included.
+    """
+    half_fov = fov / 2.0
+    return np.linspace(half_fov, -half_fov, num=n, dtype=np.float64)
+
+
+_ANGLES = ray_angles()
+_COS = np.cos(_ANGLES)
+_SIN = np.sin(_ANGLES)
+
+
+def _raycast_static(
+    ox: float, oy: float, cos_w: np.ndarray, sin_w: np.ndarray,
+) -> np.ndarray:
+    """Per-ray distance to the nearest wall or post hit (∞ if none).
+
+    ``(ox, oy)`` is the common ray origin; ``cos_w`` / ``sin_w`` hold
+    the world-frame unit direction of every ray.  Geometry comes from
+    the module-level ``_VERTS``, ``_HORIZS``, ``_POSTS`` and ``_CIRC_R``
+    selected for the active field shape.
+    """
+    n_rays = cos_w.shape[0]
+    best = np.full(n_rays, np.inf, dtype=np.float64)
+
+    EPS = 1e-3
+    # Avoid division by zero for rays exactly parallel to a wall.
+    safe_cos = np.where(np.abs(cos_w) < 1e-9, 1e-9, cos_w)
+    safe_sin = np.where(np.abs(sin_w) < 1e-9, 1e-9, sin_w)
+
+    # Vertical walls (x = const)
+    for wx, ymin, ymax in _VERTS:
+        t = (wx - ox) / safe_cos
+        y_at = oy + t * sin_w
+        valid = (t > EPS) & (y_at >= ymin - EPS) & (y_at <= ymax + EPS)
+        cand = np.where(valid, t, np.inf)
+        np.minimum(best, cand, out=best)
+
+    # Horizontal walls (y = const)
+    for wy, xmin, xmax in _HORIZS:
+        t = (wy - oy) / safe_sin
+        x_at = ox + t * cos_w
+        valid = (t > EPS) & (x_at >= xmin - EPS) & (x_at <= xmax + EPS)
+        cand = np.where(valid, t, np.inf)
+        np.minimum(best, cand, out=best)
+
+    # Circular wall (round field only)
+    if _CIRC_R is not None:
+        # Ray: P(t) = O + t·D with |D| = 1.  ||P(t)||² = R² expands to
+        #   t² + 2t(O·D) + (||O||² - R²) = 0
+        # so with half_b = O·D and c = ||O||² - R²:
+        #   t = -half_b ± sqrt(half_b² - c)
+        # (The previous code used +half_b, which returned the wall
+        # distance for the opposite direction whenever O was off-centre.)
+        half_b = ox * cos_w + oy * sin_w
+        c = ox * ox + oy * oy - _CIRC_R * _CIRC_R
+        disc = half_b * half_b - c
+        valid_disc = disc >= 0.0
+        sqrt_disc = np.sqrt(np.maximum(disc, 0.0))
+        t1 = -half_b - sqrt_disc
+        t2 = -half_b + sqrt_disc
+        # We want the smallest positive t.
+        t1_valid = valid_disc & (t1 > EPS)
+        t2_valid = valid_disc & (t2 > EPS)
+        t_circ = np.where(t1_valid, t1, np.where(t2_valid, t2, np.inf))
+
+        # Exclude rays that hit the gate gap: drop hits whose x lies
+        # strictly between the GATE_X ends and whose y is within 2 m of
+        # GATE_Y.
+        hx = ox + t_circ * cos_w
+        hy = oy + t_circ * sin_w
+        in_gate = ((hx > GATE_X[0]) & (hx < GATE_X[1]) &
+                   (hy > GATE_Y - 2.0) & (hy < GATE_Y + 2.0))
+        t_circ = np.where(in_gate, np.inf, t_circ)
+        np.minimum(best, t_circ, out=best)
+
+    # Posts (treat as discs)
+    if _POSTS.size:
+        px = _POSTS[:, 0] - ox
+        py = _POSTS[:, 1] - oy
+        # t_post: distance along each ray to the point closest to the
+        # post centre; perp2: squared miss distance at that point.
+        t_post = np.outer(px, cos_w) + np.outer(py, sin_w)
+        d2 = (px ** 2 + py ** 2)[:, None]
+        perp2 = d2 - t_post ** 2
+        R2 = POST_RADIUS ** 2
+        hit = (perp2 < R2) & (t_post > 0.0)
+        # Step back by the half-chord to reach the near surface.
+        half = np.sqrt(np.clip(R2 - perp2, 0.0, None))
+        cand = np.where(hit, t_post - half, np.inf)
+        nearest = cand.min(axis=0)
+        np.minimum(best, nearest, out=best)
+
+    return best
+
+
+def simulate_scan(
+    dog_x: float, dog_y: float, dog_heading: float,
+    sheep_xy: list[tuple[float, float]],
+    noise: float = LIDAR_NOISE,
+    max_range: float = LIDAR_MAX_RANGE,
+    rng: np.random.Generator | None = None,
+) -> np.ndarray:
+    """Simulate one scan; returns a (N,) float32 array of ranges.
+
+    Rays that hit nothing within ``max_range`` report ``max_range``.
+    ``sheep_xy`` is every sheep (penned or active) in the scene; each
+    is raycast as a disc of radius ``SHEEP_RADIUS``.  Gaussian noise
+    with std ``noise`` is added to hit rays via ``_add_noise``.
+    """
+    heading_cos = math.cos(dog_heading)
+    heading_sin = math.sin(dog_heading)
+    # Rotate the precomputed local ray directions into the world frame.
+    cos_w = heading_cos * _COS - heading_sin * _SIN
+    sin_w = heading_sin * _COS + heading_cos * _SIN
+
+    nearest_hit = _raycast_static(dog_x, dog_y, cos_w, sin_w)
+
+    if sheep_xy:
+        rel_x = np.asarray([pos[0] for pos in sheep_xy], dtype=np.float64) - dog_x
+        rel_y = np.asarray([pos[1] for pos in sheep_xy], dtype=np.float64) - dog_y
+        # Rows are sheep, columns are rays.
+        along = np.outer(rel_x, cos_w) + np.outer(rel_y, sin_w)
+        centre_dist2 = (rel_x ** 2 + rel_y ** 2)[:, None]
+        miss2 = centre_dist2 - along ** 2
+        radius2 = SHEEP_RADIUS ** 2
+        struck = (miss2 < radius2) & (along > 0.0)
+        half_chord = np.sqrt(np.clip(radius2 - miss2, 0.0, None))
+        per_sheep = np.where(struck, along - half_chord, np.inf)
+        np.minimum(nearest_hit, per_sheep.min(axis=0), out=nearest_hit)
+
+    clipped = np.minimum(nearest_hit, max_range).astype(np.float32)
+    return _add_noise(clipped, noise, rng, max_range)
+
+
+def _add_noise(ranges: np.ndarray, sigma: float,
+               rng: np.random.Generator | None, max_range: float) -> np.ndarray:
+    """Add zero-mean Gaussian noise (std ``sigma``) to the hit rays only.
+
+    Rays within 1e-3 of ``max_range`` are treated as no-hit and left
+    untouched.  The input is returned as-is when ``sigma`` is not
+    positive or nothing was hit; otherwise a noisy copy clipped to
+    ``[0, max_range]`` is returned.
+    """
+    if sigma <= 0.0:
+        return ranges
+    generator = np.random.default_rng() if rng is None else rng
+    is_hit = ranges < max_range - 1e-3
+    hit_count = int(is_hit.sum())
+    if hit_count == 0:
+        return ranges
+    noisy = ranges.copy()
+    jitter = generator.normal(0.0, sigma, size=hit_count).astype(np.float32)
+    noisy[is_hit] += jitter
+    np.clip(noisy, 0.0, max_range, out=noisy)
+    return noisy
@@ -0,0 +1,122 @@
+"""Observation builder for the shepherd-dog policy.
+
+Order-invariant 32-D feature vector. Sheep never appear by index in
+the observation, only via summary statistics, a polar histogram, and
+two "named" channels (closest-to-pen, rearmost-from-pen) — so the
+policy generalises across flock sizes 1..MAX_SHEEP.
+
+Layout (all components normalised so values stay roughly in [-1, 1]):
+
+    idx    field
+    -----  ----------------------------------------------------------
+     0..3  dog pose: x/15, y/15, cos(h), sin(h)
+     4..5  active-sheep CoM x/15, y/15
+     6..8  flock dispersion: max_radius/15, std_x/15, std_y/15
+     9..11 dog → CoM: dx/30, dy/30, dist/30
+    12..14 dog → pen entry: dx/30, dy/30, dist/30
+    15..16 furthest sheep → CoM: dx/15, dy/15
+    17..18 min sheep-to-wall, min dog-to-wall (both /15)
+       19  active sheep count / MAX_SHEEP
+    20..27 8-bin polar histogram of active sheep in the dog's body frame
+    28..29 dog → closest-to-pen sheep: dx/15, dy/15
+    30..31 dog → rearmost (furthest-from-pen) sheep: dx/15, dy/15
+"""
+
+import math
+import numpy as np
+
+from herding.world.geometry import (
+    PEN_ENTRY, MAX_SHEEP, distance_to_wall,
+)
+
+OBS_DIM = 32
+
+
+def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
+              n_max: int = MAX_SHEEP,
+              n_expected: int | None = None) -> np.ndarray:
+    """Assemble the dog policy's observation vector.
+
+    Parameters
+    ----------
+    dog_xy : tuple (x, y) of the dog's GPS position (m)
+    dog_heading : dog heading in rad
+    sheep_xy_list : iterable of (x, y) for ALL known sheep
+    sheep_penned_list : parallel iterable of bool — True if sheep is penned
+    n_max : maximum supported flock size used for the count normaliser
+    n_expected : unused, kept for API compatibility.
+
+    Returns
+    -------
+    np.ndarray of shape ``(OBS_DIM,)``, dtype float32.  When no sheep
+    is active only the dog-pose and dog → pen-entry channels are
+    filled; every other entry stays 0.
+    """
+    dog_x, dog_y = dog_xy
+    obs = np.zeros(OBS_DIM, dtype=np.float32)
+
+    # 0..3: dog pose.
+    obs[0] = dog_x / 15.0
+    obs[1] = dog_y / 15.0
+    obs[2] = math.cos(dog_heading)
+    obs[3] = math.sin(dog_heading)
+
+    # Only sheep that are not penned feed the flock statistics.
+    active = [(x, y) for (x, y), p
+              in zip(sheep_xy_list, sheep_penned_list) if not p]
+    n = len(active)
+
+    # 12..14: dog → pen entry. Filled before the early return below so
+    # these channels are valid even with no active sheep.
+    pdx0, pdy0 = PEN_ENTRY[0] - dog_x, PEN_ENTRY[1] - dog_y
+    obs[12] = pdx0 / 30.0
+    obs[13] = pdy0 / 30.0
+    obs[14] = math.hypot(pdx0, pdy0) / 30.0
+
+    if n == 0:
+        obs[19] = 0.0
+        return obs
+
+    arr = np.asarray(active, dtype=np.float32)
+    com_x = float(arr[:, 0].mean())
+    com_y = float(arr[:, 1].mean())
+    # Per-sheep offset from the CoM and its length.
+    rel = arr - np.array([com_x, com_y], dtype=np.float32)
+    dists = np.hypot(rel[:, 0], rel[:, 1])
+    radius = float(dists.max())
+    std_x = float(arr[:, 0].std())
+    std_y = float(arr[:, 1].std())
+
+    # 4..8: CoM and flock dispersion.
+    obs[4] = com_x / 15.0
+    obs[5] = com_y / 15.0
+    obs[6] = radius / 15.0
+    obs[7] = std_x / 15.0
+    obs[8] = std_y / 15.0
+
+    # 9..11: dog → CoM.
+    cdx, cdy = com_x - dog_x, com_y - dog_y
+    obs[9]  = cdx / 30.0
+    obs[10] = cdy / 30.0
+    obs[11] = math.hypot(cdx, cdy) / 30.0
+
+    # 15..16: offset of the sheep furthest from the CoM, relative to the CoM.
+    far_idx = int(np.argmax(dists))
+    obs[15] = float(rel[far_idx, 0]) / 15.0
+    obs[16] = float(rel[far_idx, 1]) / 15.0
+
+    # 17..19: wall clearances and normalised active count.
+    min_sheep_wall = float(min(distance_to_wall(sx, sy) for sx, sy in active))
+    min_dog_wall = distance_to_wall(dog_x, dog_y)
+    obs[17] = min_sheep_wall / 15.0
+    obs[18] = float(min_dog_wall) / 15.0
+    obs[19] = n / n_max
+
+    # Polar histogram in the dog's body frame.
+    rel_dx = arr[:, 0] - dog_x
+    rel_dy = arr[:, 1] - dog_y
+    angles = np.arctan2(rel_dy, rel_dx) - dog_heading
+    # Wrap the bearing back into [-π, π] after subtracting the heading.
+    angles = np.arctan2(np.sin(angles), np.cos(angles))
+    bins = np.floor((angles + math.pi) / (2 * math.pi) * 8).astype(int)
+    # A bearing of exactly +π maps to bin 8; fold it into the last bin.
+    bins = np.clip(bins, 0, 7)
+    hist = np.bincount(bins, minlength=8).astype(np.float32)
+    # Normalise to fractions of the active flock.
+    hist /= max(1, n)
+    obs[20:28] = hist
+
+    # Closest-to-pen and rearmost (furthest-from-pen) sheep. Without
+    # these named channels the obs cannot uniquely identify which sheep
+    # the teacher is steering toward, and BC fails to mimic it.
+    pen_dists = np.hypot(arr[:, 0] - PEN_ENTRY[0], arr[:, 1] - PEN_ENTRY[1])
+    closest_idx = int(np.argmin(pen_dists))
+    rearmost_idx = int(np.argmax(pen_dists))
+    obs[28] = (float(arr[closest_idx, 0]) - dog_x) / 15.0
+    obs[29] = (float(arr[closest_idx, 1]) - dog_y) / 15.0
+    obs[30] = (float(arr[rearmost_idx, 0]) - dog_x) / 15.0
+    obs[31] = (float(arr[rearmost_idx, 1]) - dog_y) / 15.0
+
+    return obs
@@ -0,0 +1,237 @@
+"""Multi-target tracker for LiDAR-detected sheep.
+
+Greedy nearest-neighbour data association across frames, with a wider
+re-acquisition gate for stale tracks (sheep flee during occlusion and
+reappear off-position), plus memory of last-seen positions for sheep
+out of FOV. Output is ``{name: (x, y)}`` — Strömbom / Sequential
+consume it directly.
+
+When **predictive mode** is enabled (the default), tracks carry a
+constant-velocity state ``(vx, vy)``: a finite-difference estimate,
+exponentially smoothed and refreshed on every matched detection.
+While a track is occluded its position is extrapolated
+using this velocity for up to ``PREDICT_STEPS`` frames, keeping the
+teacher's CoM estimate stable during brief losses. After prediction
+expires, the track falls back to its last-seen position (static memory)
+until ``FORGET_STEPS`` deletes it entirely.
+
+A track is marked penned once its estimated position crosses the gate
+plane south (``is_penned_position``). Penned tracks are excluded from
+``get_positions`` and kept indefinitely.
+"""
+
+from __future__ import annotations
+
+import math
+
+from herding.world.geometry import MAX_SHEEP, WEBOTS_DT, in_pen, is_penned_position
+
+
+# Association gates are Euclidean distances in metres; the *_STEPS / *_AGE
+# values count tracker updates. The "~ seconds" figures below correspond to
+# 0.016 s per step, the same factor Track uses to convert steps to seconds.
+GATE_M = 2.5              # m — primary NN gate (recently observed tracks)
+REACQUIRE_GATE_M = 4.5    # m — wider gate for re-binding stale tracks
+REACQUIRE_MIN_AGE = 20    # steps — track must be this stale to use the wider gate
+PENNED_GATE_M = 4.0       # m — gate for matching detections to existing penned tracks
+FORGET_STEPS = 200        # ~3.2 s — delete stale active tracks (penned ones kept forever)
+# Upper bound on the number of non-penned tracks kept after each update;
+# the least recently seen tracks are dropped first when it is exceeded.
+MAX_ACTIVE_TRACKS = MAX_SHEEP
+
+# Predictive tracking constants.
+PREDICT_STEPS = 120       # ~1.9 s — extrapolate velocity this many frames
+VELOCITY_CLAMP = 1.0      # m/s — max predicted speed (sheep max is ~0.78 m/s)
+
+
+class Track:
+    """Single track: last observed position, smoothed velocity, bookkeeping.
+
+    Attributes (all declared in ``__slots__``):
+
+    x, y       -- position of the most recent matched detection (m). These
+                  are never overwritten by prediction.
+    vx, vy     -- exponentially smoothed velocity estimate (m/s).
+    last_seen  -- tracker step at which the track was last matched.
+    penned     -- True once the track is considered to be inside the pen.
+
+    Step counts are converted to seconds with ``WEBOTS_DT`` so the tracker
+    shares the simulation timestep defined in ``herding.world.geometry``
+    instead of repeating the literal.
+    """
+
+    __slots__ = ("x", "y", "vx", "vy", "last_seen", "penned")
+
+    def __init__(self, x: float, y: float, step: int, penned: bool = False):
+        self.x = x
+        self.y = y
+        # A fresh track has no motion history, so it starts at rest.
+        self.vx = 0.0
+        self.vy = 0.0
+        self.last_seen = step
+        self.penned = penned
+
+    @property
+    def age(self) -> int:
+        """Deliberately unimplemented; always raises ``NotImplementedError``.
+
+        A track does not know the current step. Callers compute the age
+        themselves as ``current_step - track.last_seen``.
+        """
+        raise NotImplementedError
+
+    def predicted_position(self, current_step: int) -> tuple[float, float]:
+        """Return the constant-velocity extrapolation at ``current_step``.
+
+        Falls back to the last observed ``(x, y)`` when the track was seen
+        this step (or later), when it has been unseen for more than
+        ``PREDICT_STEPS``, or when the estimated speed is below 1e-4 m/s.
+        Otherwise the travelled distance is capped at what
+        ``VELOCITY_CLAMP`` allows over the elapsed time.
+        """
+        dt = current_step - self.last_seen
+        if dt <= 0 or dt > PREDICT_STEPS:
+            return self.x, self.y
+        speed = math.hypot(self.vx, self.vy)
+        if speed < 1e-4:
+            # Also protects the division by ``speed`` below.
+            return self.x, self.y
+        # Clamp extrapolation distance (steps → seconds via WEBOTS_DT).
+        max_d = VELOCITY_CLAMP * dt * WEBOTS_DT
+        d = min(speed * dt * WEBOTS_DT, max_d)
+        return (
+            self.x + d * (self.vx / speed),
+            self.y + d * (self.vy / speed),
+        )
+
+    def update(self, x: float, y: float, step: int) -> None:
+        """Absorb a new detection and re-estimate velocity.
+
+        The velocity is only refreshed when ``step`` is later than
+        ``last_seen``; position and ``last_seen`` are always overwritten.
+        """
+        dt = step - self.last_seen
+        if dt > 0:
+            dt_s = dt * WEBOTS_DT  # steps → seconds
+            new_vx = (x - self.x) / dt_s
+            new_vy = (y - self.y) / dt_s
+            # Exponential smoothing: 60 % new sample, 40 % previous estimate.
+            alpha = 0.6
+            self.vx = alpha * new_vx + (1.0 - alpha) * self.vx
+            self.vy = alpha * new_vy + (1.0 - alpha) * self.vy
+        self.x = x
+        self.y = y
+        self.last_seen = step
+
+
+class SheepTracker:
+    """Greedy nearest-neighbour tracker with prediction and bounded memory.
+
+    Tracks are stored by integer id as :class:`Track` objects (position,
+    velocity estimate, last-seen step, penned flag). ``step`` counts the
+    calls to :meth:`update` since construction or the last :meth:`reset`.
+    """
+
+    def __init__(self, gate: float = GATE_M):
+        self.gate = gate
+        self._tracks: dict[int, Track] = {}
+        self._next_id = 0
+        self.step = 0
+
+    def reset(self) -> None:
+        """Drop every track and restart id and step counters from zero."""
+        self.step = 0
+        self._next_id = 0
+        self._tracks.clear()
+
+    @staticmethod
+    def _nearest_free(detections, claimed, ref_x, ref_y, gate):
+        """Index of the closest unclaimed detection strictly inside ``gate``.
+
+        Returns ``-1`` when nothing qualifies. On equal distances the
+        detection with the lower index wins.
+        """
+        winner, winner_d = -1, gate
+        for idx, (det_x, det_y) in enumerate(detections):
+            if idx in claimed:
+                continue
+            gap = math.hypot(det_x - ref_x, det_y - ref_y)
+            if gap < winner_d:
+                winner, winner_d = idx, gap
+        return winner
+
+    def update(self, detections: list[tuple[float, float]]) -> dict[str, tuple[float, float]]:
+        """Advance one step, associate ``detections``, return active positions."""
+        self.step += 1
+        now = self.step
+
+        claimed: set[int] = set()      # detection indices already bound
+        refreshed: set[int] = set()    # active track ids matched this step
+
+        # Stage 1: active tracks against the primary gate, anchored on the
+        # predicted position, least recently seen first.
+        free_tids = sorted(
+            (tid for tid, trk in self._tracks.items() if not trk.penned),
+            key=lambda tid: self._tracks[tid].last_seen,
+        )
+        for tid in free_tids:
+            trk = self._tracks[tid]
+            ref_x, ref_y = trk.predicted_position(now)
+            hit = self._nearest_free(detections, claimed, ref_x, ref_y, self.gate)
+            if hit < 0:
+                continue
+            det_x, det_y = detections[hit]
+            trk.update(det_x, det_y, now)
+            claimed.add(hit)
+            refreshed.add(tid)
+
+        # Stage 1b: tracks still unmatched and stale enough get a second
+        # chance with the wider re-acquisition gate.
+        for tid in free_tids:
+            if tid in refreshed:
+                continue
+            trk = self._tracks[tid]
+            if now - trk.last_seen < REACQUIRE_MIN_AGE:
+                continue
+            ref_x, ref_y = trk.predicted_position(now)
+            hit = self._nearest_free(
+                detections, claimed, ref_x, ref_y, REACQUIRE_GATE_M)
+            if hit < 0:
+                continue
+            det_x, det_y = detections[hit]
+            trk.update(det_x, det_y, now)
+            claimed.add(hit)
+            refreshed.add(tid)
+
+        # Stage 2: leftover detections may refresh penned tracks, anchored
+        # on their stored (not predicted) position.
+        for trk in [t for t in self._tracks.values() if t.penned]:
+            hit = self._nearest_free(
+                detections, claimed, trk.x, trk.y, PENNED_GATE_M)
+            if hit < 0:
+                continue
+            det_x, det_y = detections[hit]
+            trk.update(det_x, det_y, now)
+            claimed.add(hit)
+
+        # Whatever is still unclaimed starts a new track.
+        for idx, (det_x, det_y) in enumerate(detections):
+            if idx in claimed:
+                continue
+            starts_penned = in_pen(det_x, det_y) or is_penned_position(det_x, det_y)
+            self._tracks[self._next_id] = Track(det_x, det_y, now, starts_penned)
+            self._next_id += 1
+
+        # Latch the penned flag for active tracks whose current estimate
+        # lies past the gate.
+        for trk in self._tracks.values():
+            if not trk.penned and is_penned_position(*trk.predicted_position(now)):
+                trk.penned = True
+
+        # Expire active tracks unseen for too long; penned ones are kept.
+        expired = [
+            tid for tid, trk in self._tracks.items()
+            if not trk.penned and now - trk.last_seen > FORGET_STEPS
+        ]
+        for tid in expired:
+            del self._tracks[tid]
+
+        # Enforce the cap on active tracks, discarding least recently seen.
+        by_recency = sorted(
+            ((tid, trk.last_seen) for tid, trk in self._tracks.items()
+             if not trk.penned),
+            key=lambda pair: pair[1],
+        )
+        overflow = len(by_recency) - MAX_ACTIVE_TRACKS
+        if overflow > 0:
+            for tid, _ in by_recency[:overflow]:
+                del self._tracks[tid]
+
+        return self.get_positions()
+
+    def get_positions(self) -> dict[str, tuple[float, float]]:
+        """Map ``"t<id>"`` to the current estimate of every non-penned track.
+
+        The estimate comes from ``Track.predicted_position`` at the
+        tracker's current step, so occluded tracks inside the prediction
+        window report an extrapolated position.
+        """
+        return {
+            f"t{tid}": trk.predicted_position(self.step)
+            for tid, trk in self._tracks.items()
+            if not trk.penned
+        }
+
+    def get_penned_set(self) -> set[str]:
+        """Names (``"t<id>"``) of all penned tracks."""
+        return {f"t{tid}" for tid, trk in self._tracks.items() if trk.penned}
+
+    def n_active(self) -> int:
+        """Number of non-penned tracks."""
+        return len([trk for trk in self._tracks.values() if not trk.penned])
+
+    def n_penned(self) -> int:
+        """Number of penned tracks."""
+        return len([trk for trk in self._tracks.values() if trk.penned])
+
+    def n_predicted(self) -> int:
+        """Active tracks unseen this step but still inside ``PREDICT_STEPS``."""
+        count = 0
+        for trk in self._tracks.values():
+            if trk.penned:
+                continue
+            if 0 < self.step - trk.last_seen <= PREDICT_STEPS:
+                count += 1
+        return count
@@ -0,0 +1,190 @@
+"""Differential-drive and mecanum kinematics, shared by the env and Webots
+controllers.
+
+First-order rigid-body model — no slip, wheel-accel limits, or contact
+forces. Webots' ODE physics handles those at inference; the env stays
+close enough to first order that a policy trained here transfers.
+"""
+
+import math
+
+
+def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt):
+    """Advance a differential-drive pose by one explicit Euler step.
+
+    Inputs
+    ------
+    x, y : robot position (m)
+    h    : robot heading (rad), 0 = +x axis
+    w_left, w_right : wheel angular velocities (rad/s)
+    wheel_radius, wheel_base : robot dimensions (m)
+    dt   : timestep (s)
+
+    The translation uses the heading at the start of the step; the new
+    heading is wrapped through ``atan2``.
+
+    Returns (new_x, new_y, new_h).
+    """
+    linear = (w_right + w_left) * wheel_radius * 0.5
+    yaw_rate = (w_right - w_left) * wheel_radius / wheel_base
+    turned = h + yaw_rate * dt
+    return (
+        x + linear * math.cos(h) * dt,
+        y + linear * math.sin(h) * dt,
+        math.atan2(math.sin(turned), math.cos(turned)),
+    )
+
+
+def velocity_to_wheels(vx, vy, h, max_linear, wheel_radius, max_wheel_omega,
+                       k_turn=4.0):
+    """Map a (vx, vy) intent in [-1, 1]² to (left, right) wheel speeds.
+
+    The intent direction is the target heading. Forward speed is scaled
+    by ``cos`` of the heading error (error clamped to ±90° for that
+    factor only), and ``k_turn`` times the unclamped error is applied as
+    the left/right differential. Each wheel is saturated to
+    ``±max_wheel_omega``. Intents slower than 1e-3 m/s yield ``(0, 0)``.
+    """
+    speed_ms = math.hypot(vx, vy) * max_linear
+    if speed_ms < 1e-3:
+        return 0.0, 0.0
+
+    def saturate(w):
+        return max(-max_wheel_omega, min(max_wheel_omega, w))
+
+    desired = math.atan2(vy, vx)
+    err = math.atan2(math.sin(desired - h), math.cos(desired - h))
+    quarter_turn = math.pi / 2
+    bounded = max(-quarter_turn, min(quarter_turn, err))
+    fwd_rad = speed_ms * math.cos(bounded) / wheel_radius
+    diff = k_turn * err
+    return saturate(fwd_rad - diff), saturate(fwd_rad + diff)
+
+
+def heading_speed_to_wheels(heading, speed_motor, h, max_wheel_omega,
+                            k_turn=4.0):
+    """Sheep variant: target given as a heading angle, speed in wheel rad/s.
+
+    Forward drive is ``speed_motor`` scaled by ``cos`` of the heading
+    error, floored at zero so the robot never reverses; the differential
+    is ``k_turn`` times the error. Both wheels are saturated to
+    ``±max_wheel_omega``. Returns ``(left, right)``.
+    """
+    delta = heading - h
+    err = math.atan2(math.sin(delta), math.cos(delta))
+    fwd = max(0.0, math.cos(err)) * speed_motor
+    diff = k_turn * err
+    lo, hi = -max_wheel_omega, max_wheel_omega
+    return max(lo, min(hi, fwd - diff)), max(lo, min(hi, fwd + diff))
+
+
+# ---------------------------------------------------------------------------
+# Mecanum (4-wheel omnidirectional) kinematics
+# ---------------------------------------------------------------------------
+
+def mecanum_kinematics_step(x, y, h, w_fl, w_fr, w_rl, w_rr,
+                             wheel_radius, lx, ly, dt):
+    """Advance a mecanum-drive pose by one explicit Euler step.
+
+    Parameters
+    ----------
+    x, y : robot position (m)
+    h    : robot heading (rad), 0 = +x axis
+    w_fl, w_fr, w_rl, w_rr : wheel angular velocities (rad/s)
+    wheel_radius : wheel radius (m)
+    lx   : half the front-to-back axle distance (m)
+    ly   : half the left-to-right axle distance (m)
+    dt   : timestep (s)
+
+    Body-frame velocities are derived from the wheel rates, rotated into
+    the world frame with the heading at the start of the step, and the
+    new heading is wrapped through ``atan2``.
+
+    Returns (new_x, new_y, new_h).
+    """
+    # Wheel rates → body-frame twist.
+    surge = (w_fl + w_fr + w_rl + w_rr) * wheel_radius / 4.0
+    sway = (-w_fl + w_fr + w_rl - w_rr) * wheel_radius / 4.0
+    yaw_rate = (-w_fl + w_fr - w_rl + w_rr) * wheel_radius / (4.0 * (lx + ly))
+
+    # Body frame → world frame.
+    c, s = math.cos(h), math.sin(h)
+    world_vx = surge * c - sway * s
+    world_vy = surge * s + sway * c
+
+    turned = h + yaw_rate * dt
+    return (
+        x + world_vx * dt,
+        y + world_vy * dt,
+        math.atan2(math.sin(turned), math.cos(turned)),
+    )
+
+
+def mecanum_inverse(vx_body, vy_body, omega, wheel_radius, lx, ly,
+                    max_wheel_omega):
+    """Mecanum inverse kinematics: body-frame twist to four wheel speeds.
+
+    Parameters
+    ----------
+    vx_body, vy_body : desired body-frame linear velocities (m/s)
+    omega            : desired yaw rate (rad/s)
+    wheel_radius     : wheel radius (m)
+    lx               : half front-to-back axle distance (m)
+    ly               : half left-to-right axle distance (m)
+    max_wheel_omega  : wheel angular velocity clamp (rad/s)
+
+    If the fastest wheel would exceed ``max_wheel_omega`` all four rates
+    are scaled by the same factor, which preserves their ratios.
+
+    Returns (w_fl, w_fr, w_rl, w_rr).
+    """
+    reach = lx + ly
+    rates = [
+        (vx_body - vy_body - reach * omega) / wheel_radius,   # front-left
+        (vx_body + vy_body + reach * omega) / wheel_radius,   # front-right
+        (vx_body + vy_body - reach * omega) / wheel_radius,   # rear-left
+        (vx_body - vy_body + reach * omega) / wheel_radius,   # rear-right
+    ]
+
+    peak = max(abs(rates[0]), abs(rates[1]), abs(rates[2]), abs(rates[3]), 1e-9)
+    if peak > max_wheel_omega:
+        shrink = max_wheel_omega / peak
+        rates = [rate * shrink for rate in rates]
+
+    return tuple(rates)
+
+
+def velocity_to_mecanum_wheels(vx, vy, omega, h, max_linear, wheel_radius,
+                               lx, ly, max_wheel_omega,
+                               k_turn=4.0, wheel_base=0.28):
+    """Turn a world-frame (vx, vy, omega) action in [-1, 1]^3 into 4 wheel speeds.
+
+    Holonomic interpretation: (vx, vy) is the desired *world-frame*
+    velocity (scaled by ``max_linear`` m/s) and ``omega`` is an
+    independent yaw-rate command, so the dog can translate sideways and
+    rotate at the same time.
+
+    This matches the universal teacher's signal: drive toward a standoff
+    point while facing the sheep / pen separately. With the older
+    non-holonomic version, ``omega`` from the teacher fought against the
+    forward-only kinematics and dropped success rates instead of helping.
+
+    Parameters
+    ----------
+    vx, vy : desired world-frame velocity intent in [-1, 1]; vectors
+             longer than 1 are rescaled to unit length
+    omega  : desired yaw rate intent in [-1, 1]
+    h      : current heading (rad), 0 = +x
+    max_linear     : max linear speed (m/s)
+    wheel_radius   : wheel radius (m)
+    lx, ly         : half axle distances (m)
+    max_wheel_omega : wheel angular velocity clamp (rad/s)
+    k_turn         : unused (kept for signature compatibility)
+    wheel_base     : unused (kept for signature compatibility)
+
+    Returns (w_fl, w_fr, w_rl, w_rr); all zeros when every commanded
+    component is below 1e-3.
+    """
+    # Keep the translational intent inside the unit disk.
+    magnitude = math.hypot(vx, vy)
+    if magnitude > 1.0:
+        vx, vy = vx / magnitude, vy / magnitude
+
+    # Scale to m/s, then rotate world → body (rotation by -h).
+    world_vx, world_vy = vx * max_linear, vy * max_linear
+    c, s = math.cos(h), math.sin(h)
+    body_vx = c * world_vx + s * world_vy
+    body_vy = -s * world_vx + c * world_vy
+
+    # omega = ±1 corresponds to a yaw rate of ±max_linear / (lx + ly).
+    yaw_rate = omega * (max_linear / max(lx + ly, 1e-6))
+
+    # Dead-band: a negligible command stops all four wheels.
+    if all(abs(part) < 1e-3 for part in (body_vx, body_vy, yaw_rate)):
+        return 0.0, 0.0, 0.0, 0.0
+
+    return mecanum_inverse(
+        body_vx, body_vy, yaw_rate,
+        wheel_radius, lx, ly, max_wheel_omega,
+    )
@@ -0,0 +1,181 @@
+"""Sheep flocking dynamics — Strömbom 2014 / Reynolds 1987.
+
+Per-sheep behavioural step used by both the Webots sheep controller
+and the training environment. Each step a force stack is summed:
+
+    flee       — quadratic ramp away from dog within FLEE_DIST
+                 (Strömbom 2014, term ρa)
+    cohesion   — drift toward local centre of mass of peers within
+                 COHESION_DIST (Strömbom 2014, term c). Weight is
+                 higher while fleeing — fear-induced cohesion.
+    separation — short-range inverse-distance repulsion from peers
+                 (Strömbom 2014 term α; Reynolds 1987)
+    wander     — small persistent drift (Strömbom 2014 noise term ε)
+
+Walls, the south-wall gate column, and in-pen containment are
+environment-specific additions for the fenced Webots field.
+
+References
+----------
+- Strömbom et al. (2014). "Solving the shepherding problem: heuristics
+  for herding autonomous, interacting agents." J R Soc Interface 11.
+- Reynolds (1987). "Flocks, herds and schools: A distributed
+  behavioural model." SIGGRAPH '87.
+"""
+
+import math
+import random
+
+from herding.world.geometry import (
+    FIELD_SHAPE, FIELD_ROUND_R,
+    FIELD_X, FIELD_Y,
+    PEN_X, PEN_Y,
+    GATE_X, GATE_Y,
+)
+
+# Speeds are in wheel rad/s (motor units); m/s = speed * SHEEP_WHEEL_RADIUS.
+# compute_heading_speed clamps its returned speed to
+# [WANDER_SPEED, FLEE_SPEED].
+# NOTE(review): MAX_SPEED is not referenced by the code in this module —
+# confirm whether an external caller still uses it.
+MAX_SPEED = 22.0
+FLEE_SPEED = 20.0
+WANDER_SPEED = 3.0
+
+# Wall avoidance: the soft repulsion ramps up linearly inside WALL_MARGIN
+# of a wall; inside WALL_HARD_MARGIN a much stronger push applies, reaching
+# WALL_HARD_GAIN at the wall itself.
+WALL_MARGIN = 5.0
+WALL_HARD_MARGIN = 1.0
+WALL_HARD_GAIN = 50.0
+
+# Interaction radii: the dog triggers fleeing inside FLEE_DIST, peers repel
+# inside SEPARATION_DIST and attract (cohesion) inside COHESION_DIST.
+FLEE_DIST = 7.0
+SEPARATION_DIST = 2.5
+COHESION_DIST = 12.0
+
+# Width of the band along each pen wall in which a penned sheep is pushed
+# back toward the pen interior.
+PEN_MARGIN = 0.8
+
+
+def _peers_iter(peers):
+    """Normalise ``peers`` to a list of (x, y) tuples.
+
+    A ``{name: (x, y)}`` dict contributes its values; any other iterable
+    is materialised as-is.
+    """
+    source = peers.values() if isinstance(peers, dict) else peers
+    return list(source)
+
+
+def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
+    """Return ``(heading, speed, new_wander_angle)`` for one sheep step.
+
+    ``speed`` is in wheel rad/s, bounded by ``[WANDER_SPEED, FLEE_SPEED]``.
+    ``heading`` is the world-frame target heading (atan2 convention).
+    ``rng`` is an optional ``random.Random`` used for wander jitter; if
+    ``None`` uses the module's global ``random``.
+
+    ``x, y`` is this sheep's position. ``penned`` selects the in-pen
+    containment branch instead of the free-roaming force stack.
+    ``dog_xy`` is the dog position as ``(x, y)`` or ``None`` (no flee
+    term). ``peers`` is a ``{name: (x, y)}`` dict or an iterable of
+    ``(x, y)`` tuples. ``wander_angle`` is the current wander direction
+    (rad); it is returned, possibly jittered.
+    """
+    # (fx, fy) accumulates the summed steering force for this step.
+    fx, fy = 0.0, 0.0
+    peer_list = _peers_iter(peers)
+    rnd = rng if rng is not None else random
+
+    if penned:
+        # Pen containment: bounce off all four pen walls.
+        # Each push grows linearly with penetration into the PEN_MARGIN band.
+        pm = PEN_MARGIN
+        if x < PEN_X[0] + pm:
+            fx += ((PEN_X[0] + pm - x) / pm) * 15.0
+        if x > PEN_X[1] - pm:
+            fx -= ((x - (PEN_X[1] - pm)) / pm) * 15.0
+        if y < PEN_Y[0] + pm:
+            fy += ((PEN_Y[0] + pm - y) / pm) * 15.0
+        if y > PEN_Y[1] - pm:
+            fy -= ((y - (PEN_Y[1] - pm)) / pm) * 15.0
+
+        # Mild peer separation so penned sheep don't crowd one corner.
+        # The 0.05 lower bound skips near-coincident points (division by d).
+        for px, py in peer_list:
+            dx, dy = px - x, py - y
+            d = math.hypot(dx, dy)
+            if 0.05 < d < SEPARATION_DIST:
+                push = (SEPARATION_DIST - d) / d
+                fx -= (dx / d) * push * 2.5
+                fy -= (dy / d) * push * 2.5
+
+        # With 2 % probability per call, nudge the wander direction by up
+        # to ±0.6 rad; the wander force itself is always applied in the pen.
+        if rnd.random() < 0.02:
+            wander_angle += rnd.uniform(-0.6, 0.6)
+        fx += math.cos(wander_angle) * 0.5
+        fy += math.sin(wander_angle) * 0.5
+
+    else:
+        # Free-roaming sheep in the field.
+        fleeing = False
+        if dog_xy is not None:
+            ddx = dog_xy[0] - x
+            ddy = dog_xy[1] - y
+            dist = math.hypot(ddx, ddy)
+            if 0.01 < dist < FLEE_DIST:
+                fleeing = True
+                # Quadratic ramp: 0 at FLEE_DIST, approaching 20 at the dog.
+                t = 1.0 - dist / FLEE_DIST
+                s = t * t * 20.0
+                fx -= (ddx / dist) * s
+                fy -= (ddy / dist) * s
+
+        # Cohesion: drift toward the local CoM of peers within
+        # COHESION_DIST. Stronger while fleeing — fear-induced
+        # cohesion keeps the flock together through the gate.
+        # Peers closer than 0.3 are left out of the average.
+        cx, cy, cn = 0.0, 0.0, 0
+        for px, py in peer_list:
+            d = math.hypot(px - x, py - y)
+            if 0.3 < d < COHESION_DIST:
+                cx += px
+                cy += py
+                cn += 1
+        if cn > 0:
+            w = 3.0 if fleeing else 1.0
+            fx += (cx / cn - x) * w
+            fy += (cy / cn - y) * w
+
+        # Separation — inverse-distance push from peers.
+        for px, py in peer_list:
+            ddx, ddy = px - x, py - y
+            d = math.hypot(ddx, ddy)
+            if 0.05 < d < SEPARATION_DIST:
+                push = (SEPARATION_DIST - d) / d
+                fx -= (ddx / d) * push * 2.5
+                fy -= (ddy / d) * push * 2.5
+
+        # Wall soft repulsion.
+        if FIELD_SHAPE == "field_round":
+            r = math.hypot(x, y)
+            wall_d = FIELD_ROUND_R - r
+            # Inside the gate column near the south edge the wall forces
+            # are disabled so sheep can pass through the gate.
+            in_gate_col = (GATE_X[0] <= x <= GATE_X[1]
+                           and y < GATE_Y + WALL_MARGIN)
+            if wall_d < WALL_MARGIN and r > 1e-6 and not in_gate_col:
+                gain = ((WALL_MARGIN - wall_d) / WALL_MARGIN) * 6.0
+                fx -= (x / r) * gain
+                fy -= (y / r) * gain
+            # Hard escape band.
+            # Unlike the rectangular case this push is *added* to the
+            # accumulated force rather than overriding it.
+            # NOTE(review): there is no r > 1e-6 guard here; the division
+            # relies on wall_d < WALL_HARD_MARGIN implying
+            # r > FIELD_ROUND_R - WALL_HARD_MARGIN, which is positive with
+            # the current constants.
+            if wall_d < WALL_HARD_MARGIN and not in_gate_col:
+                hgain = WALL_HARD_GAIN * (1.0 - wall_d / WALL_HARD_MARGIN)
+                fx -= (x / r) * hgain
+                fy -= (y / r) * hgain
+        else:
+            # Rectangular: south wall absent inside the gate column.
+            if x < FIELD_X[0] + WALL_MARGIN:
+                fx += ((FIELD_X[0] + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
+            if x > FIELD_X[1] - WALL_MARGIN:
+                fx -= ((x - (FIELD_X[1] - WALL_MARGIN)) / WALL_MARGIN) * 6.0
+            if y > FIELD_Y[1] - WALL_MARGIN:
+                fy -= ((y - (FIELD_Y[1] - WALL_MARGIN)) / WALL_MARGIN) * 6.0
+            if y < FIELD_Y[0] + WALL_MARGIN and not (GATE_X[0] <= x <= GATE_X[1]):
+                fy += ((FIELD_Y[0] + WALL_MARGIN - y) / WALL_MARGIN) * 6.0
+
+            # Hard escape band — overrides everything else near a wall.
+            # max/min set a floor (or ceiling) on the force component, so
+            # flee, cohesion and separation cannot cancel the push.
+            m, g = WALL_HARD_MARGIN, WALL_HARD_GAIN
+            if x - FIELD_X[0] < m:
+                fx = max(fx, g * (1.0 - (x - FIELD_X[0]) / m))
+            if FIELD_X[1] - x < m:
+                fx = min(fx, -g * (1.0 - (FIELD_X[1] - x) / m))
+            if FIELD_Y[1] - y < m:
+                fy = min(fy, -g * (1.0 - (FIELD_Y[1] - y) / m))
+            if (y - FIELD_Y[0] < m) and not (GATE_X[0] <= x <= GATE_X[1]):
+                fy = max(fy, g * (1.0 - (y - FIELD_Y[0]) / m))
+
+        # Wander only contributes (and the angle only jitters) when the
+        # sheep is not fleeing from the dog.
+        if not fleeing:
+            if rnd.random() < 0.02:
+                wander_angle += rnd.uniform(-0.6, 0.6)
+            fx += math.cos(wander_angle) * 0.5
+            fy += math.sin(wander_angle) * 0.5
+
+    # Direction comes from the force vector; speed is three times its
+    # magnitude, clamped to [WANDER_SPEED, FLEE_SPEED].
+    heading = math.atan2(fy, fx)
+    mag = math.hypot(fx, fy)
+    speed = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))
+    return heading, speed, wander_angle
@@ -0,0 +1,185 @@
+"""World geometry and robot specs.
+
+Coordinates are metres; (0, 0) is the field centre, +x east, +y north.
+These constants mirror ``worlds/field.wbt`` and the proto files — if
+the world changes, this file is the single point of update.
+
+    field (rectangular)
+    +-----------+
+    |           |
+    |  ......   |
+    +---||||----+   y = -15  (south wall, 3 m gate at x in [10, 13])
+        ||||
+        |pen|       y in [-22, -15]
+        +---+
+
+    field_round (circular, R = 15 m)
+         .---.
+       /  ...  \\
+      |  .....  |    gate at south, x in [-1.83, 1.83]
+       \\  ...  /
+         '-+-'       pen y in [-22, -15]
+"""
+
+import os
+import math
+
+# ---------------------------------------------------------------------------
+# Field shape selection — controlled by HERDING_WORLD env var at runtime.
+# Defaults to "field" (rectangular). The launcher writes it into the
+# runtime cfg so the controller can pick it up too.
+# The value is lower-cased; within this module only the exact string
+# "field_round" selects the circular geometry, every other value falls
+# through to the rectangular branches.
+# ---------------------------------------------------------------------------
+FIELD_SHAPE = (os.environ.get("HERDING_WORLD", "field")).lower()
+
+
+# ==================== Rectangular field (field.wbt) ====================
+# (min, max) extents in metres along each axis.
+FIELD_X = (-15.0, 15.0)
+FIELD_Y = (-15.0, 15.0)
+# NOTE(review): FIELD_INSIDE_MARGIN is not referenced in this module —
+# presumably consumed by callers of in_field(); confirm.
+FIELD_INSIDE_MARGIN = 0.5
+
+# Pen (external, south of the field)
+PEN_X = (10.0, 13.0)
+PEN_Y = (-22.0, -15.0)
+# Centre of the pen rectangle.
+PEN_CENTER = (0.5 * (PEN_X[0] + PEN_X[1]), 0.5 * (PEN_Y[0] + PEN_Y[1]))
+# Midpoint of the pen's x-span on the y = -15 line.
+PEN_ENTRY = (0.5 * (PEN_X[0] + PEN_X[1]), -15.0)
+
+# Gate (hole in the south wall)
+# The gate spans the same x-range as the pen.
+GATE_X = PEN_X
+GATE_Y = -15.0
+
+
+# ==================== Round field (field_round.wbt) ====================
+FIELD_ROUND_R = 15.0
+FIELD_ROUND_PEN_X = (-1.5, 1.5)
+FIELD_ROUND_PEN_Y = (-22.0, -15.0)
+FIELD_ROUND_PEN_CENTER = (
+    0.5 * (FIELD_ROUND_PEN_X[0] + FIELD_ROUND_PEN_X[1]),
+    0.5 * (FIELD_ROUND_PEN_Y[0] + FIELD_ROUND_PEN_Y[1]),
+)
+FIELD_ROUND_PEN_ENTRY = (0.0, -15.0)
+FIELD_ROUND_GATE_X = FIELD_ROUND_PEN_X
+FIELD_ROUND_GATE_Y = -15.0
+
+
+# ==================== Active geometry (resolved at import) ===============
+# Both geometry sets are snapshotted once so that the import-time path and
+# ``configure`` share a single definition. The rectangular tuple has to be
+# captured here, before any override, because the public names it is built
+# from are rebound in place when the round field is selected.
+# Order: PEN_X, PEN_Y, PEN_CENTER, PEN_ENTRY, GATE_X, GATE_Y.
+_RECT_GEOMETRY = (PEN_X, PEN_Y, PEN_CENTER, PEN_ENTRY, GATE_X, GATE_Y)
+_ROUND_GEOMETRY = (
+    FIELD_ROUND_PEN_X, FIELD_ROUND_PEN_Y, FIELD_ROUND_PEN_CENTER,
+    FIELD_ROUND_PEN_ENTRY, FIELD_ROUND_GATE_X, FIELD_ROUND_GATE_Y,
+)
+
+
+def configure(shape: str) -> None:
+    """Switch the active field geometry at runtime.
+
+    Call this **before** importing any other ``herding.*`` module that
+    depends on the constants below (flocking_sim, lidar_sim, obs, etc.).
+    The import-time env-var path (``HERDING_WORLD``) still works; this
+    function is for scripts that need to choose the world via a CLI flag.
+
+    ``shape`` is lower-cased and stored in ``FIELD_SHAPE``. The value
+    ``"field_round"`` activates the round-field pen and gate constants;
+    any other value activates the rectangular ones.
+    """
+    global FIELD_SHAPE, PEN_X, PEN_Y, PEN_CENTER, PEN_ENTRY, GATE_X, GATE_Y
+    FIELD_SHAPE = shape.lower()
+    active = _ROUND_GEOMETRY if FIELD_SHAPE == "field_round" else _RECT_GEOMETRY
+    PEN_X, PEN_Y, PEN_CENTER, PEN_ENTRY, GATE_X, GATE_Y = active
+
+
+# Apply the shape chosen through HERDING_WORLD. For the rectangular default
+# this rebinds the names to the values they already hold.
+configure(FIELD_SHAPE)
+
+# Dog spec — protos/ShepherdDog.proto
+DOG_WHEEL_RADIUS = 0.038         # m
+DOG_WHEEL_BASE = 0.28            # m, axle-to-axle
+DOG_MAX_WHEEL_OMEGA = 70.0       # rad/s
+DOG_MAX_LINEAR = DOG_WHEEL_RADIUS * DOG_MAX_WHEEL_OMEGA  # ≈ 2.66 m/s
+
+# Dog mecanum spec — 4-wheel omnidirectional layout
+# NOTE(review): these are documented as full axle distances, while the
+# mecanum kinematics helpers document their lx / ly parameters as *half*
+# distances — confirm that callers halve these values before passing them.
+DOG_WHEEL_BASE_X = 0.28            # m, front-to-back axle distance
+DOG_WHEEL_BASE_Y = 0.28            # m, left-to-right axle distance
+
+# Sheep spec — protos/Sheep.proto
+SHEEP_WHEEL_RADIUS = 0.031       # m
+SHEEP_WHEEL_BASE = 0.20          # m
+SHEEP_MAX_WHEEL_OMEGA = 25.0     # rad/s
+SHEEP_MAX_LINEAR = SHEEP_WHEEL_RADIUS * SHEEP_MAX_WHEEL_OMEGA  # ≈ 0.78 m/s
+
+WEBOTS_DT = 0.016                # seconds (matches WorldInfo.basicTimeStep)
+
+# Virtual south wall — env and controller both keep the dog north of this.
+# With the field's south wall at y = -15 this leaves a 0.5 m buffer.
+DOG_SOUTH_LIMIT = -14.5
+
+# Maximum flock size.
+MAX_SHEEP = 10
+
+
+def in_pen(x: float, y: float) -> bool:
+    """Whether (x, y) is strictly inside the active pen rectangle.
+
+    Points exactly on the pen boundary count as outside.
+    """
+    x_lo, x_hi = PEN_X
+    y_lo, y_hi = PEN_Y
+    return x_lo < x < x_hi and y_lo < y < y_hi
+
+
+def in_field(x: float, y: float, margin: float = 0.0) -> bool:
+    """Whether (x, y) lies inside the field shrunk by ``margin`` on every side.
+
+    Boundary points count as inside. For the round field the test is
+    against a circle of radius ``FIELD_ROUND_R - margin``.
+    """
+    if FIELD_SHAPE != "field_round":
+        inside_x = FIELD_X[0] + margin <= x <= FIELD_X[1] - margin
+        inside_y = FIELD_Y[0] + margin <= y <= FIELD_Y[1] - margin
+        return inside_x and inside_y
+    shrunk_r = FIELD_ROUND_R - margin
+    return x * x + y * y <= shrunk_r * shrunk_r
+
+
+def in_gate_corridor(x: float, y: float, margin: float = 0.0) -> bool:
+    """Whether (x, y) lies in the gate column, from pen floor up to the gate line.
+
+    The rectangle spans ``GATE_X`` horizontally and ``PEN_Y[0]`` to
+    ``GATE_Y`` vertically, grown by ``margin`` on all four sides;
+    boundary points count as inside.
+    """
+    within_x = GATE_X[0] - margin <= x <= GATE_X[1] + margin
+    within_y = PEN_Y[0] - margin <= y <= GATE_Y + margin
+    return within_x and within_y
+
+
+def is_penned_position(x: float, y: float, latch_margin: float = 0.2) -> bool:
+    """True iff (x, y) is on or south of the gate line, within the gate column.
+
+    The column is ``GATE_X`` widened by ``latch_margin`` on each side.
+    There is no southern bound.
+    """
+    west = GATE_X[0] - latch_margin
+    east = GATE_X[1] + latch_margin
+    return west <= x <= east and y <= GATE_Y
+
+
+def distance_to_pen_entry(x: float, y: float) -> float:
+    """Euclidean distance (m) from (x, y) to the active ``PEN_ENTRY`` point."""
+    entry_x, entry_y = PEN_ENTRY
+    return math.hypot(x - entry_x, y - entry_y)
+
+
+def distance_to_wall(x: float, y: float) -> float:
+    """Shortest distance from (x, y) to the nearest field wall.
+
+    For the rectangular field this is the smallest of the four
+    perpendicular distances to the bounding walls. For the round field
+    it is ``R - sqrt(x²+y²)``. The value is negative when the point is
+    outside the field.
+    """
+    if FIELD_SHAPE == "field_round":
+        return FIELD_ROUND_R - math.hypot(x, y)
+    to_west = x - FIELD_X[0]
+    to_east = FIELD_X[1] - x
+    to_south = y - FIELD_Y[0]
+    to_north = FIELD_Y[1] - y
+    return min(to_west, to_east, to_south, to_north)
+
+
+def clip_to_field(x: float, y: float, margin: float = 0.2) -> tuple[float, float]:
+    """Return (x, y) constrained to the field, kept ``margin`` away from the wall.
+
+    Rectangular field: each coordinate is clamped independently. Round
+    field: a point beyond the shrunk radius is pulled back along the ray
+    from the origin; points already inside are returned unchanged.
+    """
+    if FIELD_SHAPE != "field_round":
+        x_lo, x_hi = FIELD_X[0] + margin, FIELD_X[1] - margin
+        y_lo, y_hi = FIELD_Y[0] + margin, FIELD_Y[1] - margin
+        return max(x_lo, min(x_hi, x)), max(y_lo, min(y_hi, y))
+
+    r = math.hypot(x, y)
+    limit = FIELD_ROUND_R - margin
+    if r > limit and r > 1e-6:
+        shrink = limit / r
+        return x * shrink, y * shrink
+    return x, y
@@ -0,0 +1,885 @@
+#VRML_SIM R2025a utf8
+# Shepherd Dog Robot - mecanum-wheeled base with dog character on top
+# 4-wheel omnidirectional drive (front-left, front-right, rear-left, rear-right).
+
+PROTO ShepherdDogMecanum [
+  field SFVec3f    translation     0 0 0
+  field SFRotation rotation        0 1 0 0
+  field SFString   name            "ShepherdDog"
+  field SFString   controller      "shepherd_dog"
+  field MFString   controllerArgs  []
+  field SFString   customData      ""
+  field SFBool     supervisor      FALSE
+  field SFBool     synchronization TRUE
+]
+{
+  Robot {
+    translation IS translation
+    rotation IS rotation
+    name IS name
+    controller IS controller
+    controllerArgs IS controllerArgs
+    customData IS customData
+    supervisor IS supervisor
+    synchronization IS synchronization
+    children [
+      # ========== CHASSIS / BASE ==========
+      DEF CHASSIS Transform {
+        translation 0 0 0.05
+        children [
+          Shape {
+            appearance DEF CHASSIS_APP PBRAppearance {
+              baseColor 0.2 0.2 0.2
+              roughness 0.6
+              metalness 0.3
+            }
+            geometry Box {
+              size 0.32 0.16 0.06
+            }
+          }
+        ]
+      }
+      # Front slope
+      DEF CHASSIS_FRONT Transform {
+        translation 0.14 0 0.07
+        children [
+          Shape {
+            appearance USE CHASSIS_APP
+            geometry Box {
+              size 0.06 0.14 0.04
+            }
+          }
+        ]
+      }
+      # Rear slope
+      DEF CHASSIS_REAR Transform {
+        translation -0.14 0 0.07
+        children [
+          Shape {
+            appearance USE CHASSIS_APP
+            geometry Box {
+              size 0.06 0.14 0.04
+            }
+          }
+        ]
+      }
+
+      # ========== DOG BODY on top of chassis ==========
+      DEF BODY Transform {
+        translation 0 0 0.11
+        children [
+          Shape {
+            appearance DEF FUR_BROWN PBRAppearance {
+              baseColor 0.55 0.35 0.17
+              roughness 0.85
+              metalness 0.0
+            }
+            geometry Box {
+              size 0.30 0.16 0.08
+            }
+          }
+        ]
+      }
+
+      # ========== CHEST ==========
+      DEF CHEST Transform {
+        translation 0.12 0 0.11
+        children [
+          Shape {
+            appearance DEF FUR_CREAM PBRAppearance {
+              baseColor 0.85 0.72 0.55
+              roughness 0.85
+              metalness 0.0
+            }
+            geometry Box {
+              size 0.08 0.18 0.08
+            }
+          }
+        ]
+      }
+
+      # ========== HEAD ==========
+      DEF HEAD Transform {
+        translation 0.20 0 0.17
+        children [
+          Shape {
+            appearance USE FUR_BROWN
+            geometry Box {
+              size 0.10 0.12 0.09
+            }
+          }
+        ]
+      }
+
+      # ========== SNOUT + LIDAR ==========
+      DEF SNOUT Transform {
+        translation 0.28 0 0.155
+        children [
+          Shape {
+            appearance USE FUR_CREAM
+            geometry Box {
+              size 0.08 0.07 0.05
+            }
+          }
+          # Nose
+          Transform {
+            translation 0.04 0 0.01
+            children [
+              Shape {
+                appearance PBRAppearance {
+                  baseColor 0.1 0.1 0.1
+                  roughness 0.4
+                }
+                geometry Sphere {
+                  radius 0.013
+                  subdivision 2
+                }
+              }
+            ]
+          }
+          # Lidar — front-facing 140° FOV, mounted at snout tip
+          Lidar {
+            translation 0.05 0 0.01
+            name "lidar"
+            horizontalResolution 180
+            fieldOfView 2.44
+            numberOfLayers 1
+            minRange 0.10
+            maxRange 12.0
+            noise 0.005
+          }
+        ]
+      }
+
+      # ========== LEFT EAR ==========
+      DEF LEFT_EAR HingeJoint {
+        jointParameters HingeJointParameters {
+          axis 0 0 1
+          anchor 0.19 0.055 0.21
+        }
+        device [
+          RotationalMotor {
+            name "left ear motor"
+            maxVelocity 10.0
+            minPosition -0.5
+            maxPosition 0.5
+          }
+        ]
+        endPoint Solid {
+          translation 0.19 0.055 0.21
+          rotation 0 0 1 0.2
+          name "left ear"
+          children [
+            Shape {
+              appearance DEF FUR_DARK PBRAppearance {
+                baseColor 0.35 0.20 0.10
+                roughness 0.85
+                metalness 0.0
+              }
+              geometry Box {
+                size 0.035 0.025 0.06
+              }
+            }
+          ]
+          boundingObject Box {
+            size 0.035 0.025 0.06
+          }
+          physics Physics {
+            density -1
+            mass 0.005
+          }
+        }
+      }
+
+      # ========== RIGHT EAR ==========
+      DEF RIGHT_EAR HingeJoint {
+        jointParameters HingeJointParameters {
+          axis 0 0 1
+          anchor 0.19 -0.055 0.21
+        }
+        device [
+          RotationalMotor {
+            name "right ear motor"
+            maxVelocity 10.0
+            minPosition -0.5
+            maxPosition 0.5
+          }
+        ]
+        endPoint Solid {
+          translation 0.19 -0.055 0.21
+          rotation 0 0 -1 0.2
+          name "right ear"
+          children [
+            Shape {
+              appearance USE FUR_DARK
+              geometry Box {
+                size 0.035 0.025 0.06
+              }
+            }
+          ]
+          boundingObject Box {
+            size 0.035 0.025 0.06
+          }
+          physics Physics {
+            density -1
+            mass 0.005
+          }
+        }
+      }
+
+      # ========== EYES ==========
+      DEF LEFT_EYE Transform {
+        translation 0.25 0.05 0.19
+        children [
+          Shape {
+            appearance PBRAppearance {
+              baseColor 0.95 0.95 0.95
+              roughness 0.3
+            }
+            geometry Sphere {
+              radius 0.016
+              subdivision 2
+            }
+          }
+          # Pupil
+          Transform {
+            translation 0.012 0 0.004
+            children [
+              Shape {
+                appearance PBRAppearance {
+                  baseColor 0.1 0.1 0.1
+                  roughness 0.2
+                }
+                geometry Sphere {
+                  radius 0.009
+                  subdivision 2
+                }
+              }
+            ]
+          }
+        ]
+      }
+      DEF RIGHT_EYE Transform {
+        translation 0.25 -0.05 0.19
+        children [
+          Shape {
+            appearance PBRAppearance {
+              baseColor 0.95 0.95 0.95
+              roughness 0.3
+            }
+            geometry Sphere {
+              radius 0.016
+              subdivision 2
+            }
+          }
+          # Pupil
+          Transform {
+            translation 0.012 0 0.004
+            children [
+              Shape {
+                appearance PBRAppearance {
+                  baseColor 0.1 0.1 0.1
+                  roughness 0.2
+                }
+                geometry Sphere {
+                  radius 0.009
+                  subdivision 2
+                }
+              }
+            ]
+          }
+        ]
+      }
+
+      # ========== COLLAR ==========
+      DEF COLLAR Transform {
+        translation 0.16 0 0.125
+        children [
+          Shape {
+            appearance PBRAppearance {
+              baseColor 0.8 0.1 0.1
+              roughness 0.5
+            }
+            geometry Cylinder {
+              height 0.02
+              radius 0.095
+              subdivision 16
+            }
+          }
+          # ID tag
+          Transform {
+            translation 0 0.10 0
+            rotation 1 0 0 1.5708
+            children [
+              Shape {
+                appearance PBRAppearance {
+                  baseColor 0.75 0.75 0.0
+                  metalness 0.8
+                  roughness 0.2
+                }
+                geometry Cylinder {
+                  height 0.003
+                  radius 0.018
+                  subdivision 8
+                }
+              }
+            ]
+          }
+        ]
+      }
+
+      # ========== TAIL (hinged, with tip ball) ==========
+      # The tail carries no sensor; the only Lidar in this PROTO is in SNOUT.
+      DEF TAIL HingeJoint {
+        jointParameters HingeJointParameters {
+          axis 0 1 0
+          anchor -0.15 0 0.11
+        }
+        device [
+          RotationalMotor {
+            name "tail motor"
+            maxVelocity 5.0
+            minPosition -1.0
+            maxPosition 1.0
+          }
+        ]
+        endPoint Solid {
+          translation -0.17 0 0.13
+          name "tail solid"
+          children [
+            Shape {
+              appearance USE FUR_BROWN
+              geometry Capsule {
+                height 0.12
+                radius 0.013
+                top FALSE
+              }
+            }
+            # Tail tip ball
+            Transform {
+              translation 0 0 0.08
+              children [
+                Shape {
+                  appearance PBRAppearance {
+                    baseColor 0.2 0.2 0.2
+                    roughness 0.3
+                    metalness 0.6
+                  }
+                  geometry Sphere {
+                    radius 0.028
+                    subdivision 4
+                  }
+                }
+              ]
+            }
+          ]
+          # Collision geometry: tail capsule plus the tip ball. The offset
+          # uses Pose (not Transform), matching the wheel boundingObjects
+          # in this PROTO.
+          boundingObject Group {
+            children [
+              Capsule {
+                height 0.12
+                radius 0.013
+              }
+              Pose {
+                translation 0 0 0.08
+                children [
+                  Sphere {
+                    radius 0.028
+                  }
+                ]
+              }
+            ]
+          }
+          physics Physics {
+            density -1
+            mass 0.08
+          }
+        }
+      }
+
+      # ========== AXLE ARMS (4 corners) ==========
+      DEF FRONT_RIGHT_AXLE Transform {
+        translation 0.14 -0.115 0.038
+        children [
+          Shape {
+            appearance PBRAppearance {
+              baseColor 0.5 0.5 0.5
+              roughness 0.3
+              metalness 0.8
+            }
+            geometry Box {
+              size 0.02 0.08 0.02
+            }
+          }
+        ]
+      }
+      DEF FRONT_LEFT_AXLE Transform {
+        translation 0.14 0.115 0.038
+        children [
+          Shape {
+            appearance PBRAppearance {
+              baseColor 0.5 0.5 0.5
+              roughness 0.3
+              metalness 0.8
+            }
+            geometry Box {
+              size 0.02 0.08 0.02
+            }
+          }
+        ]
+      }
+      DEF REAR_RIGHT_AXLE Transform {
+        translation -0.14 -0.115 0.038
+        children [
+          Shape {
+            appearance PBRAppearance {
+              baseColor 0.5 0.5 0.5
+              roughness 0.3
+              metalness 0.8
+            }
+            geometry Box {
+              size 0.02 0.08 0.02
+            }
+          }
+        ]
+      }
+      DEF REAR_LEFT_AXLE Transform {
+        translation -0.14 0.115 0.038
+        children [
+          Shape {
+            appearance PBRAppearance {
+              baseColor 0.5 0.5 0.5
+              roughness 0.3
+              metalness 0.8
+            }
+            geometry Box {
+              size 0.02 0.08 0.02
+            }
+          }
+        ]
+      }
+
+      # ========== FRONT RIGHT WHEEL ==========
+      DEF FRONT_RIGHT_WHEEL_JOINT HingeJoint {
+        jointParameters HingeJointParameters {
+          axis 0 1 0
+          anchor 0.14 -0.14 0.038
+        }
+        device [
+          RotationalMotor {
+            name "front right wheel motor"
+            maxVelocity 70.0
+            maxTorque 20.0
+          }
+          PositionSensor {
+            name "front right wheel sensor"
+            resolution 0.00628
+          }
+        ]
+        endPoint Solid {
+          translation 0.14 -0.14 0.038
+          rotation 0 -1 0 1.570796
+          children [
+            DEF WHEEL_VIS Pose {
+              rotation 1 0 0 -1.5708
+              children [
+                # Hub drum
+                Shape {
+                  appearance PBRAppearance {
+                    baseColor 0.5 0.5 0.5
+                    roughness 0.3
+                    metalness 0.7
+                  }
+                  geometry Cylinder {
+                    height 0.018
+                    radius 0.022
+                    subdivision 16
+                  }
+                }
+                # Axle boss
+                Shape {
+                  appearance PBRAppearance {
+                    baseColor 0.6 0.6 0.6
+                    roughness 0.2
+                    metalness 0.8
+                  }
+                  geometry Cylinder {
+                    height 0.022
+                    radius 0.008
+                    subdivision 8
+                  }
+                }
+                # Mecanum roller 1 (top, +y)
+                DEF ROLLER_1 Pose {
+                  translation 0 0.031 0
+                  rotation 0 0 1 0.7854
+                  children [
+                    Shape {
+                      appearance PBRAppearance {
+                        baseColor 0.12 0.12 0.12
+                        roughness 0.7
+                        metalness 0.1
+                      }
+                      geometry Capsule {
+                        height 0.020
+                        radius 0.007
+                        subdivision 8
+                      }
+                    }
+                  ]
+                }
+                # Mecanum roller 2 (right, +x)
+                DEF ROLLER_2 Pose {
+                  translation 0.031 0 0
+                  rotation 0 0 1 0.7854
+                  children [
+                    Shape {
+                      appearance PBRAppearance {
+                        baseColor 0.12 0.12 0.12
+                        roughness 0.7
+                        metalness 0.1
+                      }
+                      geometry Capsule {
+                        height 0.020
+                        radius 0.007
+                        subdivision 8
+                      }
+                    }
+                  ]
+                }
+                # Mecanum roller 3 (bottom, -y)
+                DEF ROLLER_3 Pose {
+                  translation 0 -0.031 0
+                  rotation 0 0 1 0.7854
+                  children [
+                    Shape {
+                      appearance PBRAppearance {
+                        baseColor 0.12 0.12 0.12
+                        roughness 0.7
+                        metalness 0.1
+                      }
+                      geometry Capsule {
+                        height 0.020
+                        radius 0.007
+                        subdivision 8
+                      }
+                    }
+                  ]
+                }
+                # Mecanum roller 4 (left, -x)
+                DEF ROLLER_4 Pose {
+                  translation -0.031 0 0
+                  rotation 0 0 1 0.7854
+                  children [
+                    Shape {
+                      appearance PBRAppearance {
+                        baseColor 0.12 0.12 0.12
+                        roughness 0.7
+                        metalness 0.1
+                      }
+                      geometry Capsule {
+                        height 0.020
+                        radius 0.007
+                        subdivision 8
+                      }
+                    }
+                  ]
+                }
+                # Mecanum roller 5 (diagonal +x+y)
+                DEF ROLLER_5 Pose {
+                  translation 0.022 0.022 0
+                  rotation 0 0 1 0.7854
+                  children [
+                    Shape {
+                      appearance PBRAppearance {
+                        baseColor 0.12 0.12 0.12
+                        roughness 0.7
+                        metalness 0.1
+                      }
+                      geometry Capsule {
+                        height 0.020
+                        radius 0.007
+                        subdivision 8
+                      }
+                    }
+                  ]
+                }
+                # Mecanum roller 6 (diagonal +x-y)
+                DEF ROLLER_6 Pose {
+                  translation 0.022 -0.022 0
+                  rotation 0 0 1 0.7854
+                  children [
+                    Shape {
+                      appearance PBRAppearance {
+                        baseColor 0.12 0.12 0.12
+                        roughness 0.7
+                        metalness 0.1
+                      }
+                      geometry Capsule {
+                        height 0.020
+                        radius 0.007
+                        subdivision 8
+                      }
+                    }
+                  ]
+                }
+                # Mecanum roller 7 (diagonal -x-y)
+                DEF ROLLER_7 Pose {
+                  translation -0.022 -0.022 0
+                  rotation 0 0 1 0.7854
+                  children [
+                    Shape {
+                      appearance PBRAppearance {
+                        baseColor 0.12 0.12 0.12
+                        roughness 0.7
+                        metalness 0.1
+                      }
+                      geometry Capsule {
+                        height 0.020
+                        radius 0.007
+                        subdivision 8
+                      }
+                    }
+                  ]
+                }
+                # Mecanum roller 8 (diagonal -x+y)
+                DEF ROLLER_8 Pose {
+                  translation -0.022 0.022 0
+                  rotation 0 0 1 0.7854
+                  children [
+                    Shape {
+                      appearance PBRAppearance {
+                        baseColor 0.12 0.12 0.12
+                        roughness 0.7
+                        metalness 0.1
+                      }
+                      geometry Capsule {
+                        height 0.020
+                        radius 0.007
+                        subdivision 8
+                      }
+                    }
+                  ]
+                }
+              ]
+            }
+          ]
+          name "front right wheel"
+          contactMaterial "MecanumWheel"
+          boundingObject Pose {
+            rotation 1 0 0 -1.5708
+            children [
+              Cylinder {
+                height 0.022
+                radius 0.038
+              }
+            ]
+          }
+          physics Physics {
+            density -1
+            mass 0.06
+            centerOfMass [
+              0 0 0
+            ]
+          }
+        }
+      }
+
+      # ========== FRONT LEFT WHEEL ==========
+      DEF FRONT_LEFT_WHEEL_JOINT HingeJoint {
+        jointParameters HingeJointParameters {
+          axis 0 1 0
+          anchor 0.14 0.14 0.038
+        }
+        device [
+          RotationalMotor {
+            name "front left wheel motor"
+            maxVelocity 70.0
+            maxTorque 20.0
+          }
+          PositionSensor {
+            name "front left wheel sensor"
+            resolution 0.00628
+          }
+        ]
+        endPoint Solid {
+          translation 0.14 0.14 0.038
+          rotation 0.707105 0 0.707109 -3.14159
+          children [
+            USE WHEEL_VIS
+          ]
+          name "front left wheel"
+          contactMaterial "MecanumWheel"
+          boundingObject Pose {
+            rotation 1 0 0 -1.5708
+            children [
+              Cylinder {
+                height 0.022
+                radius 0.038
+              }
+            ]
+          }
+          physics Physics {
+            density -1
+            mass 0.06
+            centerOfMass [
+              0 0 0
+            ]
+          }
+        }
+      }
+
+      # ========== REAR RIGHT WHEEL ==========
+      DEF REAR_RIGHT_WHEEL_JOINT HingeJoint {
+        jointParameters HingeJointParameters {
+          axis 0 1 0
+          anchor -0.14 -0.14 0.038
+        }
+        device [
+          RotationalMotor {
+            name "rear right wheel motor"
+            maxVelocity 70.0
+            maxTorque 20.0
+          }
+          PositionSensor {
+            name "rear right wheel sensor"
+            resolution 0.00628
+          }
+        ]
+        endPoint Solid {
+          translation -0.14 -0.14 0.038
+          rotation 0 -1 0 1.570796
+          children [
+            USE WHEEL_VIS
+          ]
+          name "rear right wheel"
+          contactMaterial "MecanumWheel"
+          boundingObject Pose {
+            rotation 1 0 0 -1.5708
+            children [
+              Cylinder {
+                height 0.022
+                radius 0.038
+              }
+            ]
+          }
+          physics Physics {
+            density -1
+            mass 0.06
+            centerOfMass [
+              0 0 0
+            ]
+          }
+        }
+      }
+
+      # ========== REAR LEFT WHEEL ==========
+      DEF REAR_LEFT_WHEEL_JOINT HingeJoint {
+        jointParameters HingeJointParameters {
+          axis 0 1 0
+          anchor -0.14 0.14 0.038
+        }
+        device [
+          RotationalMotor {
+            name "rear left wheel motor"
+            maxVelocity 70.0
+            maxTorque 20.0
+          }
+          PositionSensor {
+            name "rear left wheel sensor"
+            resolution 0.00628
+          }
+        ]
+        endPoint Solid {
+          translation -0.14 0.14 0.038
+          rotation 0.707105 0 0.707109 -3.14159
+          children [
+            USE WHEEL_VIS
+          ]
+          name "rear left wheel"
+          contactMaterial "MecanumWheel"
+          boundingObject Pose {
+            rotation 1 0 0 -1.5708
+            children [
+              Cylinder {
+                height 0.022
+                radius 0.038
+              }
+            ]
+          }
+          physics Physics {
+            density -1
+            mass 0.06
+            centerOfMass [
+              0 0 0
+            ]
+          }
+        }
+      }
+
+      # ========== IMU SENSORS ==========
+      Accelerometer {
+        translation 0 0 0.10
+        name "accelerometer"
+      }
+      Gyro {
+        translation 0 0 0.10
+        name "gyro"
+      }
+      Compass {
+        translation 0 0 0.10
+        name "compass"
+      }
+
+      # ========== GPS ==========
+      GPS {
+        translation 0 0 0.17
+        name "gps"
+      }
+
+      # ========== RECEIVER ==========
+      Receiver {
+        name "receiver"
+        channel 1
+      }
+
+      # ========== EMITTER ==========
+      Emitter {
+        name "emitter"
+        channel 1
+        range 50.0
+      }
+    ]
+
+    # ========== BOUNDING OBJECT ==========
+    # Two stacked boxes with the same size and offset as the CHASSIS and BODY
+    # visuals. Offsets use Pose (not Transform), matching the wheel
+    # boundingObjects in this PROTO.
+    boundingObject Group {
+      children [
+        # Chassis box
+        Pose {
+          translation 0 0 0.05
+          children [
+            Box {
+              size 0.32 0.16 0.06
+            }
+          ]
+        }
+        # Body box
+        Pose {
+          translation 0 0 0.11
+          children [
+            Box {
+              size 0.30 0.16 0.08
+            }
+          ]
+        }
+      ]
+    }
+
+    # ========== PHYSICS ==========
+    physics Physics {
+      density -1
+      mass 5.0
+      centerOfMass [
+        0 0 0.03
+      ]
+    }
+  }
+}
@@ -0,0 +1,7 @@
+make[1]: Entering directory '/run/host/home/johnnyf/Documents/Projects/TIR/project'
+make DRIVE=differential WORLD=field
+make[2]: Entering directory '/run/host/home/johnnyf/Documents/Projects/TIR/project'
+python -m training.eval --policy training/runs/rl_differential_field \
+	--max-flock 10 --max-steps 15000 --n-seeds 10 \
+	--drive-mode differential --world field
+make[2]: Leaving directory '/run/host/home/johnnyf/Documents/Projects/TIR/project'
@@ -0,0 +1,8 @@
+"""Pytest configuration — ensure the project root is on ``sys.path``."""
+
+import os
+import sys
+
+_PROJECT_ROOT = os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))
+if _PROJECT_ROOT not in sys.path:
+    sys.path.insert(0, _PROJECT_ROOT)
@@ -0,0 +1,188 @@
+"""Control primitives: speed modulation, Strömbom, Sequential, ActiveScan."""
+
+import math
+
+import pytest
+
+from herding.control.active_scan import (
+    EMPTY_DEBOUNCE_STEPS, INITIAL_SCAN_STEPS, ActiveScanTeacher,
+)
+from herding.control.modulation import (
+    MIN_SPEED, SLOW_NEAR_SHEEP, modulate_speed_near_sheep,
+)
+from herding.control.sequential import compute_action as sequential_action
+from herding.control.strombom import (
+    DELTA_DRIVE, F_FACTOR, compute_action as strombom_action,
+)
+from herding.control.universal import compute_action as universal_action
+from herding.world.geometry import PEN_ENTRY
+
+
+# ---------------------------------------------------------------------------
+# Modulation
+# ---------------------------------------------------------------------------
+
+def test_modulation_empty_input_passthrough():
+    assert modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0), []) == (1.0, 0.0)
+    assert modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0), {}) == (1.0, 0.0)
+
+
+def test_modulation_far_sheep_passthrough():
+    vx, vy = modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0), [(100.0, 0.0)])
+    assert (vx, vy) == (1.0, 0.0)
+
+
+def test_modulation_close_sheep_min_speed():
+    vx, vy = modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0), [(0.0, 0.0)])
+    assert math.isclose(vx, MIN_SPEED)
+    assert vy == 0.0
+
+
+def test_modulation_preserves_direction():
+    vx, vy = modulate_speed_near_sheep(0.6, 0.8, (0.0, 0.0), [(1.0, 0.0)])
+    ratio = math.hypot(vx, vy)
+    # Direction preserved.
+    assert math.isclose(vx / ratio, 0.6, abs_tol=1e-6)
+    assert math.isclose(vy / ratio, 0.8, abs_tol=1e-6)
+
+
+def test_modulation_linear_ramp_midpoint():
+    vx, _ = modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0),
+                                      [(SLOW_NEAR_SHEEP / 2, 0.0)])
+    expected = MIN_SPEED + (1.0 - MIN_SPEED) * 0.5
+    assert math.isclose(vx, expected, abs_tol=1e-6)
+
+
+def test_modulation_accepts_dict_input():
+    vx_list, _ = modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0),
+                                           [(1.0, 0.0)])
+    vx_dict, _ = modulate_speed_near_sheep(1.0, 0.0, (0.0, 0.0),
+                                           {"t0": (1.0, 0.0)})
+    assert math.isclose(vx_list, vx_dict)
+
+
+# ---------------------------------------------------------------------------
+# Strömbom
+# ---------------------------------------------------------------------------
+
+def test_strombom_empty_input_idle():
+    vx, vy, mode = strombom_action((0.0, 0.0), {}, PEN_ENTRY)
+    assert (vx, vy, mode) == (0.0, 0.0, "idle")
+
+
+def test_strombom_tight_flock_drives():
+    # A tight 3-sheep cluster centred at (0, 8): radius < F_FACTOR·√3.
+    sheep = {"s0": (0.0, 8.0), "s1": (0.5, 8.5), "s2": (-0.5, 8.0)}
+    vx, vy, mode = strombom_action((0.0, 0.0), sheep, PEN_ENTRY)
+    assert mode == "drive"
+    assert math.isclose(math.hypot(vx, vy), 1.0, abs_tol=1e-3)
+
+
+def test_strombom_scattered_flock_collects():
+    # Sparse, max radius > F_FACTOR·√n.
+    sheep = {"s0": (10.0, 10.0), "s1": (-10.0, -10.0), "s2": (0.0, 0.0)}
+    _vx, _vy, mode = strombom_action((0.0, 0.0), sheep, PEN_ENTRY)
+    assert mode == "collect"
+
+
+def test_strombom_ignores_already_penned_sheep():
+    """Sheep south of the gate plane are excluded from the active set."""
+    sheep = {
+        "s_active": (5.0, 5.0),
+        "s_penned": (11.5, -20.0),
+    }
+    # With one active sheep, Strömbom drives (radius = 0 < threshold).
+    _vx, _vy, mode = strombom_action((0.0, 0.0), sheep, PEN_ENTRY)
+    assert mode == "drive"
+
+
+# ---------------------------------------------------------------------------
+# Sequential
+# ---------------------------------------------------------------------------
+
+def test_sequential_empty_input_idle():
+    vx, vy, mode = sequential_action((0.0, 0.0), {}, PEN_ENTRY)
+    assert (vx, vy, mode) == (0.0, 0.0, "idle")
+
+
+def test_sequential_targets_closest_to_pen():
+    near = (10.0, -5.0)       # closer to pen entry (11.5, -15)
+    far = (-10.0, 10.0)
+    sheep = {"near": near, "far": far}
+    _vx, _vy, mode = sequential_action((0.0, 0.0), sheep, PEN_ENTRY)
+    assert mode.startswith("drive:near")
+
+
+# ---------------------------------------------------------------------------
+# ActiveScan wrapper
+# ---------------------------------------------------------------------------
+
+def test_active_scan_initial_phase_rotates():
+    teacher = ActiveScanTeacher(strombom_action)
+    # First call → opening rotation regardless of input.
+    vx, vy, omega, mode = teacher(
+        (0.0, 0.0), 0.0, {"s0": (5.0, 0.0)}, PEN_ENTRY)
+    assert mode == "scan_initial"
+    assert omega == 0.0
+    assert math.isclose(math.hypot(vx, vy), 1.0, abs_tol=1e-6)
+
+
+def test_active_scan_hands_off_to_base_after_opener():
+    teacher = ActiveScanTeacher(strombom_action, initial_scan_steps=2)
+    # Burn through the opener.
+    for _ in range(2):
+        teacher((0.0, 0.0), 0.0, {"s0": (0.0, 8.0)}, PEN_ENTRY)
+    _vx, _vy, _omega, mode = teacher(
+        (0.0, 0.0), 0.0, {"s0": (0.0, 8.0)}, PEN_ENTRY)
+    # Either drive (Strömbom mode label) or collect; not scan_initial.
+    assert "scan" not in mode
+
+
+def test_active_scan_holds_last_action_on_brief_empty():
+    teacher = ActiveScanTeacher(strombom_action, initial_scan_steps=1)
+    # Step once (opening), then once with a visible sheep — sets last_action.
+    teacher((0.0, 0.0), 0.0, {}, PEN_ENTRY)
+    teacher((0.0, 0.0), 0.0, {"s0": (0.0, 8.0)}, PEN_ENTRY)
+    last = teacher.last_action
+    # Now a single empty frame → hold.
+    vx, vy, _omega, mode = teacher((0.0, 0.0), 0.0, {}, PEN_ENTRY)
+    assert mode == "hold"
+    assert (vx, vy) == last
+
+
+def test_active_scan_explores_after_sustained_empty():
+    teacher = ActiveScanTeacher(strombom_action, initial_scan_steps=1)
+    teacher((0.0, 0.0), 0.0, {}, PEN_ENTRY)  # opener
+    for _ in range(EMPTY_DEBOUNCE_STEPS):
+        last_vx, last_vy, _omega, mode = teacher(
+            (5.0, 5.0), 0.0, {}, PEN_ENTRY)
+    assert mode in ("explore", "scan_at_centre")
+
+
+def test_active_scan_preserves_mecanum_omega():
+    """Regression: ActiveScanTeacher must propagate omega from a mecanum
+    base teacher, not silently drop it. Without this, BC mecanum demos
+    have omega=0 everywhere and the policy never learns to rotate.
+    """
+    teacher = ActiveScanTeacher(universal_action, initial_scan_steps=1)
+    # Burn the opener so we exit phase 1.
+    teacher((0.0, 0.0), 0.0, {"s0": (8.0, 8.0)}, PEN_ENTRY,
+            drive_mode="mecanum")
+    # Place a sheep off to the side so the dog needs to face it.
+    # Dog at origin facing +x (heading=0); target at (0, 8) → desired
+    # heading +π/2, so omega should be positive.
+    vx, vy, omega, mode = teacher(
+        (0.0, 0.0), 0.0, {"s0": (0.0, 8.0)}, PEN_ENTRY,
+        drive_mode="mecanum")
+    assert mode in ("collect", "drive", "recovery")
+    assert abs(omega) > 0.05, f"omega should be non-zero on mecanum, got {omega}"
+
+
+def test_active_scan_reset_clears_state():
+    teacher = ActiveScanTeacher(strombom_action, initial_scan_steps=5)
+    for _ in range(3):
+        teacher((0.0, 0.0), 0.0, {}, PEN_ENTRY)
+    assert teacher.step == 3
+    teacher.reset()
+    assert teacher.step == 0
+    assert teacher.empty_streak == 0
@@ -0,0 +1,192 @@
+"""Differential-drive and mecanum kinematics tests."""
+
+import math
+
+import pytest
+
+from herding.world.diffdrive import (
+    heading_speed_to_wheels, kinematics_step,
+    mecanum_inverse, mecanum_kinematics_step,
+    velocity_to_mecanum_wheels, velocity_to_wheels,
+)
+
+
+WHEEL_R = 0.038
+WHEEL_B = 0.28
+MAX_OMEGA = 70.0
+MAX_LINEAR = WHEEL_R * MAX_OMEGA
+DT = 0.016
+
+
+def test_kinematics_zero_input_is_identity():
+    x, y, h = kinematics_step(1.0, 2.0, 0.5, 0.0, 0.0, WHEEL_R, WHEEL_B, DT)
+    assert (x, y, h) == (1.0, 2.0, 0.5)
+
+
+def test_kinematics_forward_motion():
+    # Equal wheel speeds → pure translation along the heading.
+    x, y, h = kinematics_step(0.0, 0.0, 0.0, 10.0, 10.0, WHEEL_R, WHEEL_B, DT)
+    assert h == 0.0
+    assert math.isclose(x, 10.0 * WHEEL_R * DT)
+    assert y == 0.0
+
+
+def test_kinematics_pure_rotation():
+    # Opposite wheel speeds → pure rotation, position unchanged.
+    x, y, h = kinematics_step(0.0, 0.0, 0.0, -5.0, 5.0, WHEEL_R, WHEEL_B, DT)
+    assert (x, y) == (0.0, 0.0)
+    assert h > 0.0
+
+
+def test_kinematics_heading_wrapped_to_pi():
+    _, _, h = kinematics_step(0.0, 0.0, math.pi - 0.01, 100.0, -100.0,
+                              WHEEL_R, WHEEL_B, DT)
+    assert -math.pi <= h <= math.pi
+
+
+def test_velocity_to_wheels_zero_velocity():
+    left, right = velocity_to_wheels(0.0, 0.0, 0.0,
+                                     MAX_LINEAR, WHEEL_R, MAX_OMEGA)
+    assert (left, right) == (0.0, 0.0)
+
+
+def test_velocity_to_wheels_aligned_forward():
+    # Target straight ahead → equal positive wheel speeds.
+    left, right = velocity_to_wheels(1.0, 0.0, 0.0,
+                                     MAX_LINEAR, WHEEL_R, MAX_OMEGA, k_turn=4.0)
+    assert math.isclose(left, right, abs_tol=1e-6)
+    assert left > 0.0
+
+
+def test_velocity_to_wheels_perpendicular_target_spins():
+    # Target 90° from heading → forward speed ≈ 0, wheels equal-and-opposite.
+    left, right = velocity_to_wheels(0.0, 1.0, 0.0,
+                                     MAX_LINEAR, WHEEL_R, MAX_OMEGA, k_turn=4.0)
+    assert left + right == pytest.approx(0.0, abs=1e-6)
+    assert right > 0.0  # turning CCW (left of heading is +y for h=0)
+
+
+def test_velocity_to_wheels_clamped_to_max_omega():
+    # Far overshoot — both wheel commands clamped at ±MAX_OMEGA.
+    left, right = velocity_to_wheels(-1.0, 0.0, 0.0,
+                                     MAX_LINEAR, WHEEL_R, MAX_OMEGA, k_turn=20.0)
+    assert -MAX_OMEGA <= left <= MAX_OMEGA
+    assert -MAX_OMEGA <= right <= MAX_OMEGA
+
+
+def test_heading_speed_to_wheels_aligned():
+    left, right = heading_speed_to_wheels(0.0, 10.0, 0.0, MAX_OMEGA)
+    assert math.isclose(left, right, abs_tol=1e-6)
+    assert left > 0.0
+
+
+def test_heading_speed_to_wheels_reverse_target_forwards_zero():
+    left, right = heading_speed_to_wheels(math.pi, 10.0, 0.0, MAX_OMEGA)
+    # cos(π) clamped at 0 → no forward; pure rotation.
+    assert left + right == pytest.approx(0.0, abs=1e-6)
+
+
+# ---------------------------------------------------------------------------
+# Mecanum kinematics tests
+# ---------------------------------------------------------------------------
+
+LX = 0.14   # half wheel_base_x
+LY = 0.14   # half wheel_base_y
+
+
+def test_mecanum_kinematics_zero_is_identity():
+    x, y, h = mecanum_kinematics_step(
+        1.0, 2.0, 0.5, 0.0, 0.0, 0.0, 0.0, WHEEL_R, LX, LY, DT,
+    )
+    assert (x, y, h) == (1.0, 2.0, 0.5)
+
+
+def test_mecanum_kinematics_pure_forward():
+    # All 4 wheels equal → pure forward (vx_body > 0, vy_body = 0).
+    w = 10.0
+    x, y, h = mecanum_kinematics_step(
+        0.0, 0.0, 0.0, w, w, w, w, WHEEL_R, LX, LY, DT,
+    )
+    assert h == pytest.approx(0.0, abs=1e-9)
+    assert y == pytest.approx(0.0, abs=1e-9)
+    assert math.isclose(x, w * WHEEL_R * DT, rel_tol=1e-6)
+
+
+def test_mecanum_kinematics_pure_strafe():
+    # Strafe right (positive vy_body) with zero forward:
+    # vx_body = (w_fl+w_fr+w_rl+w_rr)*r/4 = 0 → sum of wheels = 0
+    # vy_body = (-w_fl+w_fr+w_rl-w_rr)*r/4 > 0
+    # Use w_fl=-10, w_fr=10, w_rl=10, w_rr=-10.
+    w_fl, w_fr, w_rl, w_rr = -10.0, 10.0, 10.0, -10.0
+    x, y, h = mecanum_kinematics_step(
+        0.0, 0.0, 0.0, w_fl, w_fr, w_rl, w_rr, WHEEL_R, LX, LY, DT,
+    )
+    assert h == pytest.approx(0.0, abs=1e-9)
+    assert x == pytest.approx(0.0, abs=1e-9)
+    expected_vy = (-w_fl + w_fr + w_rl - w_rr) * WHEEL_R / 4.0
+    assert math.isclose(y, expected_vy * DT, rel_tol=1e-6)
+
+
+def test_mecanum_kinematics_pure_rotation():
+    # Pure rotation: vx_body=0, vy_body=0, omega>0.
+    # w_fl=-10, w_fr=10, w_rl=-10, w_rr=10 → all sums cancel except omega.
+    w_fl, w_fr, w_rl, w_rr = -10.0, 10.0, -10.0, 10.0
+    x, y, h = mecanum_kinematics_step(
+        0.0, 0.0, 0.0, w_fl, w_fr, w_rl, w_rr, WHEEL_R, LX, LY, DT,
+    )
+    assert x == pytest.approx(0.0, abs=1e-9)
+    assert y == pytest.approx(0.0, abs=1e-9)
+    assert h > 0.0
+
+
+def test_mecanum_inverse_roundtrip():
+    # Inverse → forward: pick desired body velocities, compute wheels,
+    # then verify forward kinematics recovers the same velocities.
+    vx_b = 0.5
+    vy_b = 0.3
+    omega = 0.2
+    w_fl, w_fr, w_rl, w_rr = mecanum_inverse(
+        vx_b, vy_b, omega, WHEEL_R, LX, LY, MAX_OMEGA,
+    )
+    vx_check = (w_fl + w_fr + w_rl + w_rr) * WHEEL_R / 4.0
+    vy_check = (-w_fl + w_fr + w_rl - w_rr) * WHEEL_R / 4.0
+    omega_check = (-w_fl + w_fr - w_rl + w_rr) * WHEEL_R / (4.0 * (LX + LY))
+    assert math.isclose(vx_b, vx_check, rel_tol=1e-6)
+    assert math.isclose(vy_b, vy_check, rel_tol=1e-6)
+    assert math.isclose(omega, omega_check, rel_tol=1e-6)
+
+
+def test_mecanum_inverse_clamped():
+    # Request an extreme velocity — all wheels should be clamped.
+    w_fl, w_fr, w_rl, w_rr = mecanum_inverse(
+        100.0, 100.0, 50.0, WHEEL_R, LX, LY, MAX_OMEGA,
+    )
+    assert max(abs(w_fl), abs(w_fr), abs(w_rl), abs(w_rr)) <= MAX_OMEGA
+
+
+def test_velocity_to_mecanum_wheels_zero():
+    result = velocity_to_mecanum_wheels(
+        0.0, 0.0, 0.0, 0.0, MAX_LINEAR, WHEEL_R, LX, LY, MAX_OMEGA,
+        wheel_base=WHEEL_B,
+    )
+    assert result == (0.0, 0.0, 0.0, 0.0)
+
+
+def test_velocity_to_mecanum_wheels_forward():
+    w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
+        1.0, 0.0, 0.0, 0.0, MAX_LINEAR, WHEEL_R, LX, LY, MAX_OMEGA,
+        wheel_base=WHEEL_B,
+    )
+    # All 4 wheels should be positive and roughly equal.
+    assert all(w > 0.0 for w in (w_fl, w_fr, w_rl, w_rr))
+    assert math.isclose(w_fl, w_rr, rel_tol=1e-6)
+    assert math.isclose(w_fr, w_rl, rel_tol=1e-6)
+
+
+def test_velocity_to_mecanum_wheels_clamped():
+    # Extreme input — all wheels within max.
+    ws = velocity_to_mecanum_wheels(
+        1.0, 1.0, 1.0, 0.0, MAX_LINEAR, WHEEL_R, LX, LY, MAX_OMEGA,
+        wheel_base=WHEEL_B,
+    )
+    assert all(abs(w) <= MAX_OMEGA for w in ws)
@@ -0,0 +1,116 @@
+"""Gymnasium env: contract, determinism, reward components."""
+
+import math
+
+import numpy as np
+import pytest
+
+from herding.world.geometry import MAX_SHEEP, PEN_ENTRY
+from herding.perception.obs import OBS_DIM
+from herding.control.strombom import compute_action as strombom_action
+from training.herding_env import HerdingEnv
+
+
+def test_env_obs_action_shapes_single_frame():
+    env = HerdingEnv(n_sheep=3, seed=0, use_lidar=False)
+    obs, info = env.reset()
+    assert obs.shape == (OBS_DIM,)
+    assert obs.dtype == np.float32
+    obs, reward, term, trunc, info = env.step(
+        np.array([0.5, 0.0], dtype=np.float32))
+    assert obs.shape == (OBS_DIM,)
+    assert isinstance(reward, float)
+    assert isinstance(term, bool) and isinstance(trunc, bool)
+
+
+def test_env_observation_space_matches_frame_stack():
+    env = HerdingEnv(n_sheep=2, seed=0, use_lidar=False, frame_stack=4)
+    obs, _ = env.reset()
+    assert obs.shape == (OBS_DIM * 4,)
+    assert env.observation_space.shape == (OBS_DIM * 4,)
+
+
+def test_env_reset_determinism_same_seed():
+    a = HerdingEnv(n_sheep=3, seed=42, use_lidar=False)
+    b = HerdingEnv(n_sheep=3, seed=42, use_lidar=False)
+    obs_a, _ = a.reset(seed=42)
+    obs_b, _ = b.reset(seed=42)
+    assert np.allclose(obs_a, obs_b)
+
+
+def test_env_constructor_seed_applies_to_first_reset():
+    a = HerdingEnv(n_sheep=3, seed=42, use_lidar=False)
+    b = HerdingEnv(n_sheep=3, seed=42, use_lidar=False)
+    obs_a, _ = a.reset()
+    obs_b, _ = b.reset()
+    assert np.allclose(obs_a, obs_b)
+
+
+def test_env_curriculum_samples_full_range():
+    env = HerdingEnv(seed=0, use_lidar=False)
+    sizes = set()
+    for _ in range(40):
+        _, info = env.reset()
+        sizes.add(info["n_sheep"])
+    assert 1 in sizes
+    assert max(sizes) <= MAX_SHEEP
+
+
+def test_env_step_returns_finite_values():
+    env = HerdingEnv(n_sheep=2, max_steps=200, seed=1, use_lidar=False)
+    obs, _ = env.reset()
+    for _ in range(200):
+        action = np.array([0.5, 0.5], dtype=np.float32)
+        obs, reward, term, trunc, _ = env.step(action)
+        assert np.isfinite(obs).all()
+        assert math.isfinite(reward)
+        if term or trunc:
+            break
+
+
+def test_env_options_n_sheep_overrides_curriculum():
+    env = HerdingEnv(seed=0, use_lidar=False)
+    _, info = env.reset(options={"n_sheep": 7})
+    assert info["n_sheep"] == 7
+
+
+def test_env_perceived_positions_lidar_vs_privileged():
+    env_priv = HerdingEnv(n_sheep=3, seed=0, use_lidar=False)
+    env_priv.reset(seed=0)
+    pos_priv = env_priv.perceived_positions()
+    assert len(pos_priv) == 3
+
+    env_lidar = HerdingEnv(n_sheep=3, seed=0, use_lidar=True)
+    env_lidar.reset(seed=0)
+    pos_lidar = env_lidar.perceived_positions()
+    # LiDAR mode returns whatever the tracker has — may be fewer than 3
+    # if sheep are out of FOV / range, but never more.
+    assert len(pos_lidar) <= 3
+
+
+def test_env_set_time_weight_affects_reward():
+    env = HerdingEnv(n_sheep=1, seed=0, use_lidar=False)
+    env.reset(seed=0)
+    _, r_default, *_ = env.step(np.array([0.0, 0.0], dtype=np.float32))
+    env.set_time_weight(-1.0)
+    env.reset(seed=0)
+    _, r_penalised, *_ = env.step(np.array([0.0, 0.0], dtype=np.float32))
+    assert r_penalised < r_default
+
+
+def test_env_strombom_rollout_moves_dog():
+    env = HerdingEnv(n_sheep=2, max_steps=400, seed=1, use_lidar=False)
+    env.reset()
+    start = (env.dog_x, env.dog_y)
+    for _ in range(400):
+        positions = env.perceived_positions()
+        if not positions:
+            break
+        vx, vy, _ = strombom_action(
+            (env.dog_x, env.dog_y), positions, PEN_ENTRY)
+        obs, _r, term, trunc, _ = env.step(
+            np.array([vx, vy], dtype=np.float32))
+        if term or trunc:
+            break
+    displacement = math.hypot(env.dog_x - start[0], env.dog_y - start[1])
+    assert displacement > 0.05
@@ -0,0 +1,75 @@
+"""Geometric predicates and constants."""
+
+import math
+
+from herding.world.geometry import (
+    FIELD_X, FIELD_Y, GATE_X, GATE_Y, MAX_SHEEP, PEN_ENTRY, PEN_X, PEN_Y,
+    distance_to_pen_entry, in_field, in_gate_corridor, in_pen,
+    is_penned_position,
+)
+
+
+def test_field_dimensions():
+    assert FIELD_X == (-15.0, 15.0)
+    assert FIELD_Y == (-15.0, 15.0)
+
+
+def test_pen_geometry():
+    assert PEN_X == (10.0, 13.0)
+    assert PEN_Y == (-22.0, -15.0)
+    assert PEN_ENTRY == (11.5, -15.0)
+    assert GATE_X == PEN_X
+    assert GATE_Y == -15.0
+
+
+def test_in_pen_strict_interior():
+    assert in_pen(11.5, -18.0)
+    assert not in_pen(10.0, -18.0)        # boundary excluded
+    assert not in_pen(11.5, -15.0)        # gate plane excluded
+    assert not in_pen(0.0, 0.0)
+
+
+def test_in_field_with_margin():
+    assert in_field(0.0, 0.0)
+    assert in_field(14.0, 14.0)
+    assert not in_field(15.5, 0.0)
+    assert in_field(14.4, 0.0, margin=0.5)
+    assert not in_field(14.6, 0.0, margin=0.5)
+
+
+def test_in_gate_corridor():
+    assert in_gate_corridor(11.5, -18.0)
+    assert in_gate_corridor(10.0, -15.0)
+    assert not in_gate_corridor(11.5, -10.0)
+    assert not in_gate_corridor(5.0, -18.0)
+
+
+def test_is_penned_position_latches_below_gate():
+    # In the gate column and south of the gate plane → penned.
+    assert is_penned_position(11.5, -15.0)
+    assert is_penned_position(10.5, -18.0)
+    assert is_penned_position(12.5, -22.0)
+    # Above the gate plane → not yet.
+    assert not is_penned_position(11.5, -14.9)
+    # Outside the gate column → not penned even if south.
+    assert not is_penned_position(0.0, -16.0)
+    assert not is_penned_position(14.0, -16.0)
+
+
+def test_is_penned_position_latch_margin():
+    # Slight tolerance on the gate column.
+    assert is_penned_position(9.9, -15.5)
+    assert is_penned_position(13.1, -15.5)
+    assert not is_penned_position(9.7, -15.5)
+
+
+def test_distance_to_pen_entry():
+    assert distance_to_pen_entry(*PEN_ENTRY) == 0.0
+    assert math.isclose(distance_to_pen_entry(11.5, -10.0), 5.0)
+    assert math.isclose(distance_to_pen_entry(0.0, 0.0),
+                        math.hypot(11.5, 15.0))
+
+
+def test_max_sheep_positive_int():
+    assert isinstance(MAX_SHEEP, int)
+    assert MAX_SHEEP >= 1
@@ -0,0 +1,71 @@
+"""Observation builder — shape, normalisation, order invariance."""
+
+import math
+
+import numpy as np
+import pytest
+
+from herding.perception.obs import OBS_DIM, build_obs
+
+
+def test_obs_shape_and_dtype():
+    obs = build_obs((0.0, 0.0), 0.0, [(5.0, 5.0)], [False])
+    assert obs.shape == (OBS_DIM,)
+    assert obs.dtype == np.float32
+
+
+def test_obs_no_active_sheep_terminal():
+    # All sheep penned → flock-summary fields zero, count zero.
+    obs = build_obs((0.0, 0.0), 0.0, [(1.0, 1.0), (2.0, 2.0)], [True, True])
+    assert obs[19] == 0.0
+    # Aggregate fields (CoM, radius, std, vectors) should all be zero.
+    assert np.allclose(obs[4:12], 0.0)
+
+
+def test_obs_dog_pose_normalised():
+    obs = build_obs((15.0, -15.0), math.pi / 2, [(0.0, 0.0)], [False])
+    assert math.isclose(obs[0], 1.0)
+    assert math.isclose(obs[1], -1.0)
+    assert math.isclose(obs[2], math.cos(math.pi / 2), abs_tol=1e-6)
+    assert math.isclose(obs[3], math.sin(math.pi / 2), abs_tol=1e-6)
+
+
+def test_obs_order_invariance():
+    """Sheep order in the input list must not affect the observation."""
+    sheep = [(3.0, 2.0), (-5.0, 1.0), (0.0, 8.0)]
+    p = [False] * 3
+    a = build_obs((0.0, 0.0), 0.0, sheep, p)
+    b = build_obs((0.0, 0.0), 0.0, list(reversed(sheep)), list(reversed(p)))
+    assert np.allclose(a, b)
+
+
+def test_obs_count_field_normalised_by_n_max():
+    sheep = [(1.0, 1.0)] * 5
+    p = [False] * 5
+    obs = build_obs((0.0, 0.0), 0.0, sheep, p, n_max=10)
+    assert math.isclose(obs[19], 0.5)
+
+
+def test_obs_polar_histogram_sums_to_one():
+    sheep = [(1.0, 0.0), (-1.0, 0.0), (0.0, 1.0), (0.0, -1.0)]
+    obs = build_obs((0.0, 0.0), 0.0, sheep, [False] * 4)
+    assert math.isclose(float(obs[20:28].sum()), 1.0, abs_tol=1e-6)
+
+
+def test_obs_named_channels_closest_rearmost():
+    # Channels 28..29 = (closest_to_pen - dog) / 15
+    # Channels 30..31 = (rearmost - dog) / 15
+    pen_x, pen_y = 11.5, -15.0
+    near = (pen_x + 1.0, pen_y + 1.0)
+    far = (-10.0, 10.0)
+    obs = build_obs((0.0, 0.0), 0.0, [near, far], [False, False])
+    tol = 1e-5
+    assert math.isclose(obs[28], near[0] / 15.0, abs_tol=tol)
+    assert math.isclose(obs[29], near[1] / 15.0, abs_tol=tol)
+    assert math.isclose(obs[30], far[0] / 15.0, abs_tol=tol)
+    assert math.isclose(obs[31], far[1] / 15.0, abs_tol=tol)
+
+
+def test_obs_pen_vector_zero_at_pen_entry():
+    obs = build_obs((11.5, -15.0), 0.0, [(0.0, 0.0)], [False])
+    assert math.isclose(obs[14], 0.0)         # distance to pen
@@ -0,0 +1,166 @@
+"""LiDAR simulation + perception pipeline + multi-target tracker."""
+
+import math
+
+import numpy as np
+import pytest
+
+from herding.perception.lidar_perception import (
+    STATIC_REJECT, detections_from_scan,
+)
+from herding.perception.lidar_sim import (
+    LIDAR_MAX_RANGE, LIDAR_N_RAYS, SHEEP_RADIUS, ray_angles, simulate_scan,
+)
+from herding.perception.sheep_tracker import (
+    FORGET_STEPS, GATE_M, MAX_ACTIVE_TRACKS, REACQUIRE_GATE_M,
+    REACQUIRE_MIN_AGE, SheepTracker,
+)
+
+
+# ---------------------------------------------------------------------------
+# Sim
+# ---------------------------------------------------------------------------
+
+def test_simulate_scan_shape_and_dtype():
+    ranges = simulate_scan(0.0, 0.0, 0.0, [(5.0, 0.0)], noise=0.0)
+    assert ranges.shape == (LIDAR_N_RAYS,)
+    assert ranges.dtype == np.float32
+
+
+def test_simulate_scan_no_sheep_far_from_walls():
+    # Dog at origin, no sheep, walls all ≥ 15 m away → all rays at max.
+    ranges = simulate_scan(0.0, 0.0, 0.0, [], noise=0.0)
+    # Walls (east/west at ±15) are beyond LIDAR_MAX_RANGE=12, so no hits.
+    assert (ranges == LIDAR_MAX_RANGE).all()
+
+
+def test_simulate_scan_sheep_in_front_returns_centre_hit():
+    # Sheep dead ahead at 5 m. Centre ray should hit ~ 5 - SHEEP_RADIUS.
+    ranges = simulate_scan(0.0, 0.0, 0.0, [(5.0, 0.0)], noise=0.0)
+    centre = ranges[LIDAR_N_RAYS // 2]
+    assert math.isclose(float(centre), 5.0 - SHEEP_RADIUS, abs_tol=0.01)
+
+
+def test_simulate_scan_sheep_behind_dog_not_hit():
+    # With 360° FOV, a sheep behind the dog IS now hit.
+    ranges = simulate_scan(0.0, 0.0, 0.0, [(-5.0, 0.0)], noise=0.0)
+    assert (ranges < LIDAR_MAX_RANGE).any()
+    # Verify the closest hit is near 5m (sheep at distance 5).
+    assert float(ranges.min()) < 5.3
+
+
+def test_simulate_scan_wall_hit():
+    # Dog 1 m south of the north wall, facing north → centre ray ≈ 1 m.
+    ranges = simulate_scan(0.0, 14.0, math.pi / 2, [], noise=0.0)
+    centre = ranges[LIDAR_N_RAYS // 2]
+    assert math.isclose(float(centre), 1.0, abs_tol=0.01)
+
+
+# ---------------------------------------------------------------------------
+# Perception
+# ---------------------------------------------------------------------------
+
+def test_detections_recover_sheep_position():
+    sheep = [(5.0, 0.0), (3.0, 1.0)]
+    ranges = simulate_scan(0.0, 0.0, 0.0, sheep, noise=0.0)
+    det = detections_from_scan(ranges, 0.0, 0.0, 0.0)
+    assert len(det) == 2
+    # Centroid bias is corrected to within ~5 cm.
+    for truth in sheep:
+        assert any(math.hypot(d[0] - truth[0], d[1] - truth[1]) < 0.1
+                   for d in det)
+
+
+def test_detections_filter_gate_post():
+    # An empty scene at the dog right next to a gate post produces no
+    # detections — the static-feature filter drops the post return.
+    ranges = simulate_scan(11.5, -10.0, -math.pi / 2, [], noise=0.0)
+    det = detections_from_scan(ranges, 11.5, -10.0, -math.pi / 2)
+    for cx, cy in det:
+        assert math.hypot(cx - 10.0, cy + 15.0) > STATIC_REJECT
+        assert math.hypot(cx - 13.0, cy + 15.0) > STATIC_REJECT
+
+
+def test_detections_empty_scan_returns_nothing():
+    assert detections_from_scan(np.array([], dtype=np.float32),
+                                0.0, 0.0, 0.0) == []
+
+
+# ---------------------------------------------------------------------------
+# Tracker
+# ---------------------------------------------------------------------------
+
+def test_tracker_creates_track_for_new_detection():
+    t = SheepTracker()
+    t.update([(5.0, 0.0)])
+    assert t.n_active() == 1
+
+
+def test_tracker_associates_close_detections():
+    """A small movement within the gate keeps the same track."""
+    t = SheepTracker()
+    t.update([(5.0, 0.0)])
+    t.update([(5.5, 0.0)])
+    assert t.n_active() == 1
+
+
+def test_tracker_spawns_new_track_far_detection():
+    t = SheepTracker()
+    t.update([(5.0, 0.0)])
+    t.update([(-5.0, 0.0)])           # well outside the gate
+    assert t.n_active() == 2
+
+
+def test_tracker_reacquisition_for_stale_track():
+    """A stale track within the wider re-acquisition gate rebinds rather
+    than spawning a duplicate."""
+    t = SheepTracker()
+    t.update([(0.0, 0.0)])
+    # Let it go stale.
+    for _ in range(REACQUIRE_MIN_AGE):
+        t.update([])
+    # Re-emerges within REACQUIRE_GATE but outside the primary GATE.
+    offset = (GATE_M + REACQUIRE_GATE_M) / 2.0
+    t.update([(offset, 0.0)])
+    assert t.n_active() == 1
+
+
+def test_tracker_forgets_stale_tracks():
+    t = SheepTracker()
+    t.update([(0.0, 0.0)])
+    for _ in range(FORGET_STEPS + 1):
+        t.update([])
+    assert t.n_active() == 0
+
+
+def test_tracker_penned_position_promotes_track():
+    t = SheepTracker()
+    t.update([(11.5, -16.0)])         # spawn inside the pen column
+    # is_penned_position is True for this point.
+    assert t.n_penned() == 1
+    assert t.n_active() == 0
+
+
+def test_tracker_penned_tracks_persist():
+    t = SheepTracker()
+    t.update([(11.5, -16.0)])
+    for _ in range(FORGET_STEPS * 2):
+        t.update([])
+    # Penned tracks are not forgotten.
+    assert t.n_penned() == 1
+
+
+def test_tracker_caps_active_set():
+    t = SheepTracker()
+    # Spawn more than the cap, each well outside the others' gates.
+    for k in range(MAX_ACTIVE_TRACKS + 5):
+        t.update([(k * (GATE_M + 1.0), 0.0)])
+    assert t.n_active() <= MAX_ACTIVE_TRACKS
+
+
+def test_tracker_reset_clears_state():
+    t = SheepTracker()
+    t.update([(0.0, 0.0)])
+    t.reset()
+    assert t.n_active() == 0
+    assert t.step == 0
@@ -0,0 +1,84 @@
+"""Benchmark LiDAR perception improvements.
+
+Measures success rate, mean steps, and tracker quality metrics for
+demo collection across multiple seeds. Compares configurations.
+
+Usage::
+
+    python -m tools.benchmark_lidar --n-sheep 5 --seeds 15
+    HERDING_WORLD=field_round python -m tools.benchmark_lidar --n-sheep 5
+"""
+
+from __future__ import annotations
+
+import argparse
+import time
+from collections import Counter
+
+from training.bc.collect import collect_one
+from herding.control.universal import compute_action
+
+
+def run_benchmark(n_sheep: int, n_seeds: int, max_steps: int = 100000,
+                  drive_mode: str = "differential"):
+    results = []
+    t0 = time.time()
+    for seed in range(n_seeds):
+        obs, actions, success, steps = collect_one(
+            n_sheep, seed, max_steps, 5, compute_action,
+            frame_stack=1, privileged=False, drive_mode=drive_mode,
+        )
+        results.append({
+            "seed": seed,
+            "success": success,
+            "steps": steps,
+            "logged": len(obs),
+        })
+        tag = "+" if success else "x"
+        print(f"  [{tag}] seed={seed:>2d}  steps={steps:>6d}")
+    elapsed = time.time() - t0
+
+    successes = [r for r in results if r["success"]]
+    failures = [r for r in results if not r["success"]]
+    n_ok = len(successes)
+    rate = 100.0 * n_ok / len(results)
+
+    mean_steps_ok = (sum(r["steps"] for r in successes) / n_ok) if n_ok else 0
+    mean_steps_all = sum(r["steps"] for r in results) / len(results)
+
+    print(f"\n  Results: {n_ok}/{len(results)} success ({rate:.0f}%)")
+    print(f"  Mean steps (success): {mean_steps_ok:>8.0f}")
+    print(f"  Mean steps (all):     {mean_steps_all:>8.0f}")
+    print(f"  Elapsed: {elapsed:.0f}s")
+    return {
+        "n_sheep": n_sheep,
+        "n_seeds": n_seeds,
+        "success_rate": rate,
+        "n_success": n_ok,
+        "mean_steps_success": mean_steps_ok,
+        "mean_steps_all": mean_steps_all,
+        "elapsed_s": elapsed,
+    }
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--n-sheep", type=int, default=5)
+    parser.add_argument("--seeds", type=int, default=15)
+    parser.add_argument("--max-steps", type=int, default=100000)
+    parser.add_argument("--drive-mode", default="differential",
+                        choices=["differential", "mecanum"])
+    args = parser.parse_args()
+
+    from herding.world.geometry import FIELD_SHAPE
+    print(f"[bench] world={FIELD_SHAPE}  n_sheep={args.n_sheep}  "
+          f"seeds={args.seeds}  drive={args.drive_mode}")
+    print()
+    result = run_benchmark(args.n_sheep, args.seeds, args.max_steps,
+                           args.drive_mode)
+    print()
+    print("[bench] summary:", result)
+
+
+if __name__ == "__main__":
+    main()
@@ -1,22 +0,0 @@
-"""
-Viewpoint inspector — prints position, orientation and FOV to the console
-once per second.  Attach as the controller of a dummy supervisor robot to
-copy-paste exact camera values into field.wbt.
-"""
-
-from controller import Supervisor
-
-robot    = Supervisor()
-timestep = int(robot.getBasicTimeStep())
-vp       = robot.getFromDef("VIEWPOINT")
-
-step = 0
-while robot.step(timestep) != -1:
-    if step % 60 == 0:
-        pos = vp.getField("position").getSFVec3f()
-        ori = vp.getField("orientation").getSFRotation()
-        fov = vp.getField("fieldOfView").getSFFloat()
-        print(f"position:    {pos[0]:.3f} {pos[1]:.3f} {pos[2]:.3f}")
-        print(f"orientation: {ori[0]:.3f} {ori[1]:.3f} {ori[2]:.3f} {ori[3]:.3f}")
-        print(f"fieldOfView: {fov:.3f}\n")
-    step += 1
@@ -0,0 +1,174 @@
+#!/bin/bash
+# Launch Webots with N sheep enabled and the chosen controller mode.
+# Generates a temporary world file in worlds/<WORLD>_test.wbt with sheep
+# beyond N commented out, sets the env vars the dog controller reads,
+# then starts Webots on it and waits until it exits or the run completes.
+#
+# Usage:
+#   tools/run_webots.sh [N] [MODE] [DRIVE] [WORLD]
+#     N     : number of active sheep (1..10), default 10
+#     MODE  : "bc" | "rl" | "strombom" | "sequential" | "universal", default "bc"
+#     DRIVE : "differential" | "mecanum", default "differential"
+#     WORLD : base world name (without .wbt), default "field"
+#             Supported: "field" (rectangular), "field_round" (circular)
+#
+# Examples:
+#   tools/run_webots.sh 10 bc                     # behaviour-cloned MLP, diff drive
+#   tools/run_webots.sh 10 rl mecanum             # KL-PPO fine-tune, mecanum wheels
+#   tools/run_webots.sh 5 sequential differential field_round  # analytic baseline, round field
+#   tools/run_webots.sh 3 strombom mecanum field_round  # Strömbom, mecanum, round
+#
+# Notes:
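+# * The policy directory defaults to training/runs/rl_<DRIVE> for MODE=rl
+#   and training/runs/bc_<DRIVE> otherwise, falling back to the legacy
+#   training/runs/rl or training/runs/bc when that directory is missing.
+#   Override via HERDING_POLICY_DIR=/path/to/run env var.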
+# * Conda env "tir" must be active (provides stable-baselines3 + torch).
+#
+# Headless-ish (no 3D view, fast sim, no modal dialogs):
+#   WEBOTS_HEADLESS=1 make webots N=10 MODE=rl DRIVE=mecanum
+#   WEBOTS_HEADLESS=1 tools/run_webots.sh 10 rl mecanum
+# This passes --no-rendering --minimize --mode=fast --batch to webots.
+# Webots still needs a display (Qt); on a machine without one use e.g.:
+#   xvfb-run -a env WEBOTS_HEADLESS=1 tools/run_webots.sh 10 rl mecanum
+# Optional extra CLI tokens (space-separated):
+#   WEBOTS_EXTRA_ARGS="--stdout --stderr" WEBOTS_HEADLESS=1 tools/run_webots.sh 10 rl
+
+set -e
+
+# Positional arguments, each with the default documented in the header.
+N=${1:-10}
+MODE=${2:-bc}
+DRIVE=${3:-differential}
+WORLD=${4:-field}
+
+# Require a plain decimal integer *before* the arithmetic range test:
+# (( ... )) evaluates its operand as an expression, so a value such as
+# "1.5" only makes the test itself fail, which `if` would treat as
+# "in range" and let the bad N reach the seq/sed calls further down.
+if [[ ! "$N" =~ ^[0-9]+$ ]]; then
+    echo "N must be 1..10, got $N" >&2; exit 1
+fi
+# Force base 10 so inputs like "08" are not parsed as invalid octal.
+N=$((10#$N))
+if (( N < 1 || N > 10 )); then
+    echo "N must be 1..10, got $N" >&2; exit 1
+fi
+case "$MODE" in
+    bc|rl|strombom|sequential|universal) ;;
+    *) echo "MODE must be bc|rl|strombom|sequential|universal, got '$MODE'" >&2; exit 1 ;;
+esac
+case "$DRIVE" in
+    differential|mecanum) ;;
+    *) echo "DRIVE must be differential|mecanum, got '$DRIVE'" >&2; exit 1 ;;
+esac
+
+# Repository root = parent of the directory holding this script.
+ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
+SRC="$ROOT/worlds/${WORLD}.wbt"
+[[ -f "$SRC" ]] || { echo "World file not found: $SRC" >&2; exit 1; }
+DST="$ROOT/worlds/${WORLD}_test.wbt"
+
+# Policy directory: an explicit HERDING_POLICY_DIR wins; otherwise prefer
+# the drive-mode-specific run directory and fall back to the legacy one.
+if [[ -n "${HERDING_POLICY_DIR:-}" ]]; then
+    RESOLVED_POLICY_DIR="$HERDING_POLICY_DIR"
+else
+    # Only MODE=rl uses the rl run; every other mode resolves the bc run.
+    case "$MODE" in
+        rl) run_name=rl ;;
+        *)  run_name=bc ;;
+    esac
+    DRIVED="$ROOT/training/runs/${run_name}_${DRIVE}"
+    LEGACY="$ROOT/training/runs/${run_name}"
+    if [[ ! -d "$DRIVED" ]]; then
+        RESOLVED_POLICY_DIR="$LEGACY"
+    else
+        RESOLVED_POLICY_DIR="$DRIVED"
+    fi
+fi
+
+cp "$SRC" "$DST"
+
+# Swap robot proto based on drive mode.
+# Base worlds reference ShepherdDog (diff-drive). For mecanum we swap in
+# ShepherdDogMecanum and inject mecanum contact properties.
+if [[ "$DRIVE" == "mecanum" ]]; then
+    sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum.proto"|' "$DST"
+    sed -i 's|^ShepherdDog {|ShepherdDogMecanum {|' "$DST"
+    # Append a MecanumWheel entry to the world's contactProperties list.
+    # The list's closing ']' is found by bracket depth: entries may carry
+    # bracketed fields of their own (the one injected here does), in which
+    # case the first ']' after the opening bracket is not the list end.
+    python3 -c "
+import re, sys
+with open(sys.argv[1], 'r') as f:
+    txt = f.read()
+mec = '''
+    ContactProperties {
+      material1 \"MecanumWheel\"
+      coulombFriction [
+        2
+      ]
+      bounce 0
+      forceDependentSlip [
+        10
+      ]
+      softCFM 0.0001
+    }'''
+# Locate the opening bracket of the first contactProperties list, then
+# walk forward counting brackets until it is balanced again.
+head = re.search(r'contactProperties\s*\[', txt)
+close_at = -1
+if head:
+    depth = 0
+    for pos in range(head.end() - 1, len(txt)):
+        if txt[pos] == '[':
+            depth += 1
+        elif txt[pos] == ']':
+            depth -= 1
+            if depth == 0:
+                close_at = pos
+                break
+if close_at >= 0:
+    txt = txt[:close_at] + mec + txt[close_at:]
+else:
+    # Same outcome as before (file left as is), but no longer silent.
+    print('[run_webots] warning: no contactProperties list found; '
+          'mecanum contact properties not injected', file=sys.stderr)
+with open(sys.argv[1], 'w') as f:
+    f.write(txt)
+" "$DST"
+fi
+
+# Comment out sheep N+1..10 by prefixing the matching Sheep { ... } line.
+for i in $(seq $((N+1)) 10); do
+    sed -i "s|^Sheep .* \"sheep${i}\".*|# &|" "$DST"
+done
+
+# grep -c still prints 0 when nothing matches but exits with status 1;
+# without the fallback `set -e` would abort here with no message.
+active=$(grep -c '^Sheep' "$DST" || true)
+echo "------------------------------------------------------------"
+echo "World      : $DST"
+echo "Mode       : $MODE"
+echo "Drive      : $DRIVE"
+echo "Sheep      : $active active"
+echo "Policy dir : $RESOLVED_POLICY_DIR"
+echo "------------------------------------------------------------"
+
+# Hand the settings to the controller twice: through the environment and
+# through a runtime config file, because Webots strips HERDING_* env vars
+# from controller subprocesses in some setups.
+printf '%s\n' \
+    "HERDING_MODE=$MODE" \
+    "HERDING_POLICY_DIR=$RESOLVED_POLICY_DIR" \
+    "HERDING_DRIVE=$DRIVE" \
+    "HERDING_WORLD=$WORLD" \
+    > "$ROOT/herding_runtime.cfg"
+
+export HERDING_MODE="$MODE" HERDING_POLICY_DIR="$RESOLVED_POLICY_DIR"
+export HERDING_DRIVE="$DRIVE" HERDING_WORLD="$WORLD"
+
+# Sentinel written by the controller once all GT sheep are penned. It is
+# polled below so Webots can be closed as soon as the task is done rather
+# than idling for minutes. Clear any leftover from a previous run first.
+DONE_FILE="$ROOT/training/.run_done"
+mkdir -p "$ROOT/training"
+rm -f "$DONE_FILE"
+
+if [[ "${WEBOTS_HEADLESS:-}" == "1" ]]; then
+    echo "[run_webots] headless flags: --no-rendering --minimize --mode=fast --batch"
+    # shellcheck disable=SC2086
+    webots --no-rendering --minimize --mode=fast --batch ${WEBOTS_EXTRA_ARGS:-} "$DST" &
+else
+    # shellcheck disable=SC2086
+    webots ${WEBOTS_EXTRA_ARGS:-} "$DST" &
+fi
+WEBOTS_PID=$!
+
+cleanup() {
+    kill "$WEBOTS_PID" 2>/dev/null || true
+    wait "$WEBOTS_PID" 2>/dev/null || true
+    exit 0
+}
+trap cleanup INT TERM
+
+# Poll for the sentinel; bail when Webots exits on its own or when the
+# user closes the window.
+while kill -0 "$WEBOTS_PID" 2>/dev/null; do
+    if [[ -f "$DONE_FILE" ]]; then
+        echo "[run_webots] all sheep penned — closing Webots"
+        sleep 1                       # let the controller print its line
+        kill "$WEBOTS_PID" 2>/dev/null || true
+        break
+    fi
+    sleep 1
+done
+wait "$WEBOTS_PID" 2>/dev/null || true
@@ -0,0 +1,90 @@
+# Training and Evaluation Details
+
+This file is the command-level companion to the root README. It focuses
+on data collection, BC, PPO fine-tuning, evaluation flags, and generated
+artifacts; use the root README for the high-level architecture and
+Webots demo quick start.
+
+Two training stages (BC, then RL), strictly sequential, fed by one demo-collection step:
+
+```
+sim demos (Strömbom on tracker output, K=4 frame stack)
+    │
+    ▼
+bc/pretrain.py  ──►  runs/bc   (Strömbom-imitated MLP)
+    │
+    ▼  KL-regularised PPO fine-tune
+    │
+runs/rl                        (deployed `rl` mode — beats BC and Strömbom)
+```
+
+## Files
+
+```
+herding_env.py     — Gymnasium env (LiDAR raycast + tracker by default)
+bc/pretrain.py     — MSE + cosine BC of (obs, action) demos into MlpPolicy
+rl/train.py        — KL-regularised PPO fine-tune of a BC checkpoint
+eval.py            — multi-seed analytic / learned policy comparison
+runs/              — checkpoints (whitelisted entries in top-level .gitignore)
+
+(Unit + integration tests live in the top-level ``tests/`` directory;
+run with ``python -m pytest tests/``.)
+```
+
+## End-to-end pipeline
+
+The simplest way to run everything is the Makefile at the project
+root: ``make`` does the full chain, ``make rl`` rebuilds whatever's
+needed up to that point, etc. The individual stages below are kept
+explicit for cases where you want to tune a single step.
+
+```bash
+# 1. Sim demos with the active-scan + Strömbom teacher under LiDAR
+#    perception. K=4 frame stack so the MLP has temporal context.
+python -m training.bc.collect --teacher strombom \
+    --out training/bc/demos.npz --seeds-per-n 15 --subsample 3 --frame-stack 4
+
+# 2. Behaviour-clone.
+python -m training.bc.pretrain --demos training/bc/demos.npz \
+    --out training/runs/bc --epochs 60 --net-arch 512,512
+
+# 3. KL-regularised PPO fine-tune of bc.
+python -m training.rl.train \
+    --bc training/runs/bc --out training/runs/rl \
+    --total-timesteps 1000000
+
+# 4. Multi-seed eval (env-side, fast).
+python -m training.eval --policy training/runs/rl \
+    --max-flock 10 --max-steps 15000 --n-seeds 10
+```
+
+`bc/pretrain.py` saves the **best-val_cos** snapshot, not the final
+epoch — multi-modal teachers make training noisy and the last epoch is
+often worse than an earlier one.
+
+`rl/train.py` loads BC weights into both a trainable policy and a
+frozen reference, fixes `log_std` small, and adds `β · KL(π‖π_ref)` to
+the loss so the policy can only move within a trust region around BC.
+See the file header for hyperparameter rationale.
+
+## Available analytic teachers
+
+| Name | What it does | Notes |
+|---|---|---|
+| `strombom` | Strömbom 2014 — collect when flock is scattered, drive CoM otherwise | Default; works for n=1–10 under tight cohesion |
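+| `sequential` | Pick the sheep closest to the pen and drive only it | Alternative; needs loose-cohesion regime |
+| `universal` | Drive-mode-aware teacher that also returns an `omega` command | `--teacher` default of `training.bc.collect`; not accepted by `training.eval --policy` |
+
+All are wrapped at demo-collection time in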
+`herding/control/active_scan.py:ActiveScanTeacher`, which adds an
+opening in-place rotation, walk-to-centre when the LiDAR sees
+nothing, and near-sheep speed modulation (same modulation
+`herding/control/modulation.py` applies to every dog mode at
+inference).
+
+## Evaluating analytic teachers directly
+
+```
+python -m training.eval --policy strombom    --max-flock 10 --max-steps 15000 --n-seeds 10
+python -m training.eval --policy sequential  --max-flock 10 --max-steps 15000 --n-seeds 10
+```
+
@@ -0,0 +1,211 @@
+"""Collect (obs, action) demonstrations from an analytic teacher.
+
+Runs the chosen teacher across a grid of ``(n_sheep, seed)`` combos at
+full difficulty, logs every Nth ``(obs, action)`` pair, and saves
+successful trajectories to ``.npz`` for behaviour cloning. The teacher
+is wrapped in :class:`ActiveScanTeacher` by default so it operates on
+the same partial-obs view the student will have at deployment.
+
+Usage::
+
+    python -m training.bc.collect --teacher strombom \\
+        --out training/bc/demos.npz --frame-stack 4
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import time
+from pathlib import Path
+
+import numpy as np
+
+# Early CLI parse so we can configure geometry before heavy imports.
+# (argparse is used again below for the full parse; this is a lightweight
+# pre-pass that only reads --world.)
+import sys  # explicit import: ``os.sys`` is an undocumented implementation detail
+
+_pre_argv = sys.argv[1:]
+_pre_world = None
+for i, a in enumerate(_pre_argv):
+    # Accept both "--world NAME" and "--world=NAME"; the first match wins.
+    if a == "--world" and i + 1 < len(_pre_argv):
+        _pre_world = _pre_argv[i + 1]
+        break
+    if a.startswith("--world="):
+        _pre_world = a.split("=", 1)[1]
+        break
+if _pre_world is not None:
+    # Configure geometry and export the choice before any other herding.*
+    # module is imported below.
+    from herding.world.geometry import configure as _geo_configure
+    _geo_configure(_pre_world)
+    os.environ["HERDING_WORLD"] = _pre_world
+
+from herding.control.active_scan import ActiveScanTeacher
+from herding.world.geometry import PEN_ENTRY, FIELD_SHAPE
+from herding.control.sequential import compute_action as sequential_action
+from herding.control.strombom import compute_action as strombom_action
+from herding.control.universal import compute_action as universal_action
+from training.herding_env import HerdingEnv
+
+
+# Registry of analytic teachers selectable through ``--teacher``; the
+# keys are the accepted CLI names, the values the teacher callables.
+TEACHERS = {
+    "sequential": sequential_action,
+    "strombom": strombom_action,
+    "universal": universal_action,
+}
+
+
+def _call_teacher(fn, dog_xy, dog_heading, sheep_positions, pen_target,
+                  drive_mode="differential"):
+    """Call any teacher function and return (vx, vy, omega, mode).
+
+    Normalizes across 3-tuple teachers (vx, vy, mode) and 4-tuple
+    universal teacher (vx, vy, omega, mode).  ActiveScanTeacher (when
+    invoked with drive_mode="mecanum") propagates the base teacher's
+    omega — see test_active_scan_preserves_mecanum_omega.
+
+    Three calling conventions are supported, tried in this order:
+
+    1. ``fn(dog_xy, dog_heading, sheep_positions, pen_target, drive_mode)``
+    2. ``fn(dog_xy, dog_heading, sheep_positions, pen_target)``
+    3. ``fn(dog_xy, sheep_positions, pen_target)``
+
+    The convention is chosen by binding the arguments against the
+    teacher's signature *without calling it*, so the teacher runs exactly
+    once and a ``TypeError`` raised inside the teacher propagates
+    unchanged instead of being mistaken for an arity mismatch.
+
+    Raises:
+        TypeError: if none of the three conventions fits ``fn``, or if
+            the teacher itself raises ``TypeError``.
+    """
+    import inspect  # local import keeps the module's import block untouched
+
+    conventions = (
+        (dog_xy, dog_heading, sheep_positions, pen_target, drive_mode),
+        (dog_xy, dog_heading, sheep_positions, pen_target),
+        (dog_xy, sheep_positions, pen_target),
+    )
+
+    try:
+        signature = inspect.signature(fn)
+    except (TypeError, ValueError):
+        # Not introspectable (e.g. some builtins): use trial calls below.
+        signature = None
+
+    if signature is not None:
+        for call_args in conventions:
+            try:
+                signature.bind(*call_args)
+            except TypeError:
+                continue  # arity mismatch — try the next convention
+            result = fn(*call_args)
+            break
+        else:
+            raise TypeError(
+                f"teacher {fn!r} accepts none of the supported signatures"
+            )
+    else:
+        # Legacy behaviour: detect the signature by attempting the calls.
+        try:
+            result = fn(*conventions[0])
+        except TypeError:
+            try:
+                result = fn(*conventions[1])
+            except TypeError:
+                result = fn(*conventions[2])
+
+    if len(result) == 4:
+        return result  # (vx, vy, omega, mode)
+    vx, vy, mode = result
+    return vx, vy, 0.0, mode
+
+
+def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int,
+                teacher_fn, frame_stack: int = 1, privileged: bool = False,
+                drive_mode: str = "differential"):
+    """Roll out one teacher-driven episode and log subsampled demo pairs.
+
+    Args:
+        n_sheep: Flock size passed to ``HerdingEnv``.
+        seed: Seed used both for env construction and ``env.reset``.
+        max_steps: Upper bound on env steps (also passed to the env).
+        subsample: Keep the ``(obs, action)`` pair of every step whose
+            index is a multiple of this value. Must be non-zero — it is
+            used as a modulus.
+        teacher_fn: Base analytic teacher callable.
+        frame_stack: Frame-stack depth forwarded to ``HerdingEnv``.
+        privileged: If true, call ``teacher_fn`` directly on the env's
+            ground-truth positions of un-penned sheep; otherwise call an
+            ``ActiveScanTeacher`` wrapper on ``env.perceived_positions()``.
+        drive_mode: ``"mecanum"`` logs 3-D ``(vx, vy, omega)`` actions;
+            any other value logs 2-D ``(vx, vy)`` actions.
+
+    Returns:
+        ``(obs, actions, success, steps)`` — float32 arrays of the logged
+        observations and actions, whether every entry of
+        ``env.sheep_penned`` is set at the end, and ``env.steps``.
+    """
+    env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
+                    difficulty=1.0, seed=seed, frame_stack=frame_stack,
+                    drive_mode=drive_mode)
+    obs, _ = env.reset(seed=seed)
+    obs_list, action_list = [], []
+    # Built unconditionally; only consulted on the non-privileged path.
+    scan_teacher = ActiveScanTeacher(teacher_fn)
+    for step in range(max_steps):
+        if privileged:
+            # Ground-truth positions of the sheep that are not penned yet.
+            positions = {f"s{i}": (float(env.sheep_x[i]), float(env.sheep_y[i]))
+                         for i in range(env.n_sheep) if not env.sheep_penned[i]}
+            if not positions:
+                break
+            vx, vy, omega, _mode = _call_teacher(
+                teacher_fn, (env.dog_x, env.dog_y), env.dog_heading,
+                positions, PEN_ENTRY, drive_mode,
+            )
+        else:
+            # Same perception view the env exposes to the student.
+            positions = env.perceived_positions()
+            result = _call_teacher(
+                scan_teacher, (env.dog_x, env.dog_y), env.dog_heading,
+                positions, PEN_ENTRY, drive_mode,
+            )
+            vx, vy, omega, _mode = result
+        if drive_mode == "mecanum":
+            action = np.array([vx, vy, omega], dtype=np.float32)
+        else:
+            action = np.array([vx, vy], dtype=np.float32)
+        # Log the observation the action was computed for, i.e. *before*
+        # the env is stepped.
+        if step % subsample == 0:
+            obs_list.append(obs.copy())
+            action_list.append(action.copy())
+        obs, _r, term, trunc, _info = env.step(action)
+        if term or trunc:
+            break
+    success = bool(env.sheep_penned.all())
+    return (
+        np.asarray(obs_list, dtype=np.float32),
+        np.asarray(action_list, dtype=np.float32),
+        success,
+        env.steps,
+    )
+
+
+def main():
+    """CLI entry point: collect demos over an ``(n_sheep, seed)`` grid.
+
+    Runs :func:`collect_one` for every flock size in ``--n-sheep-list``
+    and every seed in ``range(--seeds-per-n)``, keeps successful
+    trajectories (or all non-empty ones with ``--keep-failures``) and
+    writes ``obs``, ``actions`` and ``meta`` arrays to an ``.npz`` file.
+    Each ``meta`` row is ``(n_sheep, seed, n_logged, success, steps)``.
+
+    Raises:
+        RuntimeError: if no trajectory was kept.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--out", default="training/bc/demos.npz")
+    parser.add_argument("--n-sheep-list", default="1,2,3,5,8,10")
+    parser.add_argument("--seeds-per-n", type=int, default=15)
+    parser.add_argument("--max-steps", type=int, default=30000)
+    parser.add_argument("--subsample", type=int, default=5,
+                        help="Keep every Nth (obs, action) pair.")
+    parser.add_argument("--keep-failures", action="store_true",
+                        help="Include partial-success trajectories. Default off.")
+    parser.add_argument("--teacher", default="universal",
+                        choices=list(TEACHERS.keys()),
+                        help="Which analytic teacher to demonstrate.")
+    parser.add_argument("--frame-stack", type=int, default=1,
+                        help="Concatenate the last K obs into a "
+                             "(32·K)-D vector for the policy.")
+    parser.add_argument("--privileged", action="store_true",
+                        help="Teacher reads ground truth instead of "
+                             "tracker output (asymmetric BC).")
+    parser.add_argument("--drive-mode", default="differential",
+                        choices=["differential", "mecanum"],
+                        help="Drive mode for the dog robot.")
+    parser.add_argument("--world", default=None,
+                        choices=["field", "field_round"],
+                        help="World shape. If not set, uses HERDING_WORLD "
+                             "env var or defaults to 'field'. Must be set "
+                             "before geometry is imported.")
+    args = parser.parse_args()
+
+    # Fail fast on values that would otherwise crash mid-rollout
+    # (modulo by zero) or produce an empty grid with a misleading error.
+    if args.subsample < 1:
+        parser.error("--subsample must be >= 1")
+    if args.seeds_per_n < 1:
+        parser.error("--seeds-per-n must be >= 1")
+
+    # Validate --world matches geometry (already configured by the
+    # early pre-parse above, or by HERDING_WORLD env var).
+    if args.world is not None and args.world != FIELD_SHAPE:
+        print(f"[demos] WARNING: --world={args.world} but geometry is "
+              f"'{FIELD_SHAPE}'. This should not happen — file a bug.")
+
+    teacher_fn = TEACHERS[args.teacher]
+    print(f"[demos] teacher: {args.teacher}  world: {FIELD_SHAPE}")
+
+    # Tolerate stray/trailing commas such as "1,2,".
+    n_sheep_list = [int(x) for x in args.n_sheep_list.split(",") if x.strip()]
+    if not n_sheep_list:
+        parser.error("--n-sheep-list must contain at least one integer")
+    print(f"[demos] grid: n_sheep={n_sheep_list}, seeds={args.seeds_per_n}, "
+          f"max_steps={args.max_steps}, subsample={args.subsample}")
+
+    all_obs, all_actions, all_meta = [], [], []
+    t_start = time.time()
+    n_success = 0
+    n_total = 0
+
+    for n in n_sheep_list:
+        for seed in range(args.seeds_per_n):
+            obs, actions, success, total_steps = collect_one(
+                n, seed, args.max_steps, args.subsample, teacher_fn,
+                frame_stack=args.frame_stack, privileged=args.privileged,
+                drive_mode=args.drive_mode,
+            )
+            n_total += 1
+            if success:
+                n_success += 1
+            keep = success or args.keep_failures
+            if keep and len(obs) > 0:
+                all_obs.append(obs)
+                all_actions.append(actions)
+                all_meta.append((n, seed, len(obs), int(success), total_steps))
+            tag = "✓" if success else "✗"
+            print(f"  [{tag}] n={n:>2d} seed={seed:>2d}  steps={total_steps:>6d}  "
+                  f"logged={len(obs):>5d}")
+
+    if not all_obs:
+        raise RuntimeError("No trajectories kept — try --keep-failures.")
+
+    obs = np.concatenate(all_obs, axis=0)
+    actions = np.concatenate(all_actions, axis=0)
+    meta = np.array(all_meta, dtype=np.int32)
+
+    # np.savez appends ".npz" when the name lacks it; resolve the final
+    # name up front so the path reported below is the file actually written.
+    saved_path = args.out if args.out.endswith(".npz") else args.out + ".npz"
+    Path(saved_path).parent.mkdir(parents=True, exist_ok=True)
+    np.savez(saved_path, obs=obs, actions=actions, meta=meta)
+
+    elapsed = time.time() - t_start
+    print(f"\n=== {n_success}/{n_total} trajectories successful ({100*n_success/n_total:.0f}%) ===")
+    print(f"=== {len(obs)} transitions saved to {saved_path} ===")
+    print(f"=== obs={obs.shape}, actions={actions.shape}, elapsed={elapsed:.0f}s ===")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,235 @@
+"""Behaviour cloning of an analytic teacher into an SB3 MlpPolicy.
+
+Trains the mean-action head against ``(obs, action)`` demos from
+``training.bc.collect`` using ``MSE + (1 − cos_sim)`` — the cosine
+term prevents collapse toward zero against unit-vector targets. The
+best-by-val_cos snapshot is restored at the end of training because
+multi-modal teachers make the last epoch unreliable.
+
+Output zip is loadable by ``PPO.load(...)`` and consumed by
+``HERDING_MODE=bc`` in the dog controller.
+
+Usage::
+
+    python -m training.bc.pretrain \\
+        --demos training/bc/demos.npz \\
+        --out training/runs/bc
+"""
+
+from __future__ import annotations
+
+import argparse
+import time
+from pathlib import Path
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, TensorDataset
+
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv
+
+from training.herding_env import HerdingEnv
+
+
+def build_model(net_arch_pi, net_arch_vf, log_std_init: float,
+                frame_stack: int = 1, drive_mode: str = "differential"):
+    """Create a fresh SB3 PPO whose only job is to hold the policy weights.
+
+    BC never runs PPO's own training loop. ``frame_stack`` has to agree
+    with the demo file so that the env's observation space matches the
+    recorded obs shape.
+
+    Returns:
+        ``(model, env)`` — the PPO instance and the ``DummyVecEnv`` it
+        was built on.
+    """
+    def _make_env():
+        return HerdingEnv(frame_stack=frame_stack, drive_mode=drive_mode)
+
+    vec_env = DummyVecEnv([_make_env])
+    policy_kwargs = {
+        "net_arch": {"pi": net_arch_pi, "vf": net_arch_vf},
+        "log_std_init": log_std_init,
+    }
+    ppo = PPO("MlpPolicy", vec_env, policy_kwargs=policy_kwargs, verbose=0)
+    return ppo, vec_env
+
+
+def policy_forward_mean(policy, obs_batch):
+    """Compute the deterministic mean action for a batch of observations.
+
+    Instead of going through the policy's ``forward`` (which wraps the
+    output in a Distribution), this walks the actor path by hand:
+    ``extract_features → mlp_extractor → action_net``.
+    """
+    extracted = policy.extract_features(obs_batch)
+    if isinstance(extracted, tuple):
+        # A tuple means separate features were returned; the first entry
+        # is the one fed to the actor.
+        extracted = extracted[0]
+    actor_latent, _critic_latent = policy.mlp_extractor(extracted)
+    mean_action = policy.action_net(actor_latent)
+    return mean_action
+
+
+def main():
+    """CLI entry point: behaviour-clone the demos into an SB3 policy.
+
+    Loads ``obs``/``actions``/``meta`` from the demo ``.npz``, infers the
+    frame stack (and, unless ``--drive-mode`` is given, the drive mode)
+    from the array shapes, trains the mean-action head with
+    ``MSE + cos_weight · (1 − cos_sim)`` and saves the snapshot with the
+    best validation cosine similarity to ``<out>/policy.zip``.
+
+    When the validation split is empty (``--val-split 0`` or a very small
+    demo file) there is no validation signal, so the snapshot is selected
+    by training cosine similarity instead.
+
+    Raises:
+        RuntimeError: on an empty demo file, an obs dimension that is not
+            a multiple of the single-frame obs size, or an action
+            dimension that does not match the drive mode.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--demos", default="training/bc/demos.npz")
+    parser.add_argument("--out", default="training/runs/bc")
+    parser.add_argument("--epochs", type=int, default=60)
+    parser.add_argument("--batch-size", type=int, default=256)
+    parser.add_argument("--lr", type=float, default=1e-3)
+    parser.add_argument("--val-split", type=float, default=0.1)
+    parser.add_argument("--net-arch", default="256,256",
+                        help="Comma-separated hidden layer widths.")
+    parser.add_argument("--log-std-init", type=float, default=0.5)
+    parser.add_argument("--cos-weight", type=float, default=1.0,
+                        help="Weight of the (1 - cosine_similarity) loss "
+                             "term; balances against MSE.")
+    parser.add_argument("--seed", type=int, default=0)
+    parser.add_argument("--device", default="cpu")
+    parser.add_argument("--drive-mode", default=None,
+                        choices=["differential", "mecanum"],
+                        help="Drive mode. If not set, inferred from "
+                             "demo action dimension (2→differential, 3→mecanum).")
+    args = parser.parse_args()
+
+    # A split of 1.0 or more would leave no training data at all.
+    if not 0.0 <= args.val_split < 1.0:
+        parser.error("--val-split must be in [0, 1)")
+
+    torch.manual_seed(args.seed)
+    np.random.seed(args.seed)
+
+    # --- Load demos ---
+    print(f"[bc] loading demos from {args.demos}")
+    data = np.load(args.demos)
+    obs = data["obs"].astype(np.float32)
+    actions = data["actions"].astype(np.float32)
+    meta = data["meta"]
+    print(f"[bc] obs={obs.shape}  actions={actions.shape}  trajectories={len(meta)}")
+    if obs.size == 0:
+        raise RuntimeError("Empty demo file.")
+
+    a_norms = np.linalg.norm(actions, axis=1)
+    print(f"[bc] action L2 norm: mean={a_norms.mean():.3f}  "
+          f"min={a_norms.min():.3f}  max={a_norms.max():.3f}")
+
+    # --- Train/val split ---
+    n = len(obs)
+    perm = np.random.permutation(n)
+    n_val = int(n * args.val_split)
+    val_idx, train_idx = perm[:n_val], perm[n_val:]
+    print(f"[bc] train={len(train_idx)}  val={len(val_idx)}")
+    has_val = n_val > 0
+    if not has_val:
+        print("[bc] validation split is empty — selecting the snapshot "
+              "by train_cos instead of val_cos")
+
+    obs_t = torch.from_numpy(obs)
+    act_t = torch.from_numpy(actions)
+    train_loader = DataLoader(
+        TensorDataset(obs_t[train_idx], act_t[train_idx]),
+        batch_size=args.batch_size, shuffle=True,
+    )
+    val_loader = DataLoader(
+        TensorDataset(obs_t[val_idx], act_t[val_idx]),
+        batch_size=args.batch_size, shuffle=False,
+    )
+
+    net_arch_pi = [int(x) for x in args.net_arch.split(",")]
+    net_arch_vf = net_arch_pi[:]
+    # Frame stack is inferred from the demo obs dim.
+    obs_dim = obs.shape[1]
+    from herding.perception.obs import OBS_DIM as _SINGLE
+    if obs_dim % _SINGLE != 0:
+        raise RuntimeError(f"demo obs dim {obs_dim} is not a multiple of {_SINGLE}")
+    frame_stack = obs_dim // _SINGLE
+    if frame_stack > 1:
+        print(f"[bc] inferred frame_stack={frame_stack} from demo obs dim {obs_dim}")
+
+    # Infer drive mode from action dimension if not explicitly set.
+    action_dim = actions.shape[1]
+    if args.drive_mode is not None:
+        drive_mode = args.drive_mode
+    elif action_dim == 3:
+        drive_mode = "mecanum"
+    else:
+        drive_mode = "differential"
+    # Reject a mismatch here with a clear message rather than letting it
+    # surface later as a tensor-shape error inside the loss.
+    expected_action_dim = {"differential": 2, "mecanum": 3}[drive_mode]
+    if action_dim != expected_action_dim:
+        raise RuntimeError(
+            f"demo action dim {action_dim} does not match drive_mode="
+            f"{drive_mode} (expects {expected_action_dim})"
+        )
+    print(f"[bc] drive_mode={drive_mode} (action_dim={action_dim})")
+
+    model, _env = build_model(net_arch_pi, net_arch_vf, args.log_std_init,
+                              frame_stack=frame_stack, drive_mode=drive_mode)
+    policy = model.policy.to(args.device)
+    optimizer = optim.Adam(policy.parameters(), lr=args.lr)
+
+    # --- Train ---
+    print(f"[bc] training: epochs={args.epochs}  batch={args.batch_size}  "
+          f"lr={args.lr}  device={args.device}")
+    t_start = time.time()
+    best_val = float("inf")
+    best_cos = -1.0
+    best_state = None  # restored at the end so noisy last epochs don't win
+    select_name = "val_cos" if has_val else "train_cos"
+
+    def combined_loss(pred, target):
+        """Return (loss tensor, MSE as float, mean cosine similarity as float)."""
+        mse = nn.functional.mse_loss(pred, target)
+        # Clamp the norms so all-zero vectors do not divide by zero.
+        p_norm = pred.norm(dim=1).clamp_min(1e-6)
+        t_norm = target.norm(dim=1).clamp_min(1e-6)
+        cos_sim = (pred * target).sum(dim=1) / (p_norm * t_norm)
+        cos_loss = (1.0 - cos_sim).mean()
+        return mse + args.cos_weight * cos_loss, mse.item(), cos_sim.mean().item()
+
+    for epoch in range(args.epochs):
+        policy.train()
+        train_mse_total, train_cos_total, train_count = 0.0, 0.0, 0
+        for ob_batch, act_batch in train_loader:
+            ob_batch = ob_batch.to(args.device)
+            act_batch = act_batch.to(args.device)
+            optimizer.zero_grad()
+            mean_action = policy_forward_mean(policy, ob_batch)
+            loss, mse_val, cos_val = combined_loss(mean_action, act_batch)
+            loss.backward()
+            optimizer.step()
+            bs = ob_batch.size(0)
+            # Weight per-batch means by batch size for a per-sample average.
+            train_mse_total += mse_val * bs
+            train_cos_total += cos_val * bs
+            train_count += bs
+        train_mse = train_mse_total / max(1, train_count)
+        train_cos = train_cos_total / max(1, train_count)
+
+        policy.eval()
+        val_total, val_count = 0.0, 0
+        cos_sim_total = 0.0
+        with torch.no_grad():
+            for ob_batch, act_batch in val_loader:
+                ob_batch = ob_batch.to(args.device)
+                act_batch = act_batch.to(args.device)
+                mean_action = policy_forward_mean(policy, ob_batch)
+                bs = ob_batch.size(0)
+                val_total += nn.functional.mse_loss(
+                    mean_action, act_batch, reduction="sum",
+                ).item()
+                m_norm = mean_action.norm(dim=1).clamp_min(1e-6)
+                a_norm = act_batch.norm(dim=1).clamp_min(1e-6)
+                cos = (mean_action * act_batch).sum(dim=1) / (m_norm * a_norm)
+                cos_sim_total += cos.sum().item()
+                val_count += bs
+        if val_count > 0:
+            # Sum-reduced MSE → per-element mean.
+            val_mse = val_total / val_count / actions.shape[1]
+            cos_sim = cos_sim_total / val_count
+        else:
+            # No validation samples: report NaN instead of a fake 0.
+            val_mse = float("nan")
+            cos_sim = float("nan")
+        print(f"  epoch {epoch+1:>2d}/{args.epochs}  "
+              f"train_mse={train_mse:.4f}  train_cos={train_cos:+.3f}  "
+              f"val_mse={val_mse:.4f}  val_cos={cos_sim:+.3f}")
+        if val_mse < best_val:
+            best_val = val_mse
+        # Without a validation set the old code saw val_cos == 0 every
+        # epoch and therefore always restored the epoch-1 weights.
+        select_cos = cos_sim if has_val else train_cos
+        if select_cos > best_cos:
+            best_cos = select_cos
+            best_state = {k: v.detach().cpu().clone()
+                          for k, v in policy.state_dict().items()}
+
+    if best_state is not None:
+        policy.load_state_dict(best_state)
+        print(f"[bc] restored best-{select_name} snapshot (cos={best_cos:.3f})")
+
+    elapsed = time.time() - t_start
+    print(f"[bc] done in {elapsed:.0f}s  best_val_mse={best_val:.4f}")
+
+    # --- Save ---
+    out_dir = Path(args.out)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    model.save(out_dir / "policy.zip")
+    print(f"[bc] saved policy to {out_dir / 'policy.zip'}")
+    print(f"\n[bc] verify with:  "
+          f"python -m training.eval --policy {out_dir}")
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,175 @@
+"""Env-side evaluation of analytic or learned policies.
+
+Reports success rate, mean steps and mean penned per flock size for
+``n_sheep ∈ 1..max_flock`` across ``--n-seeds`` seeds each.
+
+Usage::
+
+    python -m training.eval --policy training/runs/rl --n-seeds 10
+    python -m training.eval --policy strombom
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+from pathlib import Path
+from statistics import mean
+
+import numpy as np
+
+# Early CLI pre-parse for --world so geometry is configured before
+# other herding.* modules are imported.
+import sys  # explicit import: ``os.sys`` is an undocumented implementation detail
+
+_pre_argv = sys.argv[1:]
+_pre_world = None
+for i, a in enumerate(_pre_argv):
+    # Accept both "--world NAME" and "--world=NAME"; the first match wins.
+    if a == "--world" and i + 1 < len(_pre_argv):
+        _pre_world = _pre_argv[i + 1]
+        break
+    if a.startswith("--world="):
+        _pre_world = a.split("=", 1)[1]
+        break
+if _pre_world is not None:
+    # Configure geometry and export the choice before the imports below.
+    from herding.world.geometry import configure as _geo_configure
+    _geo_configure(_pre_world)
+    os.environ["HERDING_WORLD"] = _pre_world
+
+from herding.world.geometry import MAX_SHEEP, PEN_ENTRY
+from herding.control.sequential import compute_action as sequential_action
+from herding.control.strombom import compute_action as strombom_action
+from training.herding_env import HerdingEnv
+
+
+def rollout(env: HerdingEnv, predict_fn, max_steps: int) -> dict:
+    """Run one episode and summarise it.
+
+    ``predict_fn(env, obs)`` supplies the action for each step. When the
+    env ends the episode, the summary is read from its ``info`` dict;
+    if ``max_steps`` elapse first, the episode counts as a failure and
+    the penned count is read from ``env.sheep_penned``.
+
+    Returns:
+        Dict with keys ``"success"``, ``"steps"`` and ``"n_penned"``.
+    """
+    observation, _ = env.reset()
+    step_idx = 0
+    while step_idx < max_steps:
+        chosen = predict_fn(env, observation)
+        observation, _reward, terminated, truncated, info = env.step(chosen)
+        step_idx += 1
+        if not (terminated or truncated):
+            continue
+        return {
+            "success": bool(info.get("is_success", False)),
+            "steps": info.get("steps", step_idx),
+            "n_penned": info.get("n_penned", 0),
+        }
+    # Step budget exhausted without the env signalling the end.
+    return {"success": False, "steps": max_steps,
+            "n_penned": int(env.sheep_penned.sum())}
+
+
+def make_analytic_predictor(action_fn, drive_mode: str = "differential"):
+    """Adapt an analytic teacher to the ``predict(env, obs)`` interface.
+
+    The teacher is fed the env's exposed perception (tracker in LiDAR
+    mode, GT in privileged mode) rather than the observation vector.
+    In mecanum mode the returned action carries a zero third component.
+    """
+    is_mecanum = drive_mode == "mecanum"
+
+    def _predict(env, _obs):
+        seen = env.perceived_positions()
+        dog_xy = (env.dog_x, env.dog_y)
+        vx, vy, _mode = action_fn(dog_xy, seen, PEN_ENTRY)
+        components = [vx, vy, 0.0] if is_mecanum else [vx, vy]
+        return np.array(components, dtype=np.float32)
+
+    return _predict
+
+
+def make_strombom_predictor(drive_mode: str = "differential"):
+    """Shorthand for ``make_analytic_predictor(strombom_action, drive_mode)``."""
+    return make_analytic_predictor(strombom_action, drive_mode)
+
+
+def make_policy_predictor(model, vecnorm):
+    """Adapt a learned policy to the ``predict(env, obs)`` interface.
+
+    The observation is cast to float32 and reshaped to a batch of one,
+    passed through ``vecnorm.normalize_obs`` when ``vecnorm`` is given,
+    and the policy's deterministic action for that single row is returned.
+    """
+    def _as_batch(obs):
+        batch = np.asarray(obs, dtype=np.float32).reshape(1, -1)
+        return batch if vecnorm is None else vecnorm.normalize_obs(batch)
+
+    def _predict(_env, obs):
+        actions, _state = model.predict(_as_batch(obs), deterministic=True)
+        return actions[0]
+
+    return _predict
+
+
+def main():
+    """CLI entry point: evaluate an analytic or learned policy.
+
+    For every flock size ``n`` in ``1..--max-flock`` runs ``--n-seeds``
+    seeded episodes and prints success rate, mean steps and mean number
+    of penned sheep. For a learned policy the frame stack is inferred
+    from the policy's observation size and the drive mode from its
+    action size.
+
+    Raises:
+        FileNotFoundError: if ``--policy`` is a directory containing
+            neither ``policy.zip`` nor ``final.zip``.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--policy", required=True,
+                        help="'strombom', 'sequential', or path to a "
+                             "policy directory / zip.")
+    parser.add_argument("--n-seeds", type=int, default=10)
+    parser.add_argument("--max-steps", type=int, default=5000)
+    parser.add_argument("--max-flock", type=int, default=MAX_SHEEP)
+    parser.add_argument("--difficulty", type=float, default=1.0,
+                        help="0 = sheep spawn near the gate (easy); "
+                             "1 = full field (deployment distribution).")
+    parser.add_argument("--drive-mode", default="differential",
+                        choices=["differential", "mecanum"],
+                        help="Drive mode for the dog robot.")
+    parser.add_argument("--world", default=None,
+                        choices=["field", "field_round"],
+                        help="World shape. If not set, uses HERDING_WORLD "
+                             "env var or defaults to 'field'.")
+    args = parser.parse_args()
+
+    # Fail fast: with zero seeds statistics.mean() raises on the empty
+    # lists (after the model has already been loaded), and with zero
+    # flock sizes the table would be empty.
+    if args.n_seeds < 1:
+        parser.error("--n-seeds must be >= 1")
+    if args.max_flock < 1:
+        parser.error("--max-flock must be >= 1")
+
+    drive_mode = args.drive_mode
+    frame_stack = 1
+    if args.policy == "strombom":
+        predict = make_analytic_predictor(strombom_action, drive_mode)
+    elif args.policy == "sequential":
+        predict = make_analytic_predictor(sequential_action, drive_mode)
+    else:
+        from stable_baselines3 import PPO
+        run = Path(args.policy)
+        if run.is_file():
+            zip_path = run
+        else:
+            for name in ("policy.zip", "final.zip"):
+                if (run / name).exists():
+                    zip_path = run / name
+                    break
+            else:
+                raise FileNotFoundError(
+                    f"No checkpoint found in {run} "
+                    f"(tried policy.zip, final.zip)"
+                )
+        model = PPO.load(str(zip_path), device="auto")
+        from herding.perception.obs import OBS_DIM as _SINGLE
+        # The policy's obs size is a whole multiple of the single-frame
+        # size when it was trained with frame stacking.
+        policy_obs_dim = int(model.observation_space.shape[0])
+        if policy_obs_dim % _SINGLE == 0 and policy_obs_dim // _SINGLE >= 1:
+            frame_stack = policy_obs_dim // _SINGLE
+            if frame_stack > 1:
+                print(f"[eval] policy expects frame_stack={frame_stack}")
+        vecnorm = None
+        # NOTE(review): the fallback to the parent directory is skipped
+        # when the parent is named "best" — confirm this is intended.
+        vn_path = run / "vecnormalize.pkl"
+        if not vn_path.exists() and run.parent.name != "best":
+            vn_path = run.parent / "vecnormalize.pkl"
+        if vn_path.exists():
+            import pickle
+            # SECURITY: unpickling executes arbitrary code — only point
+            # --policy at run directories you trust.
+            with open(vn_path, "rb") as f:
+                vecnorm = pickle.load(f)
+            vecnorm.training = False
+            vecnorm.norm_reward = False
+        predict = make_policy_predictor(model, vecnorm)
+
+    # Infer drive_mode from policy action dim if using a learned policy.
+    if args.policy not in ("strombom", "sequential"):
+        policy_action_dim = int(model.action_space.shape[0])
+        if policy_action_dim == 2 and drive_mode == "mecanum":
+            drive_mode = "differential"
+            print("[eval] policy has 2D actions — overriding drive_mode "
+                  "to differential")
+        elif policy_action_dim == 3 and drive_mode == "differential":
+            drive_mode = "mecanum"
+            print("[eval] policy has 3D actions — overriding drive_mode "
+                  "to mecanum")
+
+    print(f"{'n_sheep':>8} {'success%':>10} {'mean_steps':>12} {'mean_penned':>12}")
+    print("-" * 46)
+    for n in range(1, args.max_flock + 1):
+        successes, steps, penned = [], [], []
+        for seed in range(args.n_seeds):
+            # A fresh env per (n, seed) pair.
+            env = HerdingEnv(n_sheep=n, max_steps=args.max_steps,
+                             difficulty=args.difficulty, seed=seed,
+                             frame_stack=frame_stack, drive_mode=drive_mode)
+            r = rollout(env, predict, args.max_steps)
+            successes.append(int(r["success"]))
+            steps.append(r["steps"])
+            penned.append(r["n_penned"])
+        sr = 100.0 * mean(successes)
+        ms = mean(steps)
+        mp = mean(penned)
+        print(f"{n:>8d} {sr:>9.1f}% {ms:>12.0f} {mp:>12.2f}")
+
+
+if __name__ == "__main__":
+    main()
@@ -1,143 +0,0 @@
-"""
-Evaluation script for a trained herding policy.
-
-Runs N episodes and reports the three project metrics:
-  1. Success rate       — fraction of episodes where all sheep are penned
-  2. Time-to-pen        — mean steps across successful episodes (per sheep)
-  3. Flock dispersion   — mean pairwise distance among active sheep, averaged
-                          over all timesteps (lower = tighter herding)
-
-Usage
-----
-    python evaluate.py --model runs/ppo_herding/best_model/best_model.zip \
-                       --vecnorm runs/ppo_herding/vecnorm.pkl \
-                       --n-sheep 5 --episodes 100
-
-Add --render to watch the first episode in a matplotlib window.
-"""
-
-import argparse
-
-import numpy as np
-from stable_baselines3 import PPO
-from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
-
-from herding_env import HerdingEnv
-
-
-def make_single_env(n_sheep: int, max_steps: int, render_mode: str = None):
-    def _init():
-        return HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
-                          render_mode=render_mode)
-    return _init
-
-
-def pairwise_mean(positions: np.ndarray, n_active: int) -> float:
-    """Mean pairwise distance among the first n_active sheep."""
-    if n_active < 2:
-        return 0.0
-    pts = positions[:n_active]
-    dists = []
-    for i in range(n_active):
-        for j in range(i + 1, n_active):
-            dists.append(float(np.linalg.norm(pts[i] - pts[j])))
-    return float(np.mean(dists))
-
-
-def parse_args():
-    p = argparse.ArgumentParser()
-    p.add_argument("--model",    required=True,
-                   help="Path to saved model .zip")
-    p.add_argument("--vecnorm",  default=None,
-                   help="Path to VecNormalize stats .pkl (optional)")
-    p.add_argument("--n-sheep",  type=int, default=1)
-    p.add_argument("--episodes", type=int, default=50)
-    p.add_argument("--max-steps", type=int, default=2000)
-    p.add_argument("--render",   action="store_true",
-                   help="Render first episode in matplotlib")
-    p.add_argument("--seed",     type=int, default=42)
-    return p.parse_args()
-
-
-def main():
-    args = parse_args()
-
-    render_mode = "human" if args.render else None
-    raw_env = DummyVecEnv([make_single_env(args.n_sheep, args.max_steps,
-                                           render_mode)])
-    if args.vecnorm:
-        env = VecNormalize.load(args.vecnorm, raw_env)
-        env.training  = False
-        env.norm_reward = False
-    else:
-        env = raw_env
-
-    model = PPO.load(args.model, env=env)
-
-    successes       = []
-    steps_to_pen    = []   # steps for successful episodes
-    dispersions     = []   # per-episode mean flock dispersion
-
-    for ep in range(args.episodes):
-        obs = env.reset()
-        done = False
-        ep_steps = 0
-        ep_dispersion = []
-        first_ep = ep == 0
-
-        while not done:
-            action, _ = model.predict(obs, deterministic=True)
-            obs, _, dones, infos = env.step(action)
-            done = dones[0]
-            ep_steps += 1
-
-            # Access the underlying HerdingEnv for dispersion calculation
-            inner = env.envs[0] if hasattr(env, "envs") else env.venv.envs[0]
-            if not inner.penned[:inner.n_sheep].all():
-                ep_dispersion.append(
-                    pairwise_mean(inner.sheep_pos, inner.n_sheep)
-                )
-
-            if first_ep and render_mode == "human":
-                pass   # render() is called inside step()
-
-        info = infos[0]
-        n_penned = info.get("n_penned", 0)
-        n_sheep  = info.get("n_sheep",  args.n_sheep)
-        success  = n_penned == n_sheep
-
-        successes.append(int(success))
-        if success:
-            steps_to_pen.append(ep_steps / n_sheep)
-        if ep_dispersion:
-            dispersions.append(float(np.mean(ep_dispersion)))
-
-        if (ep + 1) % 10 == 0:
-            print(f"  Episode {ep + 1:>4}/{args.episodes}  "
-                  f"success={int(success)}  steps={ep_steps}")
-
-    env.close()
-
-    # -----------------------------------------------------------------------
-    # Report
-    # -----------------------------------------------------------------------
-    success_rate = float(np.mean(successes))
-    mean_ttp     = float(np.mean(steps_to_pen)) if steps_to_pen else float("nan")
-    mean_disp    = float(np.mean(dispersions))   if dispersions  else float("nan")
-
-    print("\n" + "=" * 50)
-    print(f"  Model           : {args.model}")
-    print(f"  Sheep           : {args.n_sheep}")
-    print(f"  Episodes        : {args.episodes}")
-    print("-" * 50)
-    print(f"  Success rate    : {success_rate * 100:.1f}%"
-          f"  ({sum(successes)}/{args.episodes})")
-    print(f"  Time-to-pen     : {mean_ttp:.1f} steps/sheep"
-          f"  (successful episodes only)")
-    print(f"  Flock dispersion: {mean_disp:.2f} m"
-          f"  (mean pairwise distance while active)")
-    print("=" * 50)
-
-
-if __name__ == "__main__":
-    main()
@@ -1,353 +1,484 @@
-"""
-2D herding environment for PPO training (Gymnasium-compatible).
+"""Gymnasium environment for the shepherd-dog herding task.

-The dog agent (action: 2D velocity vector) must herd n_sheep into the
-quarantine pen.  Sheep dynamics mirror the Webots controller exactly:
-flee (quadratic ramp), separation (inverse-distance), cohesion, wall
-avoidance, and wander.
+Single-agent: the dog is the policy; sheep are env-controlled flocking
+agents (``herding.world.flocking_sim``). Kinematics match the proto specs
+(``herding.world.diffdrive``) so a policy trained here transfers to Webots
+without re-tuning.

-Coordinate system matches the Webots world file:
-    field  : x ∈ [-15, 15],  y ∈ [-15, 15]
-    pen    : x ∈ [10, 13],   y ∈ [-15, -8]   (SE corner, open north)
-
-Observation is always sized for MAX_SHEEP (currently 5) regardless of
-how many sheep are active.  Inactive slots are pre-penned at the pen
-centre with flag=1.  This keeps the model input dimension fixed across
-curriculum stages so VecNormalize statistics are preserved throughout.
+* **Action** (differential): ``Box(-1, 1, (2,))`` — ``(vx, vy)`` intent.
+* **Action** (mecanum):    ``Box(-1, 1, (3,))`` — ``(vx, vy, omega)`` intent.
+* **Observation**: ``Box(-inf, inf, (OBS_DIM·K,))`` from ``herding.perception.obs.build_obs``
+  with optional frame stacking K (concatenated oldest → newest).
+* **Reset**: ``options["n_sheep"]`` overrides flock size; otherwise the constructor's
+  ``n_sheep`` is used, or (if ``None``) a uniform sample from ``[1, max_n_sheep]``.
+* **Reward**: dense shaping (per-sheep distance progress, time
+  penalty, Strömbom-imitation cosine bonus) + sparse pen/done jackpots.
+  Weights live as class attributes on :class:`HerdingEnv`.
 """

-import numpy as np
+from __future__ import annotations
+
+import math
+import random
+from typing import Optional
+
 import gymnasium as gym
+import numpy as np
 from gymnasium import spaces

+from herding.world.diffdrive import (
+    heading_speed_to_wheels, kinematics_step,
+    mecanum_kinematics_step, velocity_to_mecanum_wheels, velocity_to_wheels,
+)
+from herding.world.flocking_sim import (
+    FLEE_SPEED, MAX_SPEED, WANDER_SPEED, compute_heading_speed,
+)
+from herding.world.geometry import (
+    DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
+    DOG_SOUTH_LIMIT, DOG_WHEEL_BASE, DOG_WHEEL_BASE_X, DOG_WHEEL_BASE_Y,
+    DOG_WHEEL_RADIUS, FIELD_SHAPE, FIELD_ROUND_R, FIELD_X, FIELD_Y,
+    GATE_X, GATE_Y, MAX_SHEEP,
+    PEN_ENTRY, PEN_X, PEN_Y,
+    SHEEP_MAX_WHEEL_OMEGA, SHEEP_WHEEL_BASE, SHEEP_WHEEL_RADIUS,
+    WEBOTS_DT, clip_to_field, is_penned_position,
+)
+from herding.perception.lidar_perception import detections_from_scan
+from herding.perception.lidar_sim import simulate_scan
+from herding.perception.obs import OBS_DIM, build_obs
+from herding.perception.sheep_tracker import SheepTracker
+from herding.control.strombom import compute_action as strombom_action
+

 class HerdingEnv(gym.Env):
-    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}
+    """Single-agent shepherd-dog herding env.

-    # -----------------------------------------------------------------------
-    # World constants — must match Webots world file
-    # -----------------------------------------------------------------------
-    MAX_SHEEP = 5
-    FIELD     = 15.0                         # half-size; positions ∈ [-FIELD, FIELD]
-    PEN_X     = (10.0, 13.0)                 # quarantine pen x bounds
-    PEN_Y     = (-15.0, -8.0)               # quarantine pen y bounds
-    PEN_CENTER = np.array([11.5, -11.5], dtype=np.float32)
+    Each step is one Webots ``basicTimeStep`` (16 ms). Episodes terminate
+    when all sheep are penned, or after ``max_steps`` steps (truncation).
+    """

-    # -----------------------------------------------------------------------
-    # Dynamics — calibrated to match Webots robot specs
-    # wheel radius 0.031 m; sheep FLEE_SPEED 20 rad/s → 0.62 m/s
-    # wheel radius 0.038 m; dog  maxVelocity 70 rad/s → 2.66 m/s
-    # -----------------------------------------------------------------------
-    DOG_SPEED      = 2.5    # m/s
-    SHEEP_FLEE_V   = 0.65   # m/s
-    SHEEP_WANDER_V = 0.20   # m/s
-    DT             = 0.1    # seconds per step
+    metadata = {"render_modes": []}

-    # Boid parameters — identical to sheep.py
-    FLEE_DIST       = 7.0
-    SEPARATION_DIST = 2.5
-    COHESION_DIST   = 8.0
-    WALL_MARGIN     = 3.5
+    # Reward weights. Sparse jackpots (W_PEN_DELTA, W_DONE) dominate;
+    # dense shaping (W_PROGRESS on Δ mean-distance-to-pen) provides the
+    # gradient; W_IMITATE adds a small cosine bonus toward the analytic
+    # teacher's action; W_TIME is added each step (negative = penalty).
+    W_PEN_DELTA = 100.0
+    W_PROGRESS  = 20.0
+    W_IMITATE   = 0.5
+    W_TIME      = 0.0
+    W_WALL      = 0.0
+    W_COLLISION = 0.0
+    W_DONE      = 500.0

-    # -----------------------------------------------------------------------
-    # Reward weights
-    # -----------------------------------------------------------------------
-    W_ALIGN      = 0.4     # dense: dog on anti-pen side of each active sheep
-    W_SHAPING    = 0.5     # dense: mean sheep distance to pen
-    W_APPROACH   = 0.1     # dense: dog within flee range of nearest sheep
-    W_PEN_BONUS  = 5.0     # sparse: per sheep successfully penned
-    W_COMPLETE   = 20.0    # bonus when ALL active sheep are penned
-    W_STEP_COST  = 0.002   # penalty per step (encourages efficiency)
+    # In-env action EMA. 0 = none; the Webots controller applies its own
+    # EMA at inference, so the policy needn't learn smoothness.
+    ACTION_SMOOTH = 0.0

-    def __init__(self, n_sheep: int = 1, max_steps: int = 2000,
-                 render_mode: str = None):
+    DEFAULT_MAX_STEPS = 5000
+    COLLISION_DIST = 0.30
+
+    def __init__(
+        self,
+        n_sheep: Optional[int] = None,
+        max_n_sheep: int = MAX_SHEEP,
+        max_steps: int = DEFAULT_MAX_STEPS,
+        difficulty: float = 0.0,
+        seed: Optional[int] = None,
+        use_lidar: bool = True,
+        frame_stack: int = 1,
+        drive_mode: str = "differential",
+    ):
        super().__init__()
-        assert 1 <= n_sheep <= self.MAX_SHEEP
-        self.n_sheep    = n_sheep
-        self.max_steps  = max_steps
-        self.render_mode = render_mode
+        # ``use_lidar=True`` (default): obs and imitation-reward teacher
+        # see only LiDAR-perceived positions via a tracker, matching the
+        # Webots controller. ``False`` exposes ground truth for ablation.
+        self._use_lidar = bool(use_lidar)
+        self._tracker = SheepTracker() if self._use_lidar else None
+        self._np_rng_lidar: Optional[np.random.Generator] = None

-        # Observation: dog(x,y) + MAX_SHEEP×sheep(x,y) + MAX_SHEEP×penned
-        # Fixed size across all curriculum stages.
-        obs_dim = 2 + 2 * self.MAX_SHEEP + self.MAX_SHEEP
+        # Frame stacking: the policy receives the last K obs concatenated,
+        # giving a memoryless MLP temporal context. K=1 → single frame.
+        self._frame_stack = max(1, int(frame_stack))
+        self._frame_buffer: list[np.ndarray] = []
+
+        # Drive mode: "differential" (2-wheel) or "mecanum" (4-wheel omni).
+        self._drive_mode = drive_mode.lower()
+        if self._drive_mode not in ("differential", "mecanum"):
+            raise ValueError(f"Unknown drive_mode: {drive_mode!r}")
+        action_dim = 3 if self._drive_mode == "mecanum" else 2
+        self.action_space = spaces.Box(-1.0, 1.0, shape=(action_dim,),
+                                       dtype=np.float32)
+        self._single_obs_dim = OBS_DIM
        self.observation_space = spaces.Box(
-            low=-1.0, high=1.0, shape=(obs_dim,), dtype=np.float32
+            low=-np.inf, high=np.inf,
+            shape=(OBS_DIM * self._frame_stack,), dtype=np.float32,
        )

-        # Action: desired velocity (vx, vy) ∈ [-1, 1]², scaled by DOG_SPEED
-        self.action_space = spaces.Box(
-            low=-1.0, high=1.0, shape=(2,), dtype=np.float32
-        )
+        # n_sheep=None → sample uniformly from [1, max_n_sheep] each reset.
+        self._fixed_n_sheep = n_sheep
+        self._max_n_sheep = max_n_sheep
+        self.max_steps = max_steps
+        # difficulty ∈ [0, 1]: 0 = sheep spawn near the gate (easy);
+        # 1 = sheep spawn anywhere in the field (deployment distribution).
+        self._difficulty = float(difficulty)
+        self._initial_seed = seed
+        self._initial_seed_used = False

-        # Runtime state (populated by reset)
-        self._step_count   = 0
-        self._prev_penned  = 0
-        self.dog_pos       = np.zeros(2, dtype=np.float32)
-        self.sheep_pos     = np.zeros((self.MAX_SHEEP, 2), dtype=np.float32)
-        self.penned        = np.ones(self.MAX_SHEEP, dtype=bool)
-        self.wander_ang    = np.zeros(self.MAX_SHEEP, dtype=np.float32)
+        # Env-owned RNG for wander jitter, re-seeded from np_random on reset.
+        self._py_rng = random.Random()
+        self._action_dim = action_dim

-        self._fig = None    # lazy matplotlib figure
+        # State (initialised in reset)
+        self.dog_x = self.dog_y = self.dog_heading = 0.0
+        self.sheep_x = np.zeros(0, dtype=np.float32)
+        self.sheep_y = np.zeros(0, dtype=np.float32)
+        self.sheep_h = np.zeros(0, dtype=np.float32)
+        self.sheep_penned = np.zeros(0, dtype=bool)
+        self.sheep_wander = np.zeros(0, dtype=np.float32)

-    # ------------------------------------------------------------------
-    # Curriculum interface
-    # ------------------------------------------------------------------
+        self.prev_action = np.zeros(self._action_dim, dtype=np.float32)
+        self.smoothed_action = np.zeros(self._action_dim, dtype=np.float32)
+        self.steps = 0
+        self.n_sheep = 0
+        self.prev_n_penned = 0
+        self.prev_d_pen = 0.0
+        self.prev_radius = 0.0

-    def set_n_sheep(self, n: int):
-        """Advance curriculum difficulty; takes effect on next reset()."""
-        assert 1 <= n <= self.MAX_SHEEP
-        self.n_sheep = n
+    # --- Public knobs ---
+    def set_max_n_sheep(self, value: int) -> None:
+        self._max_n_sheep = int(np.clip(value, 1, MAX_SHEEP))

-    # ------------------------------------------------------------------
-    # Gymnasium API
-    # ------------------------------------------------------------------
+    def set_difficulty(self, value: float) -> None:
+        self._difficulty = float(np.clip(value, 0.0, 1.0))

-    def reset(self, seed=None, options=None):
+    def set_imitate_weight(self, value: float) -> None:
+        """Override the instance W_IMITATE — used to disable Strömbom
+        imitation during PPO fine-tune."""
+        self.W_IMITATE = float(value)
+
+    def set_time_weight(self, value: float) -> None:
+        """Override the instance W_TIME — set negative to penalise step
+        count and encourage faster time-to-pen during PPO fine-tune."""
+        self.W_TIME = float(value)
+
+    # --- gym API ---
+    def reset(self, *, seed=None, options=None):
+        if (seed is None and self._initial_seed is not None
+                and not self._initial_seed_used):
+            seed = self._initial_seed
+        self._initial_seed_used = True
        super().reset(seed=seed)
-        self._step_count  = 0
-        self._prev_penned = 0
+        self._py_rng.seed(int(self.np_random.integers(0, 2**31 - 1)))
+        opts = options or {}

-        # Active sheep (0 .. n_sheep-1): random non-pen positions
-        self.sheep_pos[:] = self.PEN_CENTER
-        self.penned[:]    = True
-
-        placed = 0
-        while placed < self.n_sheep:
-            p = self.np_random.uniform(-12.0, 12.0, size=(2,)).astype(np.float32)
-            if not self._in_pen(p):
-                self.sheep_pos[placed] = p
-                self.penned[placed]    = False
-                placed += 1
-
-        # Dog: 50 % of the time start already on the anti-pen side of the
-        # nearest sheep (within flee range) so early training gets aligned
-        # starts; the other 50 % is fully random to ensure generalisation.
-        if self.np_random.random() < 0.5:
-            # Place dog behind the first active sheep relative to the pen
-            ref = self.sheep_pos[0]
-            away = ref - self.PEN_CENTER                       # sheep→anti-pen
-            dist = float(np.linalg.norm(away))
-            if dist > 0.1:
-                away = away / dist
-            offset = away * self.np_random.uniform(2.0, self.FLEE_DIST * 0.8)
-            self.dog_pos = np.clip(
-                (ref + offset).astype(np.float32), -self.FIELD, self.FIELD
-            )
+        if "n_sheep" in opts and opts["n_sheep"] is not None:
+            self.n_sheep = int(opts["n_sheep"])
+        elif self._fixed_n_sheep is not None:
+            self.n_sheep = int(self._fixed_n_sheep)
        else:
-            self.dog_pos = self.np_random.uniform(
-                -self.FIELD * 0.8, self.FIELD * 0.8, size=(2,)
-            ).astype(np.float32)
+            self.n_sheep = int(self.np_random.integers(1, self._max_n_sheep + 1))

-        # Inactive slots (n_sheep .. MAX_SHEEP-1): already at pen centre, penned=True
+        # Dog spawns near origin with random heading.
+        self.dog_x = float(self.np_random.uniform(-2.5, 2.5))
+        self.dog_y = float(self.np_random.uniform(-2.5, 2.5))
+        self.dog_heading = float(self.np_random.uniform(-math.pi, math.pi))

-        self.wander_ang = self.np_random.uniform(
-            -np.pi, np.pi, size=(self.MAX_SHEEP,)
-        ).astype(np.float32)
+        # Sheep spawn region linearly interpolates with difficulty:
+        # 0 → small box near the gate, 1 → full field.
+        d = self._difficulty
+        if FIELD_SHAPE == "field_round":
+            # Round field: spawn in a sector near the gate (south),
+            # expanding to the full circle at difficulty=1.
+            spawn_r_lo = 3.0
+            spawn_r_hi = d * FIELD_ROUND_R * 0.8 + (1.0 - d) * 6.0
+            # Angle spread around south (±60° at d=0, full circle at d=1).
+            half_angle = math.radians(60) + d * math.radians(120)
+            angle_lo = math.pi / 2.0 - half_angle   # a = +π/2 is south: sy = -r·sin(a)
+            angle_hi = math.pi / 2.0 + half_angle
+        else:
+            sx_lo = 7.0   - d * 20.0
+            sx_hi = 14.0  - d * 1.0
+            sy_lo = -12.0 + d * 0.0
+            sy_hi = -6.0  + d * 19.0

-        return self._obs(), {}
+        sxs, sys_, shs, sws = [], [], [], []
+        for _ in range(self.n_sheep):
+            for _try in range(100):
+                if FIELD_SHAPE == "field_round":
+                    r_spawn = float(self.np_random.uniform(spawn_r_lo, spawn_r_hi))
+                    a_spawn = float(self.np_random.uniform(angle_lo, angle_hi))
+                    sx = r_spawn * math.cos(a_spawn)
+                    sy = -r_spawn * math.sin(a_spawn)
+                else:
+                    sx = float(self.np_random.uniform(sx_lo, sx_hi))
+                    sy = float(self.np_random.uniform(sy_lo, sy_hi))
+                # Reject if too close to the dog / another sheep or already in the
+                # gate column (would start "penned"); the 100th sample is kept regardless.
+                if math.hypot(sx - self.dog_x, sy - self.dog_y) < 3.0:
+                    continue
+                if any(math.hypot(sx - x, sy - y) < 1.5
+                       for x, y in zip(sxs, sys_)):
+                    continue
+                if PEN_X[0] <= sx <= PEN_X[1] and sy < -8.0:
+                    continue
+                break
+            sxs.append(sx); sys_.append(sy)
+            shs.append(float(self.np_random.uniform(-math.pi, math.pi)))
+            sws.append(float(self.np_random.uniform(-math.pi, math.pi)))
+
+        self.sheep_x = np.asarray(sxs, dtype=np.float32)
+        self.sheep_y = np.asarray(sys_, dtype=np.float32)
+        self.sheep_h = np.asarray(shs, dtype=np.float32)
+        self.sheep_wander = np.asarray(sws, dtype=np.float32)
+        self.sheep_penned = np.zeros(self.n_sheep, dtype=bool)
+
+        self.prev_action = np.zeros(self._action_dim, dtype=np.float32)
+        self.smoothed_action = np.zeros(self._action_dim, dtype=np.float32)
+        self.steps = 0
+        self.prev_n_penned = 0
+        self.prev_d_pen, self.prev_radius = self._flock_metrics()
+
+        if self._tracker is not None:
+            self._tracker.reset()
+            self._np_rng_lidar = np.random.default_rng(
+                int(self.np_random.integers(0, 2**31 - 1)))
+            self._update_tracker()
+
+        self._frame_buffer = []
+
+        obs = self._build_obs()
+        info = {"n_sheep": self.n_sheep}
+        return obs, info

    def step(self, action):
-        self._step_count += 1
+        action = np.clip(np.asarray(action, dtype=np.float32), -1.0, 1.0)

-        # Move dog — clip each axis independently so the agent can idle
-        act = np.clip(np.asarray(action, dtype=np.float32), -1.0, 1.0)
-        self.dog_pos = np.clip(
-            self.dog_pos + act * self.DOG_SPEED * self.DT,
-            -self.FIELD, self.FIELD
+        self.smoothed_action = (
+            self.ACTION_SMOOTH * self.prev_action
+            + (1.0 - self.ACTION_SMOOTH) * action
        )
+        self.prev_action = self.smoothed_action.copy()
+        vx, vy = float(self.smoothed_action[0]), float(self.smoothed_action[1])
+        omega = float(self.smoothed_action[2]) if self._action_dim >= 3 else 0.0

-        # Step sheep dynamics
-        for i in range(self.n_sheep):
-            if self.penned[i]:
-                continue
-            self.sheep_pos[i] = self._step_sheep(i)
-            if self._in_pen(self.sheep_pos[i]):
-                self.penned[i] = True
+        # Safety supervisor — dog stays north of the gate.
+        if self.dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
+            vx, vy = 0.0, 1.0

-        n_penned     = int(self.penned[:self.n_sheep].sum())
-        newly_penned = n_penned - self._prev_penned
-        self._prev_penned = n_penned
-
-        reward     = self._reward(n_penned, newly_penned)
-        terminated = n_penned == self.n_sheep
-        truncated  = self._step_count >= self.max_steps
-        info       = {"n_penned": n_penned, "n_sheep": self.n_sheep}
-
-        if self.render_mode == "human":
-            self.render()
-
-        return self._obs(), float(reward), terminated, truncated, info
-
-    def render(self):
-        import matplotlib.pyplot as plt
-        import matplotlib.patches as mpatches
-
-        if self._fig is None:
-            plt.ion()
-            self._fig, self._ax = plt.subplots(figsize=(6, 6))
-
-        ax = self._ax
-        ax.clear()
-        ax.set_xlim(-16, 16)
-        ax.set_ylim(-16, 16)
-        ax.set_aspect("equal")
-        ax.set_facecolor("#dcedc8")
-
-        # Field boundary
-        ax.add_patch(mpatches.Rectangle(
-            (-15, -15), 30, 30, fill=False, edgecolor="#795548", linewidth=2
-        ))
-        # Pen
-        pw = self.PEN_X[1] - self.PEN_X[0]
-        ph = self.PEN_Y[1] - self.PEN_Y[0]
-        ax.add_patch(mpatches.Rectangle(
-            (self.PEN_X[0], self.PEN_Y[0]), pw, ph,
-            facecolor="#ffe082", edgecolor="#795548", linewidth=2
-        ))
-        ax.text(11.5, -11.5, "pen", ha="center", va="center",
-                fontsize=8, color="#795548")
-
-        # Sheep
-        for i in range(self.MAX_SHEEP):
-            if i >= self.n_sheep:
-                continue   # inactive slot — not shown
-            color = "deeppink" if self.penned[i] else "white"
-            ax.plot(*self.sheep_pos[i], "o", color=color, markersize=11,
-                    markeredgecolor="#555", markeredgewidth=1.5)
-
-        # Dog
-        ax.plot(*self.dog_pos, "s", color="#4e342e", markersize=13,
-                markeredgecolor="black", markeredgewidth=1.5)
-
-        ax.set_title(
-            f"step {self._step_count} | "
-            f"penned {int(self.penned[:self.n_sheep].sum())}/{self.n_sheep}",
-            fontsize=11
-        )
-        self._fig.canvas.draw()
-        self._fig.canvas.flush_events()
-        plt.pause(0.001)
-
-    def close(self):
-        if self._fig is not None:
-            import matplotlib.pyplot as plt
-            plt.close(self._fig)
-            self._fig = None
-
-    # ------------------------------------------------------------------
-    # Internals
-    # ------------------------------------------------------------------
-
-    def _in_pen(self, pos: np.ndarray) -> bool:
-        return (self.PEN_X[0] < pos[0] < self.PEN_X[1] and
-                self.PEN_Y[0] < pos[1] < self.PEN_Y[1])
-
-    def _obs(self) -> np.ndarray:
-        scale = 1.0 / self.FIELD
-        return np.concatenate([
-            self.dog_pos * scale,                          # 2
-            (self.sheep_pos * scale).flatten(),            # 2 * MAX_SHEEP
-            self.penned.astype(np.float32),                # MAX_SHEEP
-        ]).astype(np.float32)
-
-    def _reward(self, n_penned: int, newly_penned: int) -> float:
-        active_mask = ~self.penned[:self.n_sheep]
-        if active_mask.any():
-            active_pos = self.sheep_pos[:self.n_sheep][active_mask]
-            dists_pen  = np.linalg.norm(active_pos - self.PEN_CENTER, axis=1)
-            dists_dog  = np.linalg.norm(active_pos - self.dog_pos, axis=1)
-
-            # Sheep-to-pen shaping
-            shaping = -(dists_pen.mean() / (2 * self.FIELD))
-
-            # Approach: dog penalised for being far from nearest sheep
-            approach = -(dists_dog.min() / (2 * self.FIELD))
-
-            # Alignment: reward dog for being on the anti-pen side of each sheep.
-            # When the dog is opposite the pen relative to a sheep, that sheep
-            # flees toward the pen.  Score ∈ [-1, 1] per sheep, weighted by
-            # a proximity gate so only nearby dogs count.
-            align_scores = []
-            for s_pos, d_pen, d_dog in zip(active_pos, dists_pen, dists_dog):
-                if d_pen < 0.1 or d_dog < 0.1:
-                    continue
-                pen_dir = (self.PEN_CENTER - s_pos) / d_pen   # sheep → pen
-                dog_dir = (self.dog_pos    - s_pos) / d_dog   # sheep → dog
-                # cos(angle): +1 → dog behind sheep, -1 → dog on pen side
-                cosine    = -float(np.dot(pen_dir, dog_dir))
-                # gate: full credit inside flee range, fades beyond
-                proximity = max(0.0, 1.0 - d_dog / self.FLEE_DIST)
-                align_scores.append(cosine * proximity)
-            alignment = float(np.mean(align_scores)) if align_scores else 0.0
+        # Step the dog.
+        if self._drive_mode == "mecanum":
+            w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
+                vx, vy, omega, self.dog_heading,
+                max_linear=DOG_MAX_LINEAR,
+                wheel_radius=DOG_WHEEL_RADIUS,
+                lx=DOG_WHEEL_BASE_X / 2.0, ly=DOG_WHEEL_BASE_Y / 2.0,
+                max_wheel_omega=DOG_MAX_WHEEL_OMEGA,
+                k_turn=4.0,
+                wheel_base=DOG_WHEEL_BASE,
+            )
+            self.dog_x, self.dog_y, self.dog_heading = mecanum_kinematics_step(
+                self.dog_x, self.dog_y, self.dog_heading,
+                w_fl, w_fr, w_rl, w_rr,
+                DOG_WHEEL_RADIUS,
+                DOG_WHEEL_BASE_X / 2.0, DOG_WHEEL_BASE_Y / 2.0,
+                WEBOTS_DT,
+            )
        else:
-            shaping = approach = alignment = 0.0
+            wL, wR = velocity_to_wheels(
+                vx, vy, self.dog_heading,
+                max_linear=DOG_MAX_LINEAR,
+                wheel_radius=DOG_WHEEL_RADIUS,
+                max_wheel_omega=DOG_MAX_WHEEL_OMEGA,
+                k_turn=4.0,
+            )
+            self.dog_x, self.dog_y, self.dog_heading = kinematics_step(
+                self.dog_x, self.dog_y, self.dog_heading,
+                wL, wR, DOG_WHEEL_RADIUS, DOG_WHEEL_BASE, WEBOTS_DT,
+            )
+        self.dog_x, self.dog_y = clip_to_field(self.dog_x, self.dog_y, margin=0.3)
+        # Extra constraint: dog stays north of the gate area.
+        if self.dog_y < DOG_SOUTH_LIMIT:
+            self.dog_y = DOG_SOUTH_LIMIT

-        reward  = shaping   * self.W_SHAPING
-        reward += approach  * self.W_APPROACH
-        reward += alignment * self.W_ALIGN
-        reward += newly_penned * self.W_PEN_BONUS
-        reward -= self.W_STEP_COST
-        if n_penned == self.n_sheep:
-            reward += self.W_COMPLETE
-        return reward
+        # Step sheep and update penned flags (GT-based).
+        for i in range(self.n_sheep):
+            self._step_one_sheep(i)
+        for i in range(self.n_sheep):
+            if (not self.sheep_penned[i]
+                    and is_penned_position(self.sheep_x[i], self.sheep_y[i])):
+                self.sheep_penned[i] = True

-    def _step_sheep(self, i: int) -> np.ndarray:
-        """Apply one timestep of boid dynamics to sheep i."""
-        pos = self.sheep_pos[i].copy()
-        fx, fy = 0.0, 0.0
-        fleeing = False
+        # LiDAR perception runs after sheep move; feeds the obs and the
+        # imitation reward. Reward/termination still use GT.
+        if self._tracker is not None:
+            self._update_tracker()

-        # Flee from dog — quadratic ramp (mirrors sheep.py)
-        diff = self.dog_pos - pos
-        dist = float(np.linalg.norm(diff))
-        if 0.01 < dist < self.FLEE_DIST:
-            t = 1.0 - dist / self.FLEE_DIST
-            s = t * t * 5.0
-            fx -= (diff[0] / dist) * s
-            fy -= (diff[1] / dist) * s
-            fleeing = True
+        d_pen, radius = self._flock_metrics()
+        reward = self._compute_reward(d_pen, radius, action=action)
+        self.prev_d_pen = d_pen
+        self.prev_radius = radius
+        self.prev_n_penned = int(self.sheep_penned.sum())

-        # Separation (inverse-distance) + Cohesion
-        cx, cy, cn = 0.0, 0.0, 0
-        for j in range(self.n_sheep):
-            if j == i or self.penned[j]:
-                continue
-            dv = self.sheep_pos[j] - pos
-            dj = float(np.linalg.norm(dv))
-            if 0.3 < dj < self.COHESION_DIST:
-                cx += self.sheep_pos[j][0]
-                cy += self.sheep_pos[j][1]
-                cn += 1
-            if 0.05 < dj < self.SEPARATION_DIST:
-                push = (self.SEPARATION_DIST - dj) / dj
-                fx -= (dv[0] / dj) * push * 2.5
-                fy -= (dv[1] / dj) * push * 2.5
-        if cn > 0:
-            w = 0.08 if fleeing else 0.15
-            fx += (cx / cn - pos[0]) * w
-            fy += (cy / cn - pos[1]) * w
+        self.steps += 1
+        all_penned = bool(self.sheep_penned.all())
+        terminated = all_penned
+        truncated = self.steps >= self.max_steps
+        if all_penned:
+            reward += self.W_DONE

-        # Wall avoidance
-        m, F = self.WALL_MARGIN, self.FIELD
-        if pos[0] < -F + m: fx += ((-F + m - pos[0]) / m) * 6.0
-        if pos[0] >  F - m: fx -= ((pos[0] - (F - m)) / m) * 6.0
-        if pos[1] < -F + m: fy += ((-F + m - pos[1]) / m) * 6.0
-        if pos[1] >  F - m: fy -= ((pos[1] - (F - m)) / m) * 6.0
+        obs = self._build_obs()
+        info = {
+            "n_sheep": self.n_sheep,
+            "n_penned": self.prev_n_penned,
+            "is_success": all_penned,
+            "steps": self.steps,
+        }
+        return obs, float(reward), terminated, truncated, info

-        # Wander — suppressed while fleeing
-        if not fleeing:
-            if self.np_random.random() < 0.02:
-                self.wander_ang[i] += float(self.np_random.uniform(-0.6, 0.6))
-            fx += float(np.cos(self.wander_ang[i])) * 0.5
-            fy += float(np.sin(self.wander_ang[i])) * 0.5
+    # --- Internals ---
+    def _step_one_sheep(self, i: int) -> None:
+        x, y = float(self.sheep_x[i]), float(self.sheep_y[i])
+        peers = [(float(self.sheep_x[j]), float(self.sheep_y[j]))
+                 for j in range(self.n_sheep) if j != i]
+        heading, speed_motor, new_wander = compute_heading_speed(
+            x, y,
+            penned=bool(self.sheep_penned[i]),
+            dog_xy=(self.dog_x, self.dog_y),
+            peers=peers,
+            wander_angle=float(self.sheep_wander[i]),
+            rng=self._py_rng,
+        )
+        self.sheep_wander[i] = new_wander

-        # Integrate
-        force = np.array([fx, fy])
-        mag   = float(np.linalg.norm(force))
-        if mag > 0.01:
-            top_speed = self.SHEEP_FLEE_V if fleeing else self.SHEEP_WANDER_V
-            speed = min(top_speed, mag * 0.3)
-            pos   = np.clip(pos + (force / mag) * speed * self.DT,
-                            -self.FIELD, self.FIELD)
+        wL, wR = heading_speed_to_wheels(
+            heading, speed_motor, float(self.sheep_h[i]),
+            max_wheel_omega=SHEEP_MAX_WHEEL_OMEGA, k_turn=4.0,
+        )
+        nx, ny, nh = kinematics_step(
+            x, y, float(self.sheep_h[i]), wL, wR,
+            SHEEP_WHEEL_RADIUS, SHEEP_WHEEL_BASE, WEBOTS_DT,
+        )

-        return pos.astype(np.float32)
+        # Wall clipping.
+        if FIELD_SHAPE == "field_round":
+            in_gate_col = PEN_X[0] <= nx <= PEN_X[1]
+            if in_gate_col:
+                # Allow passage through the gate column (south of the wall).
+                ny = float(np.clip(ny, PEN_Y[0] + 0.2, FIELD_Y[1] - 0.2))
+            else:
+                nx, ny = clip_to_field(nx, ny, margin=0.2)
+        else:
+            nx = float(np.clip(nx, FIELD_X[0] + 0.2, FIELD_X[1] - 0.2))
+            in_gate_col = PEN_X[0] <= nx <= PEN_X[1]
+            if in_gate_col:
+                ny = float(np.clip(ny, PEN_Y[0] + 0.2, FIELD_Y[1] - 0.2))
+            else:
+                ny = float(np.clip(ny, FIELD_Y[0] + 0.2, FIELD_Y[1] - 0.2))
+
+        self.sheep_x[i] = nx
+        self.sheep_y[i] = ny
+        self.sheep_h[i] = nh
+
+    def _flock_metrics(self):
+        """Return (mean distance to PEN_ENTRY, max radius from CoM) over un-penned sheep.
+
+        The per-sheep mean (not CoM distance) makes the progress signal
+        sensitive to stragglers: the dog can't game it by herding most of
+        the flock and abandoning one outlier.
+        """
+        active_mask = ~self.sheep_penned
+        if not active_mask.any():
+            return 0.0, 0.0
+        xs = self.sheep_x[active_mask]
+        ys = self.sheep_y[active_mask]
+        per_sheep_d = np.hypot(xs - PEN_ENTRY[0], ys - PEN_ENTRY[1])
+        d_pen = float(per_sheep_d.mean())
+        com_x, com_y = float(xs.mean()), float(ys.mean())
+        if active_mask.sum() == 1:
+            radius = 0.0
+        else:
+            radius = float(np.hypot(xs - com_x, ys - com_y).max())
+        return d_pen, radius
+
+    def _compute_reward(self, d_pen: float, radius: float, action=None) -> float:
+        """Sparse pen jackpot + dense progress shaping + W_TIME + Strömbom imitation (``radius`` unused)."""
+        n_penned = int(self.sheep_penned.sum())
+        delta_pen = n_penned - self.prev_n_penned
+
+        d_progress = max(-5.0, min(5.0, self.prev_d_pen - d_pen))
+        r = (self.W_PEN_DELTA * delta_pen
+             + self.W_PROGRESS * d_progress
+             + self.W_TIME)
+
+        if action is not None and self.W_IMITATE > 0.0:
+            positions = self._perceived_positions()
+            if positions:
+                sx, sy, _mode = strombom_action(
+                    (self.dog_x, self.dog_y), positions, PEN_ENTRY,
+                )
+                a_norm = math.hypot(float(action[0]), float(action[1]))
+                s_norm = math.hypot(sx, sy)
+                if a_norm > 1e-3 and s_norm > 1e-3:
+                    cos_sim = (float(action[0]) * sx + float(action[1]) * sy) / (a_norm * s_norm)
+                    r += self.W_IMITATE * cos_sim
+
+        return float(r)
+
+    def _build_single_obs(self) -> np.ndarray:
+        if self._tracker is not None:
+            # LiDAR mode: the obs sees only the tracker's active set.
+            active = self._tracker.get_positions()
+            sheep_xy_list = list(active.values())
+            sheep_penned_list = [False] * len(sheep_xy_list)
+        else:
+            sheep_xy_list = list(zip(self.sheep_x.tolist(), self.sheep_y.tolist()))
+            sheep_penned_list = self.sheep_penned.tolist()
+        return build_obs(
+            (self.dog_x, self.dog_y), self.dog_heading,
+            sheep_xy_list, sheep_penned_list,
+            n_max=self._max_n_sheep,
+            n_expected=self.n_sheep,
+        )
+
+    def _build_obs(self) -> np.ndarray:
+        single = self._build_single_obs()
+        if self._frame_stack <= 1:
+            return single
+        # On reset the buffer is empty — pad with copies of the first frame.
+        if not self._frame_buffer:
+            self._frame_buffer = [single.copy() for _ in range(self._frame_stack)]
+        else:
+            self._frame_buffer.append(single)
+            if len(self._frame_buffer) > self._frame_stack:
+                self._frame_buffer = self._frame_buffer[-self._frame_stack:]
+        return np.concatenate(self._frame_buffer, axis=0).astype(np.float32)
+
+    # --- LiDAR perception ---
+    def _all_sheep_xy(self) -> list[tuple[float, float]]:
+        """Every sheep, including penned (the LiDAR sees them)."""
+        return [(float(self.sheep_x[i]), float(self.sheep_y[i]))
+                for i in range(self.n_sheep)]
+
+    def _update_tracker(self) -> None:
+        ranges = simulate_scan(
+            self.dog_x, self.dog_y, self.dog_heading,
+            self._all_sheep_xy(),
+            rng=self._np_rng_lidar,
+        )
+        detections = detections_from_scan(
+            ranges, self.dog_x, self.dog_y, self.dog_heading,
+        )
+        self._tracker.update(detections)
+
+    def perceived_positions(self) -> dict[str, tuple[float, float]]:
+        """What the controller would "see" this step: tracker output in
+        LiDAR mode, ground truth in privileged mode. Used by demo
+        collection and analytic-policy eval so the teacher runs on the
+        same perception the deployed controller has.
+        """
+        if self._tracker is not None:
+            return self._tracker.get_positions()
+        return {f"s{i}": (float(self.sheep_x[i]), float(self.sheep_y[i]))
+                for i in range(self.n_sheep) if not self.sheep_penned[i]}
+
+    _perceived_positions = perceived_positions
@@ -1,6 +1,9 @@
-gymnasium>=0.29
-stable-baselines3>=2.3
-torch>=2.2
-numpy>=1.26
-matplotlib>=3.8
-tensorboard>=2.16
+# Upper-bound the major versions of gymnasium and SB3; SB3 2.x requires gymnasium and torch >= 1.13.
+gymnasium>=0.29,<2.0
+stable-baselines3[extra]>=2.3,<3.0
+torch>=2.1
+numpy>=1.24
+pyyaml>=6.0
+tensorboard>=2.14
+tqdm>=4.66
+pytest>=8.0
@@ -0,0 +1,403 @@
+"""KL-regularised PPO fine-tune of a behaviour-cloned policy.
+
+The trainable policy is initialised from ``runs/bc/policy.zip``. A
+frozen copy of the same weights becomes the reference; each PPO loss
+gets an extra ``β · KL(π ‖ π_ref)`` term so the policy can only move
+within a trust region around BC. ``log_std`` is fixed small to keep
+exploration tight.
+
+Output: ``runs/rl/policy.zip`` — same SB3 format as the BC checkpoint,
+loadable by ``HERDING_MODE=rl`` in the dog controller.
+
+Usage::
+
+    python -m training.rl.train \\
+        --bc training/runs/bc \\
+        --out training/runs/rl \\
+        --total-timesteps 2000000
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+from pathlib import Path
+
+# Early CLI pre-parse for --world so geometry is configured before any
+# herding.* / training.* import binds geometry constants. Matches the
+# pattern used by training.bc.collect and training.eval.
+# Both ``--world NAME`` and ``--world=NAME`` are recognised; the first
+# occurrence wins.
+import sys  # read argv from sys directly rather than via the undocumented os.sys
+
+_pre_argv = sys.argv[1:]
+_pre_world = None
+for i, a in enumerate(_pre_argv):
+    if a == "--world" and i + 1 < len(_pre_argv):
+        _pre_world = _pre_argv[i + 1]
+        break
+    if a.startswith("--world="):
+        _pre_world = a.split("=", 1)[1]
+        break
+if _pre_world is not None:
+    # Deferred import: geometry must be configured before other project
+    # modules are imported further down.
+    from herding.world.geometry import configure as _geo_configure
+    _geo_configure(_pre_world)
+    os.environ["HERDING_WORLD"] = _pre_world
+
+import numpy as np
+import torch as th
+import torch.nn.functional as F
+from stable_baselines3 import PPO
+from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
+
+from herding.perception.obs import OBS_DIM
+from training.herding_env import HerdingEnv
+
+
+# --------------------------------------------------------------------
+# Env factory
+# --------------------------------------------------------------------
+
+def _make_env(rank: int, seed: int, frame_stack: int,
+              drive_mode: str = "differential",
+              difficulty: float = 1.0,
+              max_n_sheep: int = 10):
+    """Return a zero-argument factory that builds one monitored HerdingEnv.
+
+    The env is seeded with ``seed + rank`` so each worker gets its own
+    stream, and is wrapped in ``Monitor`` recording the ``is_success``,
+    ``n_sheep`` and ``n_penned`` info keys.
+    """
+    env_kwargs = dict(
+        seed=seed + rank,
+        frame_stack=frame_stack,
+        drive_mode=drive_mode,
+        difficulty=difficulty,
+        max_n_sheep=max_n_sheep,
+    )
+
+    def _init():
+        return Monitor(HerdingEnv(**env_kwargs),
+                       info_keywords=("is_success", "n_sheep", "n_penned"))
+
+    return _init
+
+
+# --------------------------------------------------------------------
+# KL-regularised PPO
+# --------------------------------------------------------------------
+
+class KLPPO(PPO):
+    """PPO with an extra KL-to-reference penalty in the policy loss.
+
+    Overrides only ``train()``; rollout buffer, clipped surrogate, value
+    loss and entropy bonus are unchanged from stock SB3 PPO.
+
+    Args:
+        ref_policy: Frozen reference policy the trainable policy is
+            regularised towards. May be ``None`` at construction and
+            assigned afterwards, but must be set before ``train()`` runs.
+        kl_coef: Weight of the ``KL(π ‖ π_ref)`` term added to the loss.
+        *args, **kwargs: Forwarded unchanged to ``PPO.__init__``.
+    """
+
+    def __init__(self, *args, ref_policy=None, kl_coef: float = 0.05, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.ref_policy = ref_policy
+        if self.ref_policy is not None:
+            # The reference is never updated: eval mode, no gradients.
+            self.ref_policy.set_training_mode(False)
+            for p in self.ref_policy.parameters():
+                p.requires_grad = False
+        self.kl_coef = kl_coef
+
+    def train(self) -> None:
+        """Run one PPO update over the rollout buffer with the KL penalty.
+
+        Raises:
+            RuntimeError: if ``ref_policy`` has not been set.
+        """
+        # Local import, matching the file's other deferred imports.
+        from stable_baselines3.common.distributions import CategoricalDistribution
+
+        # Stock SB3 PPO.train() structure with the KL-to-reference term
+        # added inside the inner minibatch loop.
+        self.policy.set_training_mode(True)
+        self._update_learning_rate(self.policy.optimizer)
+        clip_range = self.clip_range(self._current_progress_remaining)
+        if self.clip_range_vf is not None:
+            clip_range_vf = self.clip_range_vf(self._current_progress_remaining)
+
+        # Discrete actions are stored as floats in the buffer and must be
+        # cast/flattened before evaluate_actions. The previous check compared
+        # the action *space* against torch's Distribution base class, which
+        # can never match; test the policy's action distribution instead.
+        discrete_actions = isinstance(
+            getattr(self.policy, "action_dist", None), CategoricalDistribution)
+
+        entropy_losses, pg_losses, value_losses, kl_losses = [], [], [], []
+        clip_fractions = []
+        continue_training = True
+
+        for epoch in range(self.n_epochs):
+            approx_kl_divs = []
+            for rollout_data in self.rollout_buffer.get(self.batch_size):
+                actions = rollout_data.actions
+                if discrete_actions:
+                    actions = rollout_data.actions.long().flatten()
+
+                values, log_prob, entropy = self.policy.evaluate_actions(
+                    rollout_data.observations, actions)
+                values = values.flatten()
+                advantages = rollout_data.advantages
+                if self.normalize_advantage and len(advantages) > 1:
+                    advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
+
+                # Clipped surrogate objective.
+                ratio = th.exp(log_prob - rollout_data.old_log_prob)
+                policy_loss_1 = advantages * ratio
+                policy_loss_2 = advantages * th.clamp(ratio, 1 - clip_range, 1 + clip_range)
+                policy_loss = -th.min(policy_loss_1, policy_loss_2).mean()
+                pg_losses.append(policy_loss.item())
+                clip_fraction = th.mean((th.abs(ratio - 1) > clip_range).float()).item()
+                clip_fractions.append(clip_fraction)
+
+                if self.clip_range_vf is None:
+                    values_pred = values
+                else:
+                    values_pred = rollout_data.old_values + th.clamp(
+                        values - rollout_data.old_values, -clip_range_vf, clip_range_vf)
+                value_loss = F.mse_loss(rollout_data.returns, values_pred)
+                value_losses.append(value_loss.item())
+
+                if entropy is None:
+                    # No analytic entropy available: estimate from -log_prob.
+                    entropy_loss = -th.mean(-log_prob)
+                else:
+                    entropy_loss = -th.mean(entropy)
+                entropy_losses.append(entropy_loss.item())
+
+                # KL-to-reference: closed-form KL between the two action
+                # distributions. For a diagonal Gaussian the result has a
+                # trailing action axis that is summed; a distribution that
+                # already yields one value per sample is left as-is. Then
+                # mean over the batch.
+                if self.ref_policy is None:
+                    raise RuntimeError("KLPPO.train called without ref_policy")
+                with th.no_grad():
+                    ref_dist = self.ref_policy.get_distribution(rollout_data.observations)
+                pi_dist = self.policy.get_distribution(rollout_data.observations)
+                kl_per_sample = th.distributions.kl.kl_divergence(
+                    pi_dist.distribution, ref_dist.distribution)
+                if kl_per_sample.dim() > 1:
+                    kl_per_sample = kl_per_sample.sum(dim=-1)
+                kl_div = kl_per_sample.mean()
+                kl_losses.append(kl_div.item())
+
+                loss = (policy_loss
+                        + self.ent_coef * entropy_loss
+                        + self.vf_coef * value_loss
+                        + self.kl_coef * kl_div)
+
+                # Approximate KL between the rollout policy and the current
+                # one, used only for the early-stop guard and logging.
+                with th.no_grad():
+                    log_ratio = log_prob - rollout_data.old_log_prob
+                    approx_kl_div = th.mean((th.exp(log_ratio) - 1) - log_ratio).cpu().numpy()
+                    approx_kl_divs.append(approx_kl_div)
+
+                if self.target_kl is not None and approx_kl_div > 1.5 * self.target_kl:
+                    continue_training = False
+                    if self.verbose >= 1:
+                        print(f"Early stopping at step {epoch} due to reaching max kl: {approx_kl_div:.2f}")
+                    break
+
+                self.policy.optimizer.zero_grad()
+                loss.backward()
+                th.nn.utils.clip_grad_norm_(self.policy.parameters(), self.max_grad_norm)
+                self.policy.optimizer.step()
+
+            self._n_updates += 1
+            if not continue_training:
+                break
+
+        explained_var = self._explained_variance()
+        self.logger.record("train/entropy_loss", float(np.mean(entropy_losses)))
+        self.logger.record("train/policy_gradient_loss", float(np.mean(pg_losses)))
+        self.logger.record("train/value_loss", float(np.mean(value_losses)))
+        self.logger.record("train/kl_to_reference", float(np.mean(kl_losses)))
+        self.logger.record("train/approx_kl", float(np.mean(approx_kl_divs)))
+        self.logger.record("train/clip_fraction", float(np.mean(clip_fractions)))
+        self.logger.record("train/explained_variance", float(explained_var))
+        if hasattr(self.policy, "log_std"):
+            self.logger.record("train/std", th.exp(self.policy.log_std).mean().item())
+
+    def _explained_variance(self) -> float:
+        """Fraction of return variance explained by the buffered value
+        predictions; NaN when the returns have zero variance."""
+        y_pred = self.rollout_buffer.values.flatten()
+        y_true = self.rollout_buffer.returns.flatten()
+        var_y = np.var(y_true)
+        return float("nan") if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y
+
+
+# --------------------------------------------------------------------
+# Main
+# --------------------------------------------------------------------
+
+def main() -> None:
+    """CLI entry point: KL-regularised PPO fine-tune of a BC checkpoint.
+
+    Loads ``<--bc>/policy.zip``, infers frame stack and (unless given)
+    drive mode from it, builds the vectorised envs, copies the BC weights
+    into both the trainable policy and a frozen reference, trains, and
+    writes ``final.zip`` plus the canonical ``policy.zip`` under ``--out``.
+
+    Raises:
+        SystemExit: if the BC checkpoint is missing or its observation
+            size is not a multiple of ``OBS_DIM``.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--bc", default="training/runs/bc",
+                        help="Directory containing the BC initialisation.")
+    parser.add_argument("--out", default="training/runs/rl",
+                        help="Where to save the fine-tuned policy.")
+    parser.add_argument("--total-timesteps", type=int, default=2_000_000)
+    parser.add_argument("--n-envs", type=int, default=8)
+    parser.add_argument("--learning-rate", type=float, default=5e-5)
+    parser.add_argument("--kl-coef", type=float, default=0.05,
+                        help="Coefficient of the KL-to-reference penalty.")
+    parser.add_argument("--log-std", type=float, default=-1.5,
+                        help="Initial (and frozen) log_std for exploration.")
+    # Paired on/off flags: a lone store_true with default=True could never
+    # be switched off from the command line.
+    parser.add_argument("--freeze-log-std", dest="freeze_log_std",
+                        action="store_true", default=True,
+                        help="Keep log_std fixed during fine-tuning (default).")
+    parser.add_argument("--no-freeze-log-std", dest="freeze_log_std",
+                        action="store_false",
+                        help="Let PPO train log_std.")
+    parser.add_argument("--n-steps", type=int, default=2048)
+    parser.add_argument("--batch-size", type=int, default=256)
+    parser.add_argument("--n-epochs", type=int, default=10)
+    parser.add_argument("--gamma", type=float, default=0.995)
+    parser.add_argument("--gae-lambda", type=float, default=0.95)
+    parser.add_argument("--clip-range", type=float, default=0.1)
+    parser.add_argument("--ent-coef", type=float, default=0.0)
+    parser.add_argument("--target-kl", type=float, default=0.02,
+                        help="SB3 per-batch KL early-stop guard.")
+    parser.add_argument("--seed", type=int, default=0)
+    parser.add_argument("--device", default="cpu")
+    parser.add_argument("--drive-mode", default=None,
+                        choices=["differential", "mecanum"],
+                        help="Drive mode. If not set, inferred from "
+                             "BC action dimension (2→differential, 3→mecanum).")
+    parser.add_argument("--imitate-weight", type=float, default=None,
+                        help="Override env.W_IMITATE (e.g. 0.0 to drop "
+                             "Strömbom imitation during fine-tune).")
+    parser.add_argument("--time-weight", type=float, default=None,
+                        help="Override env.W_TIME (e.g. -0.1 for a "
+                             "per-step time penalty).")
+    parser.add_argument("--difficulty", type=float, default=1.0,
+                        help="HerdingEnv difficulty for PPO rollouts. "
+                             "Must match eval (1.0) to avoid train/eval "
+                             "distribution mismatch.")
+    parser.add_argument("--max-n-sheep", type=int, default=10,
+                        help="Upper bound on flock size sampled each reset.")
+    parser.add_argument("--world", default=None,
+                        choices=["field", "field_round"],
+                        help="World shape. If not set, uses HERDING_WORLD "
+                             "env var or defaults to 'field'.")
+    args = parser.parse_args()
+    # --world was already honoured in the early pre-parse above; here we
+    # just sanity-check that the final argparse view agrees.
+    if args.world is not None:
+        from herding.world.geometry import FIELD_SHAPE as _CURRENT_SHAPE
+        if args.world != _CURRENT_SHAPE:
+            print(f"[rl] WARNING: --world={args.world} but geometry is "
+                  f"'{_CURRENT_SHAPE}'. File a bug.")
+
+    bc_zip = Path(args.bc) / "policy.zip"
+    if not bc_zip.exists():
+        raise SystemExit(
+            f"BC checkpoint not found at {bc_zip}. Train bc first with "
+            f"`python -m training.bc.pretrain`."
+        )
+
+    out = Path(args.out)
+    out.mkdir(parents=True, exist_ok=True)
+    (out / "checkpoints").mkdir(exist_ok=True)
+    (out / "best").mkdir(exist_ok=True)
+
+    # Infer frame_stack from the BC checkpoint's obs space.
+    ref_only = PPO.load(str(bc_zip), device=args.device)
+    obs_dim = int(ref_only.observation_space.shape[0])
+    if obs_dim % OBS_DIM != 0:
+        raise SystemExit(f"BC obs dim {obs_dim} is not a multiple of {OBS_DIM}.")
+    frame_stack = obs_dim // OBS_DIM
+    print(f"[rl] BC obs dim {obs_dim} → frame_stack={frame_stack}")
+
+    # Infer drive mode from BC action dim if not explicitly set.
+    bc_action_dim = int(ref_only.action_space.shape[0])
+    if args.drive_mode is not None:
+        drive_mode = args.drive_mode
+    elif bc_action_dim == 3:
+        drive_mode = "mecanum"
+    else:
+        drive_mode = "differential"
+    print(f"[rl] drive_mode={drive_mode} (BC action_dim={bc_action_dim})")
+
+    env_fns = [_make_env(i, args.seed, frame_stack, drive_mode,
+                         difficulty=args.difficulty,
+                         max_n_sheep=args.max_n_sheep)
+               for i in range(args.n_envs)]
+    venv = SubprocVecEnv(env_fns) if args.n_envs > 1 else DummyVecEnv(env_fns)
+    eval_venv = DummyVecEnv([_make_env(99, args.seed + 999, frame_stack,
+                                       drive_mode,
+                                       difficulty=args.difficulty,
+                                       max_n_sheep=args.max_n_sheep)])
+    print(f"[rl] difficulty={args.difficulty} max_n_sheep={args.max_n_sheep}")
+
+    # Reward-shaping overrides (broadcast to every env instance).
+    def _broadcast(method: str, value):
+        for v in (venv, eval_venv):
+            try:
+                v.env_method(method, value)
+            except AttributeError:
+                # Fall back to the wrapped vec env.
+                v.venv.env_method(method, value)
+    if args.imitate_weight is not None:
+        _broadcast("set_imitate_weight", args.imitate_weight)
+        print(f"[rl] W_IMITATE overridden to {args.imitate_weight}")
+    if args.time_weight is not None:
+        _broadcast("set_time_weight", args.time_weight)
+        print(f"[rl] W_TIME overridden to {args.time_weight}")
+
+    # Build a fresh KLPPO at the right obs/action shape, then copy BC
+    # weights into both the trainable policy and the frozen reference.
+    model = KLPPO(
+        "MlpPolicy", venv,
+        ref_policy=None,  # filled in below
+        kl_coef=args.kl_coef,
+        learning_rate=args.learning_rate,
+        n_steps=args.n_steps,
+        batch_size=args.batch_size,
+        n_epochs=args.n_epochs,
+        gamma=args.gamma,
+        gae_lambda=args.gae_lambda,
+        clip_range=args.clip_range,
+        ent_coef=args.ent_coef,
+        target_kl=args.target_kl,
+        policy_kwargs=dict(
+            net_arch=dict(pi=[512, 512], vf=[512, 512]),
+            log_std_init=args.log_std,
+        ),
+        verbose=1,
+        seed=args.seed,
+        device=args.device,
+        tensorboard_log=str(out / "tb"),
+    )
+
+    # strict=False tolerates keys that are missing from or extra in the BC
+    # state dict (their counts are printed below); tensors present on both
+    # sides must still match in shape.
+    bc_state = ref_only.policy.state_dict()
+    missing, unexpected = model.policy.load_state_dict(bc_state, strict=False)
+    print(f"[rl] BC → policy: missing={len(missing)} unexpected={len(unexpected)}")
+
+    ref_policy = type(model.policy)(
+        observation_space=model.observation_space,
+        action_space=model.action_space,
+        lr_schedule=lambda _: 0.0,
+        net_arch=dict(pi=[512, 512], vf=[512, 512]),
+        log_std_init=args.log_std,
+    ).to(args.device)
+    ref_policy.load_state_dict(bc_state, strict=False)
+    model.ref_policy = ref_policy
+    model.ref_policy.set_training_mode(False)
+    for p in model.ref_policy.parameters():
+        p.requires_grad = False
+
+    # Force both policies to the same log_std so the KL term measures
+    # mean drift only, not a std mismatch carried over from BC.
+    with th.no_grad():
+        model.policy.log_std.fill_(args.log_std)
+        model.ref_policy.log_std.fill_(args.log_std)
+    if args.freeze_log_std:
+        model.policy.log_std.requires_grad = False
+        print(f"[rl] log_std frozen at {args.log_std} (σ ≈ {np.exp(args.log_std):.3f})")
+
+    ckpt_cb = CheckpointCallback(
+        save_freq=max(1, 50_000 // args.n_envs),
+        save_path=str(out / "checkpoints"),
+        name_prefix="ppo",
+    )
+    # EvalCallback writes <save_path>/best_model.zip on every new best
+    # eval reward. We send it straight to ``out/`` and rename to
+    # ``policy.zip`` after training so the deployed file lives at the
+    # canonical path.
+    eval_cb = EvalCallback(
+        eval_venv,
+        best_model_save_path=str(out),
+        log_path=str(out / "evals"),
+        eval_freq=max(1, 20_000 // args.n_envs),
+        n_eval_episodes=5,
+        deterministic=True,
+    )
+
+    print(f"[rl] training: total_timesteps={args.total_timesteps} "
+          f"n_envs={args.n_envs} lr={args.learning_rate} kl_coef={args.kl_coef}")
+    model.learn(total_timesteps=args.total_timesteps,
+                callback=[ckpt_cb, eval_cb], progress_bar=True)
+
+    # Save the end-of-training state for debugging convergence behaviour.
+    model.save(out / "final.zip")
+
+    # Promote the EvalCallback's best-by-eval-reward snapshot to the
+    # canonical ``policy.zip`` (what the controller loads). Fall back
+    # to the final state if eval never recorded a "best".
+    import shutil
+    best_zip = out / "best_model.zip"
+    policy_zip = out / "policy.zip"
+    if best_zip.exists():
+        # replace() overwrites an existing policy.zip in a single step, so
+        # there is no window in which the canonical file is missing.
+        best_zip.replace(policy_zip)
+        print(f"[rl] best snapshot → {policy_zip}  (final state kept at {out/'final.zip'})")
+    else:
+        shutil.copy(out / "final.zip", policy_zip)
+        print(f"[rl] no best snapshot recorded; using final → {policy_zip}")
+
+
+if __name__ == "__main__":
+    main()
@@ -1,211 +0,0 @@
-"""
-PPO training script for the herding task.
-
-Usage examples
--------------
-# Start fresh with curriculum (1 → 5 sheep):
-    python train.py --curriculum
-
-# Resume from checkpoint, skip directly to 3 sheep:
-    python train.py --resume runs/ppo_herding/ckpt_200000_steps.zip --n-sheep 3
-
-# Quick smoke-test (no curriculum, single env):
-    python train.py --n-envs 1 --total-steps 50000
-"""
-
-import argparse
-import os
-
-import numpy as np
-from stable_baselines3 import PPO
-from stable_baselines3.common.callbacks import (
-    BaseCallback,
-    CallbackList,
-    CheckpointCallback,
-    EvalCallback,
-)
-from stable_baselines3.common.vec_env import SubprocVecEnv, VecNormalize
-
-from herding_env import HerdingEnv
-
-
-# ---------------------------------------------------------------------------
-# Curriculum callback
-# ---------------------------------------------------------------------------
-
-class CurriculumCallback(BaseCallback):
-    """
-    Advances the curriculum (number of active sheep) when the rolling mean
-    episode success rate exceeds a threshold.
-
-    Success = episode terminated (all sheep penned) rather than truncated.
-    """
-
-    THRESHOLD   = 0.75   # success rate to graduate
-    WINDOW      = 100    # episodes to average over
-    MIN_EPISODES = 50    # don't graduate before seeing this many episodes
-
-    def __init__(self, start_sheep: int, max_sheep: int, verbose: int = 1):
-        super().__init__(verbose)
-        self.max_sheep  = max_sheep
-        self._successes = []
-        self._cur_sheep = start_sheep
-
-    def _on_step(self) -> bool:
-        for info, done in zip(self.locals["infos"], self.locals["dones"]):
-            if done:
-                truncated = info.get("TimeLimit.truncated", False)
-                self._successes.append(0 if truncated else 1)
-                if len(self._successes) > self.WINDOW:
-                    self._successes.pop(0)
-
-        if (self._cur_sheep < self.max_sheep
-                and len(self._successes) >= self.MIN_EPISODES
-                and np.mean(self._successes) >= self.THRESHOLD):
-            self._cur_sheep += 1
-            self.training_env.env_method("set_n_sheep", self._cur_sheep)
-            self._successes.clear()
-            if self.verbose:
-                print(f"\n[Curriculum] Advanced to {self._cur_sheep} sheep "
-                      f"at step {self.num_timesteps}\n")
-
-        return True
-
-
-# ---------------------------------------------------------------------------
-# Environment factory
-# ---------------------------------------------------------------------------
-
-def make_env(n_sheep: int, seed: int, max_steps: int):
-    def _init():
-        env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps)
-        env.reset(seed=seed)
-        return env
-    return _init
-
-
-# ---------------------------------------------------------------------------
-# Main
-# ---------------------------------------------------------------------------
-
-def parse_args():
-    p = argparse.ArgumentParser()
-    p.add_argument("--n-sheep",     type=int,   default=1,
-                   help="Starting number of sheep (or fixed count if no curriculum)")
-    p.add_argument("--max-sheep",   type=int,   default=5,
-                   help="Maximum sheep for curriculum (ignored without --curriculum)")
-    p.add_argument("--n-envs",      type=int,   default=8,
-                   help="Number of parallel environments")
-    p.add_argument("--total-steps", type=int,   default=5_000_000,
-                   help="Total environment steps to train for")
-    p.add_argument("--max-steps",   type=int,   default=2000,
-                   help="Episode step limit inside each env")
-    p.add_argument("--curriculum",  action="store_true",
-                   help="Enable automatic curriculum advancement")
-    p.add_argument("--resume",      type=str,   default=None,
-                   help="Path to a .zip checkpoint to resume training from")
-    p.add_argument("--run-dir",     type=str,   default="runs/ppo_herding",
-                   help="Output directory for checkpoints and logs")
-    p.add_argument("--save-freq",   type=int,   default=100_000,
-                   help="Checkpoint every N steps (per-env, not total)")
-    p.add_argument("--eval-freq",   type=int,   default=50_000,
-                   help="Evaluate every N steps")
-    p.add_argument("--eval-eps",    type=int,   default=20,
-                   help="Episodes per evaluation run")
-    return p.parse_args()
-
-
-def main():
-    args = parse_args()
-    os.makedirs(args.run_dir, exist_ok=True)
-    ckpt_dir = os.path.join(args.run_dir, "checkpoints")
-    best_dir = os.path.join(args.run_dir, "best_model")
-    norm_path = os.path.join(args.run_dir, "vecnorm.pkl")
-    os.makedirs(ckpt_dir, exist_ok=True)
-
-    # Training envs
-    train_env = SubprocVecEnv([
-        make_env(args.n_sheep, seed=i, max_steps=args.max_steps)
-        for i in range(args.n_envs)
-    ])
-    if args.resume and os.path.exists(norm_path):
-        train_env = VecNormalize.load(norm_path, train_env)
-        train_env.training = True
-        train_env.norm_reward = True
-    else:
-        train_env = VecNormalize(train_env, norm_obs=True, norm_reward=True,
-                                 clip_obs=10.0)
-
-    # Eval env (no reward normalisation, deterministic)
-    eval_env = SubprocVecEnv([
-        make_env(args.n_sheep, seed=1000 + i, max_steps=args.max_steps)
-        for i in range(2)
-    ])
-    eval_env = VecNormalize(eval_env, norm_obs=True, norm_reward=False,
-                            clip_obs=10.0, training=False)
-
-    # Callbacks
-    checkpoint_cb = CheckpointCallback(
-        save_freq=max(args.save_freq // args.n_envs, 1),
-        save_path=ckpt_dir,
-        name_prefix="ckpt",
-        save_vecnormalize=True,
-    )
-    eval_cb = EvalCallback(
-        eval_env,
-        best_model_save_path=best_dir,
-        log_path=args.run_dir,
-        eval_freq=max(args.eval_freq // args.n_envs, 1),
-        n_eval_episodes=args.eval_eps,
-        deterministic=True,
-        verbose=1,
-    )
-    callbacks = [checkpoint_cb, eval_cb]
-    if args.curriculum:
-        callbacks.append(CurriculumCallback(start_sheep=args.n_sheep,
-                                            max_sheep=args.max_sheep))
-    callback_list = CallbackList(callbacks)
-
-    # Model
-    ppo_kwargs = dict(
-        policy          = "MlpPolicy",
-        env             = train_env,
-        learning_rate   = 3e-4,
-        n_steps         = 2048,
-        batch_size      = 256,
-        n_epochs        = 10,
-        gamma           = 0.995,
-        gae_lambda      = 0.95,
-        clip_range      = 0.2,
-        ent_coef        = 0.005,
-        vf_coef         = 0.5,
-        max_grad_norm   = 0.5,
-        policy_kwargs   = dict(net_arch=[256, 256]),
-        tensorboard_log = args.run_dir,
-        verbose         = 1,
-    )
-
-    if args.resume:
-        print(f"Resuming from {args.resume}")
-        model = PPO.load(args.resume, env=train_env, **{
-            k: v for k, v in ppo_kwargs.items()
-            if k not in ("policy", "env")
-        })
-    else:
-        model = PPO(**ppo_kwargs)
-
-    model.learn(
-        total_timesteps=args.total_steps,
-        callback=callback_list,
-        reset_num_timesteps=args.resume is None,
-        tb_log_name="ppo",
-    )
-
-    # Save final artefacts
-    model.save(os.path.join(args.run_dir, "final_model"))
-    train_env.save(norm_path)
-    print(f"\nTraining complete. Artefacts saved to {args.run_dir}/")
-
-
-if __name__ == "__main__":
-    main()
@@ -10,7 +10,7 @@ EXTERNPROTO "../protos/Sheep.proto"
 # World
 WorldInfo {
  info [
-    "RL-Based Autonomous Shepherd Robot"
+    "Autonomous Shepherd Robot (Strömbom)"
    "Group G25"
  ]
  title "Shepherd Herding"
@@ -106,19 +106,26 @@ Solid { translation -2.5 -15 0.84 children [ Shape { appearance USE CAP geometry
 Solid { translation 14 -15 0.40 children [ Shape { appearance USE STONE_A geometry Box { size 2.0 0.16 0.80 } } ] boundingObject Box { size 2.0 0.16 0.80 } }
 Solid { translation 14 -15 0.84 children [ Shape { appearance USE CAP geometry Box { size 2.1 0.26 0.07 } } ] boundingObject Box { size 2.1 0.26 0.07 } }
 # Gate posts
-Solid { translation 10 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
-Solid { translation 13 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
-# Outer gate (wooden, slightly ajar, Z-brace)
-Solid { translation 11.5 -15.08 0.55 rotation 0 0 1 0.25 children [
+Solid { translation 10 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
+Solid { translation 13 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
+# Outer gate — fully open, hinged on the west gate post. Modeled as a swung-back
+# wooden gate parallel to the south wall, on the west side, so the 3m corridor
+# between gate posts (x=10..13, y=-15) is unobstructed.
+Solid { translation 8.6 -15.05 0.55 rotation 0 0 1 0 children [
  Shape { appearance USE WOOD geometry Box { size 2.80 0.05 1.00 } }
-  Transform { translation 0 0.02 0 rotation 0 1 0 0.34 children [ Shape { appearance DEF FPOST PBRAppearance { baseColor 0.35 0.22 0.10 roughness 0.90 } geometry Box { size 2.97 0.04 0.06 } } ] }
+  # FPOST appearance DEF lives here so the external pen below can USE it.
+  Transform { translation 0 0.02 0 rotation 0 1 0 0.34 children [
+    Shape { appearance DEF FPOST PBRAppearance { baseColor 0.35 0.22 0.10 roughness 0.90 } geometry Box { size 2.97 0.04 0.06 } }
+  ] }
 ] boundingObject Box { size 2.80 0.08 1.00 } }

-# ==================== QUARANTINE PEN (wooden post-and-rail fence, inside field) ====================
-# Flow: main field → inner gate → quarantine area → outer gate → outside
+# ==================== EXTERNAL PEN (south of field, accessed through south-wall gate) ====================
+# Flow: main field → south-wall gate (x ∈ [10, 13], y = -15) → external pen
+# The pen is a wooden post-and-rail rectangle south of the field, x ∈ [10, 13],
+# y ∈ [-22, -15], open on the north side (the gate hole is the entrance).

-# West wall (x=10, ~7m along Y)
-Solid { translation 10 -11.46 0.55 children [
+# Pen west wall (x=10, y from -22 to -15, length 7m)
+Solid { translation 10 -18.5 0.55 children [
  Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
@@ -130,8 +137,8 @@ Solid { translation 10 -11.46 0.55 children [
  Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
 ] boundingObject Box { size 0.14 6.92 1.10 } }

-# East wall (x=13)
-Solid { translation 13 -11.46 0.55 children [
+# Pen east wall (x=13, y from -22 to -15, length 7m)
+Solid { translation 13 -18.5 0.55 children [
  Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
  Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
@@ -143,39 +150,50 @@ Solid { translation 13 -11.46 0.55 children [
  Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
 ] boundingObject Box { size 0.14 6.92 1.10 } }

-# North wall - open entrance (no wall, just corner posts)
-Solid { translation 10 -8 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] boundingObject Box { size 0.12 0.12 1.10 } }
-Solid { translation 13 -8 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] boundingObject Box { size 0.12 0.12 1.10 } }
+# Pen south wall (y=-22, x from 10 to 13, length 3m, closes the back of the pen): 3 posts, 3 rails and a top cap; collision is a single 2.92 x 0.14 x 1.10 box
+Solid { translation 11.5 -22 0.55 children [
+  Transform { translation -1.5 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation  0   0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation  1.5 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation 0 0 -0.38 children [ Shape { appearance USE WOOD geometry Box { size 2.92 0.06 0.08 } } ] }
+  Transform { translation 0 0 -0.05 children [ Shape { appearance USE WOOD geometry Box { size 2.92 0.06 0.08 } } ] }
+  Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 2.92 0.06 0.08 } } ] }
+  Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 2.92 0.14 0.04 } } ] }
+] boundingObject Box { size 2.92 0.14 1.10 } }
+
+# Pen north corner posts at the gate opening (no wall — sheep enter here from the field); these Solids declare no boundingObject, so they are decorative only
+Solid { translation 10 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+Solid { translation 13 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }

 # Corner pillars
-Solid { translation  15  15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
-Solid { translation  15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
-Solid { translation -15  15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
-Solid { translation -15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
+Solid { translation  15  15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
+Solid { translation  15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
+Solid { translation -15  15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
+Solid { translation -15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }

 # Mid-pillars every 5 m — East
-Solid { translation  15  10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation  15   5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation  15   0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation  15  -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation  15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation  15  10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation  15   5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation  15   0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation  15  -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation  15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 # West
-Solid { translation -15  10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation -15   5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation -15   0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation -15  -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation -15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation -15  10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -15   5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -15   0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -15  -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 # North
-Solid { translation  10  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation   5  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation   0  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation  -5  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation -10  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation  10  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation   5  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation   0  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation  -5  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -10  15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
 # South
-Solid { translation   5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation   0 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation  -5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
-Solid { translation -10 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
+Solid { translation   5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation   0 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation  -5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -10 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }

 # ==================== BARN 1 — Gambrel/Dutch style (NE, outside fence) ====================
 # Body 10×7×4, weathered gray-brown wood, gambrel roof, large double doors
@@ -503,28 +521,16 @@ ShepherdDog {
 }

 # ==================== SHEEP ====================
-Sheep {
-  translation 3 2 0.5
-  name "sheep1"
-  controller "sheep"
-}
-Sheep {
-  translation 3 -2 0.5
-  name "sheep2"
-  controller "sheep"
-}
-Sheep {
-  translation 4 0 0.5
-  name "sheep3"
-  controller "sheep"
-}
-Sheep {
-  translation 3.5 1 0.5
-  name "sheep4"
-  controller "sheep"
-}
-Sheep {
-  translation 3.5 -1 0.5
-  name "sheep5"
-  controller "sheep"
-}
+# Up to 10 sheep, scattered through the field's central/north zone. To test a
+# smaller flock, comment out slots from the end (sheep10 first); the dog policy
+# is trained to handle 1..10 sheep, so any leading subset sheep1..sheepN works.
+Sheep { translation  3.0  2.0 0.5 name "sheep1"  controller "sheep" }
+Sheep { translation  3.0 -2.0 0.5 name "sheep2"  controller "sheep" }
+Sheep { translation  4.0  0.0 0.5 name "sheep3"  controller "sheep" }
+Sheep { translation -3.0  4.0 0.5 name "sheep4"  controller "sheep" }
+Sheep { translation -5.0 -2.0 0.5 name "sheep5"  controller "sheep" }
+Sheep { translation  6.0  5.0 0.5 name "sheep6"  controller "sheep" }
+Sheep { translation -6.0  6.0 0.5 name "sheep7"  controller "sheep" }
+Sheep { translation  0.0  8.0 0.5 name "sheep8"  controller "sheep" }
+Sheep { translation -8.0  0.0 0.5 name "sheep9"  controller "sheep" }
+Sheep { translation  7.0 -4.0 0.5 name "sheep10" controller "sheep" }
@@ -0,0 +1,537 @@
+#VRML_SIM R2025a utf8
+
+EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/objects/backgrounds/protos/TexturedBackground.proto"
+EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/objects/backgrounds/protos/TexturedBackgroundLight.proto"
+EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/objects/floors/protos/UnevenTerrain.proto"
+EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/appearances/protos/Grass.proto"
+EXTERNPROTO "../protos/ShepherdDog.proto"
+EXTERNPROTO "../protos/Sheep.proto"
+
+# World: global WorldInfo — info/title strings, basicTimeStep 16, ERP 0.62, and one ContactProperties entry (coulombFriction 12, softCFM 1e-05) with no material names given
+WorldInfo {
+  info [
+    "Autonomous Shepherd Robot (Strömbom)"
+    "Group G25"
+  ]
+  title "Shepherd Herding (Round)"
+  ERP 0.62
+  basicTimeStep 16
+  contactProperties [
+    ContactProperties {
+      coulombFriction [
+        12
+      ]
+      softCFM 1e-05
+    }
+  ]
+}
+
+# Viewpoint: initial camera pose (DEF VIEWPOINT), placed at y ≈ -101, z ≈ 42 — well south of and above the R=15 m field
+DEF VIEWPOINT Viewpoint {
+  position 4.34 -100.99 41.73
+  orientation 0.199 -0.190 -0.961 4.624
+  fieldOfView 0.785
+}
+
+# Background: a single flat sky colour; no other Background fields are set on this node
+Background {
+  skyColor [0.55 0.75 0.95]
+}
+# Single sun: direction -0.3 0.5 -0.85 sends light toward -x/+y and downward, i.e. it shines in from the SE (x = east, y = north in this world)
+DirectionalLight {
+  ambientIntensity 1
+  direction -0.3 0.5 -0.85
+  color 1 0.98 0.92
+  intensity 2.5
+  castShadows TRUE
+}
+
+# Grass terrain: UnevenTerrain of size 100 100 0.3 on a 50 x 50 grid with 4 Perlin octaves; Grass appearance is tinted via colorOverride and its texture scaled 100 x 100
+UnevenTerrain {
+  rotation 0 0 1 -1.5708
+  size 100 100 0.3
+  xDimension 50
+  yDimension 50
+  appearance Grass {
+    colorOverride 0.78 0.88 0.68
+    textureTransform TextureTransform {
+      scale 100 100
+    }
+  }
+  perlinNOctaves 4
+}
+
+# ==================== APPEARANCES (geometry-less Shapes that only DEF shared materials for later USE) ====================
+Transform {
+  children [
+    Shape { appearance DEF STONE_A    PBRAppearance { baseColor 0.48 0.45 0.40 roughness 0.95 metalness 0 } }
+    Shape { appearance DEF STONE_B    PBRAppearance { baseColor 0.36 0.33 0.29 roughness 0.95 metalness 0 } }
+    Shape { appearance DEF STONE_C    PBRAppearance { baseColor 0.58 0.55 0.50 roughness 0.90 metalness 0 } }
+    Shape { appearance DEF CAP        PBRAppearance { baseColor 0.54 0.51 0.46 roughness 0.80 metalness 0 } }
+    Shape { appearance DEF BARN_RED   PBRAppearance { baseColor 0.62 0.18 0.12 roughness 0.80 metalness 0 } }
+    Shape { appearance DEF BARN_ROOF  PBRAppearance { baseColor 0.28 0.20 0.13 roughness 0.72 metalness 0 } }
+    Shape { appearance DEF WOOD       PBRAppearance { baseColor 0.48 0.32 0.16 roughness 0.90 metalness 0 } }
+    Shape { appearance DEF TRUNK      PBRAppearance { baseColor 0.38 0.24 0.11 roughness 0.90 metalness 0 } }
+    Shape { appearance DEF LEAF_A     PBRAppearance { baseColor 0.22 0.52 0.16 roughness 0.85 metalness 0 } }
+    Shape { appearance DEF LEAF_B     PBRAppearance { baseColor 0.16 0.42 0.10 roughness 0.85 metalness 0 } }
+    Shape { appearance DEF LEAF_C     PBRAppearance { baseColor 0.30 0.60 0.20 roughness 0.80 metalness 0 } }
+    Shape { appearance DEF STRAW      PBRAppearance { baseColor 0.85 0.75 0.35 roughness 0.95 metalness 0 } }
+    Shape { appearance DEF HAT        PBRAppearance { baseColor 0.50 0.35 0.18 roughness 0.85 metalness 0 } }
+    Shape { appearance DEF SHIRT      PBRAppearance { baseColor 0.60 0.30 0.30 roughness 0.80 metalness 0 } }
+    Shape { appearance DEF PANTS      PBRAppearance { baseColor 0.25 0.25 0.30 roughness 0.80 metalness 0 } }
+    Shape { appearance DEF DOOR_MAT   PBRAppearance { baseColor 0.55 0.38 0.20 roughness 0.72 metalness 0 } }
+    Shape { appearance DEF GLASS      PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } }
+    Shape { appearance DEF HAY        PBRAppearance { baseColor 0.82 0.72 0.32 roughness 0.95 metalness 0 } }
+    Shape { appearance DEF TRIM       PBRAppearance { baseColor 0.90 0.88 0.82 roughness 0.70 metalness 0 } }
+  ]
+}
+
+# ==================== CIRCULAR STONE WALL (R=15 m) ====================
+# Each chord is a STONE_A body (z 0.40) plus a CAP strip (z 0.84): 16 full-length chords (5.21 m) at 20° steps, and two shortened ones (3.65 m) at x = ±3.37 flanking the south gate opening.
+Solid { translation 15.00 0.00 0.40 rotation 0 0 1 -1.5708 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation 15.00 0.00 0.84 rotation 0 0 1 -1.5708 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation 14.10 5.13 0.40 rotation 0 0 1 -1.2217 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation 14.10 5.13 0.84 rotation 0 0 1 -1.2217 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation 11.49 9.64 0.40 rotation 0 0 1 -0.8727 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation 11.49 9.64 0.84 rotation 0 0 1 -0.8727 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation 7.50 12.99 0.40 rotation 0 0 1 -0.5236 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation 7.50 12.99 0.84 rotation 0 0 1 -0.5236 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation 2.60 14.77 0.40 rotation 0 0 1 -0.1745 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation 2.60 14.77 0.84 rotation 0 0 1 -0.1745 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation -2.60 14.77 0.40 rotation 0 0 1 0.1745 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation -2.60 14.77 0.84 rotation 0 0 1 0.1745 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation -7.50 12.99 0.40 rotation 0 0 1 0.5236 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation -7.50 12.99 0.84 rotation 0 0 1 0.5236 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation -11.49 9.64 0.40 rotation 0 0 1 0.8727 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation -11.49 9.64 0.84 rotation 0 0 1 0.8727 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation -14.10 5.13 0.40 rotation 0 0 1 1.2217 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation -14.10 5.13 0.84 rotation 0 0 1 1.2217 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation -15.00 0.00 0.40 rotation 0 0 1 1.5708 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation -15.00 0.00 0.84 rotation 0 0 1 1.5708 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation -14.10 -5.13 0.40 rotation 0 0 1 1.9199 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation -14.10 -5.13 0.84 rotation 0 0 1 1.9199 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation -11.49 -9.64 0.40 rotation 0 0 1 2.2689 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation -11.49 -9.64 0.84 rotation 0 0 1 2.2689 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation -7.50 -12.99 0.40 rotation 0 0 1 2.6180 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation -7.50 -12.99 0.84 rotation 0 0 1 2.6180 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation -3.37 -14.62 0.40 rotation 0 0 1 2.9671 children [ Shape { appearance USE STONE_A geometry Box { size 3.65 0.16 0.80 } } ] boundingObject Box { size 3.65 0.16 0.80 } }
+Solid { translation -3.37 -14.62 0.84 rotation 0 0 1 2.9671 children [ Shape { appearance USE CAP geometry Box { size 3.7 0.26 0.07 } } ] boundingObject Box { size 3.7 0.26 0.07 } }
+Solid { translation 3.37 -14.62 0.40 rotation 0 0 1 3.3161 children [ Shape { appearance USE STONE_A geometry Box { size 3.65 0.16 0.80 } } ] boundingObject Box { size 3.65 0.16 0.80 } }
+Solid { translation 3.37 -14.62 0.84 rotation 0 0 1 3.3161 children [ Shape { appearance USE CAP geometry Box { size 3.7 0.26 0.07 } } ] boundingObject Box { size 3.7 0.26 0.07 } }
+Solid { translation 7.50 -12.99 0.40 rotation 0 0 1 3.6652 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation 7.50 -12.99 0.84 rotation 0 0 1 3.6652 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation 11.49 -9.64 0.40 rotation 0 0 1 4.0143 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation 11.49 -9.64 0.84 rotation 0 0 1 4.0143 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+Solid { translation 14.10 -5.13 0.40 rotation 0 0 1 4.3633 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
+Solid { translation 14.10 -5.13 0.84 rotation 0 0 1 4.3633 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
+
+# Gate posts at x = ±1.57 (the same x as the external pen's side walls); no boundingObject is declared, so they are decorative only
+Solid { translation -1.57 -14.92 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
+Solid { translation 1.57 -14.92 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
+# Outer gate — swung-back beside west gate post (leaf spans x ≈ -4.37 to -1.57). Its diagonal brace also DEFs FPOST, which the pen fence nodes USE, so this node must stay ahead of them in the file.
+Solid { translation -2.97 -14.92 0.55 rotation 0 0 1 0 children [
+  Shape { appearance USE WOOD geometry Box { size 2.80 0.05 1.00 } }
+  Transform { translation 0 0.02 0 rotation 0 1 0 0.34 children [
+    Shape { appearance DEF FPOST PBRAppearance { baseColor 0.35 0.22 0.10 roughness 0.90 } geometry Box { size 2.97 0.04 0.06 } }
+  ] }
+] boundingObject Box { size 2.80 0.08 1.00 } }
+
+# Pillars between wall sections: one per 20° joint (bearings 10°, 30°, … 350°) on radius ≈ 15.2 m, each yawed 45° past its bearing; the 270° joint is omitted because the gate opening sits there. No boundingObject, so visual only.
+Solid { translation 14.97 2.64 0.53 rotation 0 0 1 0.9599 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation 13.16 7.60 0.53 rotation 0 0 1 1.3090 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation 9.77 11.64 0.53 rotation 0 0 1 1.6581 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation 5.20 14.28 0.53 rotation 0 0 1 2.0071 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation 0.00 15.20 0.53 rotation 0 0 1 2.3562 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -5.20 14.28 0.53 rotation 0 0 1 2.7053 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -9.77 11.64 0.53 rotation 0 0 1 3.0543 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -13.16 7.60 0.53 rotation 0 0 1 3.4034 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -14.97 2.64 0.53 rotation 0 0 1 3.7525 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -14.97 -2.64 0.53 rotation 0 0 1 4.1015 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -13.16 -7.60 0.53 rotation 0 0 1 4.4506 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -9.77 -11.64 0.53 rotation 0 0 1 4.7997 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation -5.20 -14.28 0.53 rotation 0 0 1 5.1487 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation 5.20 -14.28 0.53 rotation 0 0 1 5.8469 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation 9.77 -11.64 0.53 rotation 0 0 1 6.1959 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation 13.16 -7.60 0.53 rotation 0 0 1 6.5450 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+Solid { translation 14.97 -2.64 0.53 rotation 0 0 1 6.8941 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
+
+# ==================== EXTERNAL PEN (south of round field gate) ====================
+# Pen west wall (x=-1.57, y from -22 to -15, ~7 m): 5 posts 1.73 m apart, 3 rails and a top cap; collision is a single 0.14 x 6.92 x 1.10 box
+Solid { translation -1.57 -18.5 0.55 children [
+  Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation 0 1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation 0 3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation 0 0 -0.38 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
+  Transform { translation 0 0 -0.05 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
+  Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
+  Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
+] boundingObject Box { size 0.14 6.92 1.10 } }
+# Pen east wall (x=1.57, y from -22 to -15, ~7 m): 5 posts 1.73 m apart, 3 rails and a top cap; collision is a single 0.14 x 6.92 x 1.10 box
+Solid { translation 1.57 -18.5 0.55 children [
+  Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation 0 1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation 0 3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation 0 0 -0.38 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
+  Transform { translation 0 0 -0.05 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
+  Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
+  Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
+] boundingObject Box { size 0.14 6.92 1.10 } }
+# Pen south wall (y=-22, x from -1.57 to 1.57, closes the back of the pen): 3 posts, 3 rails and a top cap; collision is a single 3.16 x 0.14 x 1.10 box
+Solid { translation 0.00 -22 0.55 children [
+  Transform { translation -1.52 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation  0   0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation  1.52 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+  Transform { translation 0 0 -0.38 children [ Shape { appearance USE WOOD geometry Box { size 3.16 0.06 0.08 } } ] }
+  Transform { translation 0 0 -0.05 children [ Shape { appearance USE WOOD geometry Box { size 3.16 0.06 0.08 } } ] }
+  Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 3.16 0.06 0.08 } } ] }
+  Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 3.16 0.14 0.04 } } ] }
+] boundingObject Box { size 3.16 0.14 1.10 } }
+# Pen north corner posts at the gate opening (no wall on this side — it is the pen entrance); no boundingObject is declared, so the posts are decorative only
+Solid { translation -1.57 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+Solid { translation 1.57 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
+
+# Gate width: 3.14 m (pen x: [-1.57, 1.57])
+
+# ==================== BARN 1 — Gambrel/Dutch style (NE, outside fence) ====================
+# Body 10×7×4, weathered gray-brown wood, gambrel roof, large double doors
+# The Solid origin is the centre of the body box (world z = 2), so the body spans
+# local z -2..2 and every child offset below is measured from that centre.
+# The Solid has no rotation, so "south"/"north" below are -y/+y in world terms.
+Solid {
+  translation 18.5 25.49 2
+  children [
+    Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 10 7 4 } }
+    # Gambrel roof
+    # Lower pair of panels (steeper tilt about y), mirrored in x
+    Transform { translation -3.5 0 3.05 rotation 0 1 0 -0.611 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 3.9 7.2 0.18 } } ] }
+    Transform { translation 3.5 0 3.05 rotation 0 1 0 0.611 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 3.9 7.2 0.18 } } ] }
+    # Upper pair of panels (shallower tilt), mirrored in x
+    Transform { translation -1.0 0 4.55 rotation 0 1 0 -0.422 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 2.5 7.2 0.18 } } ] }
+    Transform { translation 1.0 0 4.55 rotation 0 1 0 0.422 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 2.5 7.2 0.18 } } ] }
+    # Flat ridge cap; its top face is the highest point of the barn (local z 5.15)
+    Transform { translation 0 0 5.04 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 1.6 7.2 0.22 } } ] }
+    # South gable fill
+    # Stack of progressively narrower slabs approximating the gable outline
+    Transform { translation 0 -3.57 2.40 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 8.8 0.16 0.80 } } ] }
+    Transform { translation 0 -3.57 3.10 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 6.8 0.16 0.70 } } ] }
+    Transform { translation 0 -3.57 3.70 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 5.1 0.16 0.60 } } ] }
+    Transform { translation 0 -3.57 4.10 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 4.0 0.16 0.40 } } ] }
+    Transform { translation 0 -3.57 4.42 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 2.7 0.16 0.60 } } ] }
+    Transform { translation 0 -3.57 4.84 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 0.9 0.16 0.36 } } ] }
+    # North gable fill
+    # Same slab stack as the south gable, mirrored to +y
+    Transform { translation 0  3.57 2.40 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 8.8 0.16 0.80 } } ] }
+    Transform { translation 0  3.57 3.10 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 6.8 0.16 0.70 } } ] }
+    Transform { translation 0  3.57 3.70 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 5.1 0.16 0.60 } } ] }
+    Transform { translation 0  3.57 4.10 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 4.0 0.16 0.40 } } ] }
+    Transform { translation 0  3.57 4.42 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 2.7 0.16 0.60 } } ] }
+    Transform { translation 0  3.57 4.84 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 0.9 0.16 0.36 } } ] }
+    # Double barn doors (south face)
+    # The door panel is 3.0 tall and centred at local z -0.50, so it spans local
+    # z -2..1, starting at the bottom of the body box.
+    Transform {
+      translation 0 -3.51 -0.50
+      children [
+        Shape { appearance PBRAppearance { baseColor 0.44 0.30 0.14 roughness 0.88 metalness 0 } geometry Box { size 2.8 0.10 3.0 } }
+        # Two thin darker boxes with opposite rotations about z
+        Transform { rotation 0 0 1  0.83 children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 0.10 0.12 3.75 } } ] }
+        Transform { rotation 0 0 1 -0.83 children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 0.10 0.12 3.75 } } ] }
+        # Door frame: left jamb, right jamb, lintel
+        Transform { translation -1.45 0  0    children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 0.12 0.14 3.24 } } ] }
+        Transform { translation  1.45 0  0    children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 0.12 0.14 3.24 } } ] }
+        Transform { translation  0    0  1.62 children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 3.04 0.14 0.14 } } ] }
+      ]
+    }
+    # Windows
+    # Two on the south face (y = -3.52), two on the +x face (x = 5.06)
+    Transform { translation -3.6 -3.52 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } geometry Box { size 1.40 0.12 1.10 } } ] }
+    Transform { translation  3.6 -3.52 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } geometry Box { size 1.40 0.12 1.10 } } ] }
+    Transform { translation 5.06  2.0 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } geometry Box { size 0.12 1.20 1.0 } } ] }
+    Transform { translation 5.06 -2.0 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } geometry Box { size 0.12 1.20 1.0 } } ] }
+    # Panel in the south gable; it uses the door colour rather than the window colour
+    Transform { translation 0 -3.52 3.90 children [ Shape { appearance PBRAppearance { baseColor 0.44 0.30 0.14 roughness 0.88 metalness 0 } geometry Box { size 1.30 0.12 1.00 } } ] }
+  ]
+  # NOTE(review): a bare Box here is presumably centred on the Solid origin, giving
+  # local z -3.5..3.5 (world -1.5..5.5): it reaches below z = 0 and stops short of
+  # the ridge at world 7.15. Confirm this coarse collision volume is intended.
+  boundingObject Box { size 10 7 7 }
+}
+
+# ==================== BARN 3 — Red barn (NE, outside fence, gate facing fence) ====================
+# Body 7×9×3.5, red walls, steep dark roof
+# Face names below ("south", "north", "east") are in the barn's LOCAL frame.
+# The Solid is rotated -1.5708 rad about z, which turns local -y (the door side)
+# onto world -x and local +x (the "east" windows) onto world -y.
+# The Solid origin is the centre of the body box (world z = 1.75).
+Solid {
+  translation 29.76 9.52 1.75
+  rotation 0 0 1 -1.5708
+  children [
+    Shape { appearance USE BARN_RED geometry Box { size 7 9 3.5 } }
+    # Roof
+    # Two panels tilted about y (mirrored in x) plus a flat ridge cap
+    Transform { translation -2.0 0 3.0 rotation 0 1 0 -0.70 children [ Shape { appearance USE BARN_ROOF geometry Box { size 4.2 9.2 0.20 } } ] }
+    Transform { translation 2.0 0 3.0 rotation 0 1 0 0.70 children [ Shape { appearance USE BARN_ROOF geometry Box { size 4.2 9.2 0.20 } } ] }
+    Transform { translation 0 0 4.28 children [ Shape { appearance USE BARN_ROOF geometry Box { size 2.0 9.2 0.24 } } ] }
+    # South gable fill
+    # Four 0.60-tall slabs, narrowing with height
+    Transform { translation 0 -4.52 2.05 children [ Shape { appearance USE BARN_RED geometry Box { size 6.2 0.16 0.60 } } ] }
+    Transform { translation 0 -4.52 2.65 children [ Shape { appearance USE BARN_RED geometry Box { size 4.5 0.16 0.60 } } ] }
+    Transform { translation 0 -4.52 3.25 children [ Shape { appearance USE BARN_RED geometry Box { size 2.9 0.16 0.60 } } ] }
+    Transform { translation 0 -4.52 3.85 children [ Shape { appearance USE BARN_RED geometry Box { size 1.2 0.16 0.60 } } ] }
+    # North gable fill
+    # Same slab stack mirrored to local +y
+    Transform { translation 0  4.52 2.05 children [ Shape { appearance USE BARN_RED geometry Box { size 6.2 0.16 0.60 } } ] }
+    Transform { translation 0  4.52 2.65 children [ Shape { appearance USE BARN_RED geometry Box { size 4.5 0.16 0.60 } } ] }
+    Transform { translation 0  4.52 3.25 children [ Shape { appearance USE BARN_RED geometry Box { size 2.9 0.16 0.60 } } ] }
+    Transform { translation 0  4.52 3.85 children [ Shape { appearance USE BARN_RED geometry Box { size 1.2 0.16 0.60 } } ] }
+    # Door
+    # The panel is 2.26 tall and centred at local z -0.62, so it spans local
+    # z -1.75..0.51, starting at the bottom of the body box.
+    Transform {
+      translation 0 -4.52 -0.62
+      children [
+        Shape { appearance USE DOOR_MAT geometry Box { size 1.70 0.14 2.26 } }
+        # Frame: lintel, then left and right jambs
+        Transform { translation 0 0 1.22 children [ Shape { appearance USE WOOD geometry Box { size 2.10 0.18 0.26 } } ] }
+        Transform { translation -0.90 0  0 children [ Shape { appearance USE WOOD geometry Box { size 0.24 0.18 2.52 } } ] }
+        Transform { translation  0.90 0  0 children [ Shape { appearance USE WOOD geometry Box { size 0.24 0.18 2.52 } } ] }
+        # Two horizontal battens across the panel
+        Transform { translation 0 0 -0.68 children [ Shape { appearance USE WOOD geometry Box { size 1.60 0.12 0.12 } } ] }
+        Transform { translation 0 0  0.30 children [ Shape { appearance USE WOOD geometry Box { size 1.60 0.12 0.12 } } ] }
+      ]
+    }
+    # Windows — south face
+    Transform { translation -2.2 -4.53 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.80 0.14 0.70 } } ] }
+    Transform { translation  2.2 -4.53 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.80 0.14 0.70 } } ] }
+    # East-face windows
+    Transform { translation 3.52  3.0 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.14 0.80 0.70 } } ] }
+    Transform { translation 3.52  0.0 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.14 0.80 0.70 } } ] }
+    Transform { translation 3.52 -3.0 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.14 0.80 0.70 } } ] }
+  ]
+  # NOTE(review): a bare Box here is presumably centred on the Solid origin, giving
+  # local z -3..3 (world -1.25..4.75), while the ridge cap tops out at local 4.40
+  # (world 6.15). Confirm this coarse collision volume is intended.
+  boundingObject Box { size 7 9 6 }
+}
+
+# ==================== TREES (outside fence) ====================
+
+# Tree A — large oak, SE
+# The trunk is centred at half its height, so its base sits at the Solid origin
+# (z = 0). The crown is five overlapping spheres. No boundingObject is declared.
+Solid {
+  translation 20 -18 0
+  children [
+    Transform { translation 0 0 2.0 children [ Shape { appearance USE TRUNK geometry Cylinder { height 4.0 radius 0.30 subdivision 10 } } ] }
+    # Crown: one large central sphere plus four smaller offset ones
+    Transform { translation  0.0  0.0 4.6 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 2.6 subdivision 4 } } ] }
+    Transform { translation  1.2  0.6 5.6 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.9 subdivision 4 } } ] }
+    Transform { translation -1.0  0.9 5.3 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.7 subdivision 4 } } ] }
+    Transform { translation  0.4 -1.1 5.1 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.5 subdivision 4 } } ] }
+    Transform { translation -0.5 -0.4 6.2 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.0 subdivision 4 } } ] }
+  ]
+}
+
+# Tree B — medium, north side, slightly west of centre (x = -8, y = 26)
+# The trunk is centred at half its height, so its base sits at z = 0. The crown
+# is three overlapping spheres. No boundingObject is declared.
+Solid {
+  translation -8 26 0
+  children [
+    Transform { translation 0 0 1.7 children [ Shape { appearance USE TRUNK geometry Cylinder { height 3.4 radius 0.25 subdivision 10 } } ] }
+    Transform { translation  0.0  0.0 3.8 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 2.2 subdivision 4 } } ] }
+    Transform { translation  0.9 -0.7 4.7 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.6 subdivision 4 } } ] }
+    Transform { translation -0.6  0.8 4.4 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.4 subdivision 4 } } ] }
+  ]
+}
+
+# Tree C — large, NW
+# Tallest trunk in this group of trees (4.6), centred at half its height so the
+# base sits at z = 0. The crown is four spheres. No boundingObject is declared.
+Solid {
+  translation -23 20 0
+  children [
+    Transform { translation 0 0 2.3 children [ Shape { appearance USE TRUNK geometry Cylinder { height 4.6 radius 0.36 subdivision 10 } } ] }
+    Transform { translation  0.0  0.0 5.2 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 2.9 subdivision 4 } } ] }
+    Transform { translation  1.3  0.9 6.3 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 2.1 subdivision 4 } } ] }
+    Transform { translation -1.1  1.1 6.0 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.9 subdivision 4 } } ] }
+    Transform { translation  0.6 -1.3 5.8 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.6 subdivision 4 } } ] }
+  ]
+}
+
+# Tree D — small, SW
+# The trunk (height 2.8) is centred at z = 1.4, so its base sits at z = 0. The
+# crown is three spheres. No boundingObject is declared.
+Solid {
+  translation -20 -23 0
+  children [
+    Transform { translation 0 0 1.4 children [ Shape { appearance USE TRUNK geometry Cylinder { height 2.8 radius 0.20 subdivision 10 } } ] }
+    Transform { translation  0.0  0.0 3.2 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.9 subdivision 4 } } ] }
+    Transform { translation -0.7  0.6 4.0 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.4 subdivision 4 } } ] }
+    Transform { translation  0.6 -0.5 3.8 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.2 subdivision 4 } } ] }
+  ]
+}
+
+# Tree E — north cluster
+# The trunk (height 3.8) is centred at z = 1.9, so its base sits at z = 0. The
+# crown is three spheres. No boundingObject is declared.
+Solid {
+  translation 7 23 0
+  children [
+    Transform { translation 0 0 1.9 children [ Shape { appearance USE TRUNK geometry Cylinder { height 3.8 radius 0.27 subdivision 10 } } ] }
+    Transform { translation  0.0  0.0 4.1 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 2.3 subdivision 4 } } ] }
+    Transform { translation  1.0  0.5 5.0 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.7 subdivision 4 } } ] }
+    Transform { translation -0.6 -0.9 4.8 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.4 subdivision 4 } } ] }
+  ]
+}
+
+# Tree F — small, south edge, just outside the pen's SW corner (x = -2.98, y = -22.8)
+# The trunk (height 2.6) is centred at z = 1.3, so its base sits at z = 0. The
+# crown is two spheres. No boundingObject is declared.
+Solid {
+  translation -2.98 -22.8 0
+  children [
+    Transform { translation 0 0 1.3 children [ Shape { appearance USE TRUNK geometry Cylinder { height 2.6 radius 0.19 subdivision 10 } } ] }
+    Transform { translation  0.0  0.0 2.9 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.7 subdivision 4 } } ] }
+    Transform { translation  0.6  0.4 3.7 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.2 subdivision 4 } } ] }
+  ]
+}
+
+# Tree G — west side
+# The trunk (height 4.0) is centred at z = 2.0, so its base sits at z = 0. The
+# crown is three spheres. No boundingObject is declared.
+Solid {
+  translation -23 -5 0
+  children [
+    Transform { translation 0 0 2.0 children [ Shape { appearance USE TRUNK geometry Cylinder { height 4.0 radius 0.29 subdivision 10 } } ] }
+    Transform { translation  0.0  0.0 4.4 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 2.4 subdivision 4 } } ] }
+    Transform { translation -1.0  0.8 5.3 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.8 subdivision 4 } } ] }
+    Transform { translation  0.9 -0.7 5.0 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.6 subdivision 4 } } ] }
+  ]
+}
+
+# Tree H — east side
+# The trunk (height 3.0) is centred at z = 1.5, so its base sits at z = 0. The
+# crown is three spheres. No boundingObject is declared.
+Solid {
+  translation 21.35 -1.05 0
+  children [
+    Transform { translation 0 0 1.5 children [ Shape { appearance USE TRUNK geometry Cylinder { height 3.0 radius 0.22 subdivision 10 } } ] }
+    Transform { translation  0.0  0.0 3.4 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 2.0 subdivision 4 } } ] }
+    Transform { translation  0.7  0.6 4.2 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.4 subdivision 4 } } ] }
+    Transform { translation -0.5 -0.8 4.0 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.2 subdivision 4 } } ] }
+  ]
+}
+
+# ==================== SCARECROW (east side, outside fence) ====================
+# Built in a local frame whose +x is the facing direction (face details sit at
+# positive x), then yawed 2.61799 rad about z. No boundingObject is declared.
+Solid {
+  translation 20 -10 0
+  rotation 0 0 1 2.61799
+  children [
+    # Vertical pole: height 2.44, centred at z = 1.22 so its base is at z = 0
+    Transform { translation 0 0 1.22 children [ Shape { appearance USE TRUNK geometry Cylinder { height 2.44 radius 0.045 subdivision 8 } } ] }
+    # Cross-bar: a cylinder rotated 1.5708 rad about x so it lies along local y
+    Transform { translation 0 0 2.02 rotation 1 0 0 1.5708 children [ Shape { appearance USE TRUNK geometry Cylinder { height 1.60 radius 0.032 subdivision 8 } } ] }
+    # Head group, centred at the top of the pole
+    Transform {
+      translation 0 0 2.44
+      children [
+        Shape { appearance USE STRAW geometry Sphere { radius 0.17 subdivision 3 } }
+        # Two small dark spheres (eyes), mirrored in y
+        Transform { translation 0.13  0.05 0.06 children [ Shape { appearance PBRAppearance { baseColor 0.06 0.06 0.06 } geometry Sphere { radius 0.028 subdivision 2 } } ] }
+        Transform { translation 0.13 -0.05 0.06 children [ Shape { appearance PBRAppearance { baseColor 0.06 0.06 0.06 } geometry Sphere { radius 0.028 subdivision 2 } } ] }
+        # Cone (nose), rotated 1.5708 rad about y
+        Transform { translation 0.16 0 -0.02 rotation 0 1 0 1.5708 children [ Shape { appearance PBRAppearance { baseColor 0.75 0.50 0.30 } geometry Cone { height 0.07 bottomRadius 0.032 subdivision 6 } } ] }
+        # Two short dark bars below the nose (presumably the mouth)
+        Transform { translation 0.14  0.04 -0.06 children [ Shape { appearance PBRAppearance { baseColor 0.18 0.08 0.08 } geometry Box { size 0.01 0.04 0.01 } } ] }
+        Transform { translation 0.14 -0.04 -0.06 children [ Shape { appearance PBRAppearance { baseColor 0.18 0.08 0.08 } geometry Box { size 0.01 0.04 0.01 } } ] }
+      ]
+    }
+    # Hat: a wide thin disc (brim) below a narrower, taller cylinder (crown)
+    Transform { translation 0 0 2.62 children [ Shape { appearance USE HAT geometry Cylinder { height 0.04 radius 0.28 subdivision 12 } } ] }
+    Transform { translation 0 0 2.80 children [ Shape { appearance USE HAT geometry Cylinder { height 0.30 radius 0.17 subdivision 10 } } ] }
+    # Torso and legs as plain boxes
+    Transform { translation 0 0 1.60 children [ Shape { appearance USE SHIRT geometry Box { size 0.20 0.40 0.46 } } ] }
+    Transform { translation 0 0 1.14 children [ Shape { appearance USE PANTS geometry Box { size 0.17 0.32 0.34 } } ] }
+    # Straw pieces near each end of the cross-bar, with mirrored rotations
+    Transform { translation 0  0.68 2.03 rotation 0 0  1 0.25 children [ Shape { appearance USE STRAW geometry Box { size 0.03 0.24 0.03 } } ] }
+    Transform { translation 0 -0.68 2.03 rotation 0 0 -1 0.25 children [ Shape { appearance USE STRAW geometry Box { size 0.03 0.24 0.03 } } ] }
+    # Two small straw pieces on the front (+x) of the torso
+    Transform { translation 0.10  0.08 1.82 children [ Shape { appearance USE STRAW geometry Box { size 0.03 0.03 0.14 } } ] }
+    Transform { translation 0.10 -0.08 1.82 children [ Shape { appearance USE STRAW geometry Box { size 0.03 0.03 0.14 } } ] }
+  ]
+}
+
+# ==================== HAY BALES (near barn) ====================
+# Three round bales (radius 0.62, length 1.30). The inner Transform rotates each
+# cylinder 1.5708 rad about x, which lays its axis along the Solid's local y.
+# The collision box therefore carries the 1.30 length on y and the 1.24 diameter
+# on x and z (it previously had the length on x, 90 degrees off the visual).
+# The second bale's Solid is rotated -1.5708 rad about x, cancelling the inner
+# rotation so that bale stands on end; its box rotates with the Solid.
+Solid { translation 25.75 13.76 0.62 children [ Transform { rotation 1 0 0 1.5708 children [ Shape { appearance USE HAY geometry Cylinder { height 1.30 radius 0.62 subdivision 14 } } ] } ] boundingObject Box { size 1.24 1.30 1.24 } }
+Solid { translation 24.34 12.32 0.62 rotation -1 0 0 1.5708 children [ Transform { rotation 1 0 0 1.5708 children [ Shape { appearance USE HAY geometry Cylinder { height 1.30 radius 0.62 subdivision 14 } } ] } ] boundingObject Box { size 1.24 1.30 1.24 } }
+Solid { translation 24.28 13.79 0.62 children [ Transform { rotation 1 0 0 1.5708 children [ Shape { appearance USE HAY geometry Cylinder { height 1.30 radius 0.62 subdivision 14 } } ] } ] boundingObject Box { size 1.24 1.30 1.24 } }
+
+# ==================== TRACTOR (near barn) ====================
+# Static prop built in a local frame where +x is the front (hood, bumper and
+# headlights sit at positive x), then yawed 1.9 rad about z.
+# NOTE(review): the rear-wheel bottoms are at local z = 0, so with the Solid at
+# z = 0.18 the tractor appears to sit 0.18 above z = 0; confirm this matches
+# the ground height at this spot.
+Solid {
+  translation 17 19 0.18
+  rotation 0 0 1 1.9
+  children [
+    # Chassis
+    Transform { translation 0 0 0.35 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.20 0.20 roughness 0.6 metalness 0.3 } geometry Box { size 2.0 0.90 0.12 } } ] }
+    # Engine hood
+    Transform { translation 0.60 0 0.60 children [ Shape { appearance PBRAppearance { baseColor 0.15 0.50 0.12 roughness 0.7 metalness 0.1 } geometry Box { size 0.65 0.80 0.45 } } ] }
+    # Main body
+    Transform { translation -0.15 0 0.60 children [ Shape { appearance PBRAppearance { baseColor 0.15 0.50 0.12 roughness 0.7 metalness 0.1 } geometry Box { size 0.80 0.85 0.45 } } ] }
+    # Cabin
+    Transform { translation -0.20 0 0.95 children [ Shape { appearance PBRAppearance { baseColor 0.15 0.50 0.12 roughness 0.7 metalness 0.1 } geometry Box { size 0.75 0.80 0.45 } } ] }
+    # Cabin roof
+    Transform { translation -0.20 0 1.22 children [ Shape { appearance PBRAppearance { baseColor 0.12 0.40 0.10 roughness 0.75 metalness 0.1 } geometry Box { size 0.85 0.90 0.06 } } ] }
+    # Windshield
+    Transform { translation 0.12 0 0.95 children [ Shape { appearance USE GLASS geometry Box { size 0.02 0.55 0.35 } } ] }
+    # Rear window
+    Transform { translation -0.58 0 0.95 children [ Shape { appearance USE GLASS geometry Box { size 0.02 0.55 0.35 } } ] }
+    # Side windows
+    Transform { translation -0.20  0.40 0.95 children [ Shape { appearance USE GLASS geometry Box { size 0.55 0.02 0.30 } } ] }
+    Transform { translation -0.20 -0.40 0.95 children [ Shape { appearance USE GLASS geometry Box { size 0.55 0.02 0.30 } } ] }
+    # Seat
+    Transform { translation -0.25 0 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.12 0.12 0.12 roughness 0.9 } geometry Box { size 0.30 0.35 0.06 } } ] }
+    # Exhaust stack
+    # Vertical pipe with a slightly wider cap; the cap offset is relative to the pipe centre
+    Transform { translation 0.50 0.30 0.60 children [
+      Shape { appearance PBRAppearance { baseColor 0.25 0.25 0.25 roughness 0.4 metalness 0.6 } geometry Cylinder { height 0.90 radius 0.03 subdivision 6 } }
+      Transform { translation 0 0 0.50 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.20 0.20 roughness 0.4 metalness 0.6 } geometry Cylinder { height 0.04 radius 0.045 subdivision 6 } } ] }
+    ] }
+    # Rear axle
+    Transform { translation -0.45 0 0.40 children [ Shape { appearance PBRAppearance { baseColor 0.25 0.25 0.25 roughness 0.5 metalness 0.5 } geometry Box { size 0.08 1.15 0.08 } } ] }
+    # Front axle
+    Transform { translation 0.60 0 0.25 children [ Shape { appearance PBRAppearance { baseColor 0.25 0.25 0.25 roughness 0.5 metalness 0.5 } geometry Box { size 0.08 0.90 0.08 } } ] }
+    # Each wheel is two coaxial cylinders rotated 1.5708 rad about x so their
+    # axis runs along local y: a dark tyre and a slightly longer, narrower hub.
+    # Each wheel's centre height equals its tyre radius, so its bottom is at local z = 0.
+    # Rear left wheel
+    Transform { translation -0.45  0.60 0.40 rotation 1 0 0 1.5708 children [
+      Shape { appearance PBRAppearance { baseColor 0.08 0.08 0.08 roughness 0.95 } geometry Cylinder { height 0.22 radius 0.40 subdivision 20 } }
+      Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 metalness 0.5 } geometry Cylinder { height 0.24 radius 0.14 subdivision 10 } }
+    ] }
+    # Rear right wheel
+    Transform { translation -0.45 -0.60 0.40 rotation 1 0 0 1.5708 children [
+      Shape { appearance PBRAppearance { baseColor 0.08 0.08 0.08 roughness 0.95 } geometry Cylinder { height 0.22 radius 0.40 subdivision 20 } }
+      Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 metalness 0.5 } geometry Cylinder { height 0.24 radius 0.14 subdivision 10 } }
+    ] }
+    # Front left wheel
+    Transform { translation 0.60  0.45 0.25 rotation 1 0 0 1.5708 children [
+      Shape { appearance PBRAppearance { baseColor 0.08 0.08 0.08 roughness 0.95 } geometry Cylinder { height 0.16 radius 0.25 subdivision 16 } }
+      Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 metalness 0.5 } geometry Cylinder { height 0.18 radius 0.09 subdivision 8 } }
+    ] }
+    # Front right wheel
+    Transform { translation 0.60 -0.45 0.25 rotation 1 0 0 1.5708 children [
+      Shape { appearance PBRAppearance { baseColor 0.08 0.08 0.08 roughness 0.95 } geometry Cylinder { height 0.16 radius 0.25 subdivision 16 } }
+      Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 metalness 0.5 } geometry Cylinder { height 0.18 radius 0.09 subdivision 8 } }
+    ] }
+    # Rear fenders
+    Transform { translation -0.45  0.50 0.72 children [ Shape { appearance PBRAppearance { baseColor 0.12 0.40 0.10 roughness 0.75 metalness 0.1 } geometry Box { size 0.50 0.12 0.20 } } ] }
+    Transform { translation -0.45 -0.50 0.72 children [ Shape { appearance PBRAppearance { baseColor 0.12 0.40 0.10 roughness 0.75 metalness 0.1 } geometry Box { size 0.50 0.12 0.20 } } ] }
+    # Front bumper
+    Transform { translation 0.95 0 0.35 children [ Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 roughness 0.7 metalness 0.3 } geometry Box { size 0.12 0.75 0.30 } } ] }
+    # Headlights
+    Transform { translation 0.97  0.25 0.45 children [ Shape { appearance PBRAppearance { baseColor 0.95 0.92 0.70 roughness 0.3 } geometry Sphere { radius 0.05 subdivision 3 } } ] }
+    Transform { translation 0.97 -0.25 0.45 children [ Shape { appearance PBRAppearance { baseColor 0.95 0.92 0.70 roughness 0.3 } geometry Sphere { radius 0.05 subdivision 3 } } ] }
+    # Taillights
+    Transform { translation -0.58  0.25 0.45 children [ Shape { appearance PBRAppearance { baseColor 0.80 0.10 0.10 roughness 0.4 } geometry Box { size 0.04 0.08 0.06 } } ] }
+    Transform { translation -0.58 -0.25 0.45 children [ Shape { appearance PBRAppearance { baseColor 0.80 0.10 0.10 roughness 0.4 } geometry Box { size 0.04 0.08 0.06 } } ] }
+    # Drawbar hitch
+    Transform { translation -0.95 0 0.20 children [ Shape { appearance PBRAppearance { baseColor 0.25 0.25 0.25 roughness 0.5 metalness 0.5 } geometry Box { size 0.12 0.06 0.06 } } ] }
+  ]
+  # NOTE(review): a bare Box here is presumably centred on the Solid origin, giving
+  # local z -0.65..0.65, while the visible parts span local z 0..1.25 (wheel
+  # bottoms to cabin roof). Confirm whether the box should be shifted up.
+  boundingObject Box { size 2.2 1.4 1.3 }
+}
+
+# ==================== GRASS PATCHES (inside field, decorative) ====================
+# Five patches. Each is a Solid at z = 0.15 holding three or four thin upright
+# boxes (0.04 x 0.02 footprint, 0.22-0.32 tall) as blades, some yawed about z.
+# No boundingObject is declared on any patch.
+Solid { translation -8 6 0.15 children [
+  Transform { translation  0.10  0.00 0 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.30 } } ] }
+  Transform { translation -0.05  0.12 0 rotation 0 0 1 0.4 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.26 } } ] }
+  Transform { translation  0.08 -0.10 0 rotation 0 0 1 -0.3 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.28 } } ] }
+  Transform { translation -0.12  0.04 0 rotation 0 0 1 0.2 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.24 } } ] }
+] }
+Solid { translation 6 -9 0.15 children [
+  Transform { translation  0.08  0.06 0 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.28 } } ] }
+  Transform { translation -0.10  0.00 0 rotation 0 0 1 -0.3 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.32 } } ] }
+  Transform { translation  0.02 -0.12 0 rotation 0 0 1 0.35 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.26 } } ] }
+  Transform { translation -0.06  0.10 0 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.22 } } ] }
+] }
+Solid { translation -3 11 0.15 children [
+  Transform { translation  0.06 -0.06 0 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.26 } } ] }
+  Transform { translation -0.08  0.08 0 rotation 0 0 1 0.3 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.30 } } ] }
+  Transform { translation  0.12  0.02 0 rotation 0 0 1 -0.25 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.28 } } ] }
+] }
+Solid { translation 10 8 0.15 children [
+  Transform { translation -0.07  0.05 0 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.24 } } ] }
+  Transform { translation  0.09 -0.07 0 rotation 0 0 1 0.4 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.28 } } ] }
+  Transform { translation  0.00  0.11 0 rotation 0 0 1 -0.2 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.26 } } ] }
+] }
+Solid { translation -11 -7 0.15 children [
+  Transform { translation  0.05  0.08 0 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.30 } } ] }
+  Transform { translation -0.09 -0.04 0 rotation 0 0 1 0.35 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.28 } } ] }
+  Transform { translation  0.10 -0.09 0 rotation 0 0 1 -0.3 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.24 } } ] }
+  Transform { translation -0.03  0.12 0 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.26 } } ] }
+] }
+
+# ==================== SHEPHERD DOG ====================
+# Single dog instance placed at the field origin (x = 0, y = 0), 0.5 above
+# z = 0, with a zero-angle rotation, driven by the "shepherd_dog" controller.
+# ShepherdDog is not defined in this part of the file (presumably a project PROTO).
+ShepherdDog {
+  translation 0 0 0.5
+  rotation 0 0 1 0
+  controller "shepherd_dog"
+}
+
+# ==================== SHEEP ====================
+# Up to 10 sheep, scattered through the field's central/north zone. Comment
+# out trailing slots to test smaller flock sizes; the dog policy is trained
+# to handle 1..10 sheep so any prefix works.
+# Every slot has a distinct name (sheep1..sheep10), runs the same "sheep"
+# controller and starts at z = 0.5; only the x/y start position differs.
+Sheep { translation  3.0  2.0 0.5 name "sheep1"  controller "sheep" }
+Sheep { translation  3.0 -2.0 0.5 name "sheep2"  controller "sheep" }
+Sheep { translation  4.0  0.0 0.5 name "sheep3"  controller "sheep" }
+Sheep { translation -3.0  4.0 0.5 name "sheep4"  controller "sheep" }
+Sheep { translation -5.0 -2.0 0.5 name "sheep5"  controller "sheep" }
+Sheep { translation  6.0  5.0 0.5 name "sheep6"  controller "sheep" }
+Sheep { translation -6.0  6.0 0.5 name "sheep7"  controller "sheep" }
+Sheep { translation  0.0  8.0 0.5 name "sheep8"  controller "sheep" }
+Sheep { translation -8.0  0.0 0.5 name "sheep9"  controller "sheep" }
+Sheep { translation  7.0 -4.0 0.5 name "sheep10" controller "sheep" }
Author	SHA1	Message	Date
Johnny Fernandes	c61df91950	Checkpoint 10	2026-05-13 23:22:17 +01:00
Johnny Fernandes	aa598fcb83	Checkpoint 10	2026-05-13 23:14:16 +01:00
Johnny Fernandes	0f807003a5	Results from last checkpoint	2026-05-13 20:26:18 +00:00
Johnny Fernandes	683de740af	Checkpoint 9	2026-05-13 13:46:50 +01:00
Johnny Fernandes	be58ad2054	Results from last checkpoinr	2026-05-13 07:49:17 +00:00
Johnny Fernandes	5c2ee4bba5	Checkpoint 8	2026-05-12 22:41:03 +01:00
Johnny Fernandes	a01a5c9cef	Checkpoint 7	2026-05-11 12:21:51 +01:00
Johnny Fernandes	fce0e0c786	Checkpoint 6	2026-05-11 10:35:48 +01:00
Johnny Fernandes	b457155538	Checkpoint 5 - incomplete	2026-05-11 10:35:39 +01:00
Johnny Fernandes	6688325d89	Checkpoint 4	2026-05-11 00:42:52 +01:00
Johnny Fernandes	2a6db038df	Checkpoint 3	2026-05-10 12:46:14 +01:00
Johnny Fernandes	1bb9415414	Checkpoint 2	2026-05-07 22:00:10 +01:00
Johnny Fernandes	90aa3bbcb4	Checkpoint 1	2026-05-07 21:59:58 +01:00
Johnny Fernandes	80a314b9e9	Trying attention method	2026-04-26 22:32:13 +01:00
Johnny Fernandes	a2363d882f	Trying attention method	2026-04-26 22:28:43 +01:00
Johnny Fernandes	57b1735e1a	Mimics webots approach better + debug. Lucky number	2026-04-26 20:36:36 +01:00
Johnny Fernandes	deeae3193e	Mimics webots approach better + debug. Lucky number	2026-04-26 18:55:53 +01:00
Johnny Fernandes	1af7d03ce2	Mimic webots physics	2026-04-26 18:22:26 +01:00
Johnny Fernandes	8110fc3143	Run n3	2026-04-26 16:42:55 +00:00
Johnny Fernandes	ad185b4d7e	Approach v4 simpler version	2026-04-26 17:18:20 +01:00
Johnny Fernandes	27fe6d1bf5	Run v3	2026-04-26 16:01:30 +00:00
Johnny Fernandes	e2883212c5	Approach v3 w/ south penalty fix	2026-04-26 15:26:24 +01:00
Johnny Fernandes	11e13c6980	Approach v3 w/ south penalty	2026-04-26 14:55:13 +01:00
Johnny Fernandes	a561f8a697	Run v2	2026-04-26 13:32:48 +00:00
Johnny Fernandes	a44ddb7b08	Approach refinement	2026-04-26 12:59:04 +01:00
Johnny Fernandes	acf0810425	Test26_1200	2026-04-26 11:04:23 +00:00
Johnny Fernandes	3cfd6b5e81	Approach refinement	2026-04-26 02:55:14 +01:00
Johnny Fernandes	d1aab20322	Approach refinement	2026-04-26 02:19:10 +01:00
Johnny Fernandes	287743709a	Approach refinement	2026-04-26 02:02:25 +01:00
Johnny Fernandes	61f8a7db15	Cleanup and new approach	2026-04-26 01:50:01 +01:00
Johnny Fernandes	b031473758	Behaviour refinement - fence penalty	2026-04-26 01:09:50 +01:00
Johnny Fernandes	6253850620	Behaviour refinement - fence penalty	2026-04-25 23:42:02 +01:00
Johnny Fernandes	6612dbc1ba	Test25_2330	2026-04-25 22:32:06 +00:00
Johnny Fernandes	7b87908410	Behaviour refinement	2026-04-25 21:35:23 +01:00
Johnny Fernandes	e302c76886	Test25_2025	2026-04-25 19:25:39 +00:00
Johnny Fernandes	841f5fa520	Test25_2000	2026-04-25 19:17:40 +00:00
Johnny Fernandes	7bfb7d3aae	Sheep training flock _ improver	2026-04-25 18:46:41 +01:00
Johnny Fernandes	5005128c07	Test25_1820	2026-04-25 17:19:02 +00:00
Johnny Fernandes	16878c5a0b	Sheep training flock _ improver	2026-04-25 18:02:56 +01:00
Johnny Fernandes	75d030cb49	Test25_1800	2026-04-25 17:00:19 +00:00
Johnny Fernandes	cc6d72e472	Sheep training flock _ improver	2026-04-25 17:07:03 +01:00
Johnny Fernandes	3a5decb185	Test25_1700	2026-04-25 16:02:10 +00:00
Johnny Fernandes	75c5b7c014	Sheep training flock _ improver	2026-04-25 16:28:15 +01:00
Johnny Fernandes	4350c7d320	Test25_1600	2026-04-25 15:06:06 +00:00
Johnny Fernandes	cd7e62b1b2	Sheep training flock _ improver	2026-04-25 13:39:49 +01:00
Johnny Fernandes	9bbef28515	Sheep training flock _ improver	2026-04-25 13:30:37 +01:00
Johnny Fernandes	438fa1be1d	Sheep training flock _ improver	2026-04-25 13:24:52 +01:00
Johnny Fernandes	e7c1d82f5c	Test25_1315	2026-04-25 12:14:36 +00:00
Johnny Fernandes	f889dc78cc	Sheep training flock _ improver	2026-04-25 12:50:06 +01:00
Johnny Fernandes	19bfac9bd9	Test25_1245	2026-04-25 11:47:37 +00:00
Johnny Fernandes	02b20fbdb4	Sheep training flock _ improver	2026-04-25 12:20:42 +01:00
Johnny Fernandes	433652cb94	Test25_1215	2026-04-25 11:16:12 +00:00
Johnny Fernandes	fbe76a0d04	Sheep training flock _ improver	2026-04-25 11:31:39 +01:00
Johnny Fernandes	062de676c9	Test25_0030	2026-04-24 23:37:03 +00:00
Johnny Fernandes	7d5725cc3e	Sheep training flock _ improver	2026-04-25 00:18:01 +01:00
Johnny Fernandes	5a61a424ee	Test25_0010	2026-04-24 23:10:33 +00:00
Johnny Fernandes	c029c3fc6c	Sheep training flock _ improver	2026-04-24 23:51:47 +01:00
Johnny Fernandes	b77f36b713	Sheep training flock _ improver	2026-04-24 23:38:09 +01:00
Johnny Fernandes	0716c6c3c8	Sheep training flock _ improver	2026-04-24 23:27:05 +01:00
Johnny Fernandes	b3251fcca3	Sheep training flock _ improver	2026-04-24 22:46:51 +01:00
Johnny Fernandes	d599181d22	Sheep training flock _ improver	2026-04-24 21:29:44 +01:00
Johnny Fernandes	8b54b2a934	Test24_2120	2026-04-24 20:21:53 +00:00
Johnny Fernandes	eb29cdf402	Test24_2100	2026-04-24 20:08:25 +00:00
Johnny Fernandes	36b3216c5f	Sheep training flock of 10 fix?	2026-04-24 19:05:41 +01:00
Johnny Fernandes	7bb545eab6	Sheep training flock of 10 fix?	2026-04-24 19:03:18 +01:00
Johnny Fernandes	efe996a5a9	Test24_1900	2026-04-24 18:00:20 +00:00
Johnny Fernandes	3bac24f406	Sheep training flock of 10 fix?	2026-04-24 18:29:23 +01:00
Johnny Fernandes	fc961e651c	Sheep training flock of 10 fix?	2026-04-24 18:06:22 +01:00
Johnny Fernandes	65d881aa0f	Test24_1800	2026-04-24 17:00:14 +00:00
Johnny Fernandes	bf9fe902d9	Sheep training flock of 10 fix?	2026-04-24 17:49:42 +01:00
Johnny Fernandes	4d7f365358	Sheep training flock of 10 fix?	2026-04-24 17:31:11 +01:00
Johnny Fernandes	c2da9c10e4	Test24_1725	2026-04-24 16:24:54 +00:00
Johnny Fernandes	d8b4e2c042	Sheep training flock of 10 fix?	2026-04-24 17:08:47 +01:00
Johnny Fernandes	e0426bf320	Sheep training flock of 10 fix?	2026-04-24 16:46:02 +01:00
Johnny Fernandes	3574d57ba2	Sheep training flock of 10 fix?	2026-04-24 16:30:35 +01:00
Johnny Fernandes	58d773cb7c	Sheep training flock of 10 fix?	2026-04-24 16:12:16 +01:00
Johnny Fernandes	fe5174e0bd	Sheep training flock of 10 fix?	2026-04-24 15:55:15 +01:00
Johnny Fernandes	678d757fe8	Sheep training flock of 10 fix?	2026-04-24 15:24:37 +01:00
Johnny Fernandes	44b2788e78	Sheep training flock of 10 fix?	2026-04-24 15:14:45 +01:00
Johnny Fernandes	bdbe8ba1de	Sheep training flock of 10 fix?	2026-04-24 15:10:36 +01:00
Johnny Fernandes	fcfa2c35c8	Sheep training flock of 10 fix?	2026-04-24 14:54:20 +01:00
Johnny Fernandes	17eb25864e	Sheep training flock of 10 fix?	2026-04-24 10:58:36 +01:00
Johnny Fernandes	4189cc8dba	Sheep training flock of 10 fix?	2026-04-24 01:59:15 +01:00
Johnny Fernandes	1e3b67d194	Test24_0150	2026-04-24 00:50:17 +00:00
Johnny Fernandes	f68dea44da	Sheep training flock of 10 fix?	2026-04-23 23:20:23 +01:00
Johnny Fernandes	a13f5d0ff0	Sheep training flock of 10 fix?	2026-04-23 20:41:48 +01:00
Johnny Fernandes	81dc2aca01	Sheep training flock of 10	2026-04-23 19:22:39 +01:00
Johnny Fernandes	fdac0ae0b0	Shepherd Dog RL	2026-04-23 19:22:14 +01:00
Johnny Fernandes	9e13eb060d	Classic approach results	2026-04-23 17:23:57 +00:00
Johnny Fernandes	ea6e66b16c	Classic approach results	2026-04-23 12:43:47 +00:00
Johnny Fernandes	ffbfaa3977	A more classical approach	2026-04-23 11:51:52 +01:00