Webots sim-to-real fixes, DAgger pipeline, 360° proto variant

Today's session worked across the full Webots delivery stack — found and fixed a cluster of bugs blocking the BC/RL transfer, then explored training-side mitigations for the residual perception gap. Bug fixes: - Makefile FP_RATE default 2.0 → 0.0: BC demos used fp_rate=0 but RL fine-tune defaulted to fp_rate=2, poisoning the BC obs distribution and stalling PPO at 0% success across 1.46M+ steps. - controllers/{shepherd_dog,sheep}/runtime.ini: Webots was launching controllers under system python3 (no numpy) and they were crashing silently. Pinned to the conda tir env. - herding/config.py HERDING_WEBOTS preset: pen_latch_depth 0.5 → 2.0, max_new_tracks_per_step 3 → 1, static_reject 0.8 → 1.2. Stops phantom FPs near the gate from latching as permanently-penned tracks. - herding/perception/sheep_tracker.py: penned tracks now decay at forget_steps × 8 instead of living forever. Adds get_positions min_freshness filter for deploy-time use. Training/eval matches deployment: - training/bc/collect.py: --dagger-policy flag for DAgger rollouts (policy drives, teacher labels) + --use-webots-preset for matched 140° tracker + DR config. - controllers/shepherd_dog/shepherd_dog.py: scan-fallback (0, 0.6) when BC/RL sees empty sheep_positions — recovers from FOV gaps. Tooling: - tools/dagger_round.sh: one-shot DAgger round (collect + concat + bc). - tools/webots_sweep_gt.sh: full sweep with HERDING_USE_GT=1 for the perception-gap diagnosis matrix. - protos/ShepherdDog360.proto: 360° FOV variant for the FOV-ablation comparison. Canonical proto stays at 140° per project spec. Artifacts: v1 BC/RL policies for all 4 (drive × world) combos trained in clean gym (success: diff/field 90-100%, diff/round 58%, mec/field 60-100%, mec/round 50-100%). DAgger r1/r2 BCs for diff/field show 12%→38% progression on gym HERDING_WEBOTS proxy but did not close to actual Webots LiDAR (0/5 throughout). Next: LSTM policy or learned tracker per the project-state memory. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-16 17:21:02 +00:00
parent c61df91950
commit dd5ac669e5
34 changed files with 2336 additions and 188 deletions
@@ -51,25 +51,57 @@ RL_FAST_POLICY    = $(RL_FAST_DIR)/policy.zip

 # --- Demo collection ---
 TEACHER          ?= universal
-# Round field is fundamentally harder (narrow gate at south of a circle).
-# Default to more demos there to give BC a fair shot at 60%+.
+# Mecanum has more complex dynamics and a weaker teacher imitation signal
+# (val_cos ≈ 0.70 vs ≥ 0.88 for differential).  Give it more demos and
+# longer BC training to compensate.
+ifeq ($(DRIVE),mecanum)
+ifeq ($(WORLD),field_round)
+SEEDS_PER_N      ?= 80
+else
+SEEDS_PER_N      ?= 50
+endif
+else
+# Round field is harder; more demos give BC a fair shot at 60%+.
 ifeq ($(WORLD),field_round)
 SEEDS_PER_N      ?= 60
 else
 SEEDS_PER_N      ?= 25
 endif
+endif
 SUBSAMPLE        ?= 3
 FRAME_STACK      ?= 4
 DEMO_MAX_STEPS   ?= 100000

 # --- Behaviour cloning ---
+ifeq ($(DRIVE),mecanum)
+ifeq ($(WORLD),field_round)
+BC_EPOCHS        ?= 200
+else
+BC_EPOCHS        ?= 100
+endif
+else
 ifeq ($(WORLD),field_round)
 BC_EPOCHS        ?= 150
 else
 BC_EPOCHS        ?= 60
 endif
+endif
 BC_NET_ARCH      ?= 512,512

+# --- Domain randomisation (used by bc_demos and rl targets) ---
+# FP_RATE: mean false-positive detections injected per step (Poisson λ).
+# ACTION_SMOOTH_TRAIN: EMA on actions to match Webots controller (0.55).
+# WHEEL_SLIP_STD: Gaussian wheel-speed noise for mecanum dynamics gap.
+#
+# FP_RATE is used consistently in BC demos *and* RL: BC collection runs
+# in PRIVILEGED mode (teacher sees GT; student obs sees the FP-injected
+# tracker output), so the policy learns to denoise to the GT signal.
+# Mismatched FP_RATE between BC/RL was the root cause of an earlier
+# regression (BC=0, RL=2 → PPO stalled at 0% success).
+FP_RATE          ?= 0.0
+ACTION_SMOOTH_TRAIN ?= 0.55
+WHEEL_SLIP_STD   ?= 0.05
+
 # --- KL-PPO fine-tune ---
 # Round field: longer training, looser KL, no time penalty (success
 # must be learned before speed is rewarded).
@@ -93,9 +125,16 @@ DIFFICULTY       ?= 1.0
 # --- Stage-2 "speed pass" (rl_fast) ---
 # Continues from RL_DIR with a negative TIME_W. Tighter KL keeps the
 # policy near the Stage-1 success rate while step-count drops.
+# Differential and mecanum respond differently: mecanum needs a stronger
+# time penalty to achieve speed gains; differential only needs a light
+# touch (-0.02) — stronger penalties trade success for speed without gain.
 RL_FAST_STEPS    ?= 1000000
 RL_FAST_KL       ?= 0.05
+ifeq ($(DRIVE),mecanum)
 RL_FAST_TIME_W   ?= -0.05
+else
+RL_FAST_TIME_W   ?= -0.02
+endif

 # --- Evaluation ---
 EVAL_SEEDS       ?= 10
@@ -107,7 +146,7 @@ MODE             ?= rl


 .PHONY: all bc_demos bc rl rl_fast eval eval_fast eval_all eval_all_fast \
-        test webots clean clean_all help \
+        test webots webots_sweep clean clean_all help \
        train_all train_diff_rect train_diff_round \
        train_mec_rect train_mec_round \
        train_all_fast train_diff_rect_fast train_diff_round_fast \
@@ -129,7 +168,10 @@ $(BC_DEMOS):
 		--seeds-per-n $(SEEDS_PER_N) --subsample $(SUBSAMPLE) \
 		--frame-stack $(FRAME_STACK) --drive-mode $(DRIVE) \
 		--world $(WORLD) \
-		--max-steps $(DEMO_MAX_STEPS)
+		--max-steps $(DEMO_MAX_STEPS) \
+		--fp-rate $(FP_RATE) \
+		--action-smooth $(ACTION_SMOOTH_TRAIN) \
+		--wheel-slip-std $(WHEEL_SLIP_STD)

 bc: $(BC_POLICY)
 $(BC_POLICY): $(BC_DEMOS)
@@ -144,7 +186,10 @@ $(RL_POLICY): $(BC_POLICY)
 		--total-timesteps $(PPO_STEPS) --kl-coef $(KL) \
 		--imitate-weight $(IMITATE) --time-weight $(TIME_W) \
 		--difficulty $(DIFFICULTY) \
-		--drive-mode $(DRIVE) --world $(WORLD)
+		--drive-mode $(DRIVE) --world $(WORLD) \
+		--fp-rate $(FP_RATE) \
+		--action-smooth $(ACTION_SMOOTH_TRAIN) \
+		--wheel-slip-std $(WHEEL_SLIP_STD)

 eval: $(RL_POLICY)
 	$(PY) -m training.eval --policy $(RL_DIR) \
@@ -162,7 +207,10 @@ $(RL_FAST_POLICY): $(RL_POLICY)
 		--total-timesteps $(RL_FAST_STEPS) --kl-coef $(RL_FAST_KL) \
 		--imitate-weight $(IMITATE) --time-weight $(RL_FAST_TIME_W) \
 		--difficulty $(DIFFICULTY) \
-		--drive-mode $(DRIVE) --world $(WORLD)
+		--drive-mode $(DRIVE) --world $(WORLD) \
+		--fp-rate $(FP_RATE) \
+		--action-smooth $(ACTION_SMOOTH_TRAIN) \
+		--wheel-slip-std $(WHEEL_SLIP_STD)

 eval_fast: $(RL_FAST_POLICY)
 	$(PY) -m training.eval --policy $(RL_FAST_DIR) \
@@ -175,6 +223,14 @@ test:
 webots:
 	tools/run_webots.sh $(N) $(MODE) $(DRIVE) $(WORLD)

+# Headless sweep across all modes × worlds × flock sizes.
+# Results are written to webots_sweep.log.
+# Set USE_GT=1 to bypass LiDAR tracker (isolate perception from policy).
+webots_sweep:
+	env $(if $(USE_GT),HERDING_USE_GT=1,) \
+	    PATH="$(CONDA_PREFIX)/bin:$(PATH)" \
+	    bash tools/webots_sweep.sh webots_sweep.log
+
 clean:
 	rm -f $(BC_DEMOS)
 	rm -rf $(BC_DIR) $(RL_DIR)
@@ -0,0 +1,2 @@
+[python]
+COMMAND = /home/jalf/miniconda3/envs/tir/bin/python3
@@ -0,0 +1,2 @@
+[python]
+COMMAND = /home/jalf/miniconda3/envs/tir/bin/python3
@@ -87,11 +87,20 @@ from herding.perception.lidar_perception import detections_from_scan
 from herding.perception.sheep_tracker import SheepTracker
 from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels
 from herding.world.geometry import (
-    DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
-    DOG_SOUTH_LIMIT, DOG_WHEEL_BASE, DOG_WHEEL_BASE_X,
-    DOG_WHEEL_BASE_Y, DOG_WHEEL_RADIUS,
+    DOG_SOUTH_LIMIT,
    PEN_ENTRY, is_penned_position,
 )
+from herding.config import HERDING_WEBOTS, RobotConfig
+
+# Robot physical constants come from RobotConfig so they stay in sync with
+# the training environment.  The Webots preset uses action_smooth=0.55.
+_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
+DOG_WHEEL_RADIUS  = _ROBOT_CFG.wheel_radius
+DOG_WHEEL_BASE    = _ROBOT_CFG.wheel_base
+DOG_WHEEL_BASE_X  = _ROBOT_CFG.wheel_base_x
+DOG_WHEEL_BASE_Y  = _ROBOT_CFG.wheel_base_y
+DOG_MAX_WHEEL_OMEGA = _ROBOT_CFG.max_wheel_omega
+DOG_MAX_LINEAR    = _ROBOT_CFG.max_linear


 # ---------------------------------------------------------------------------
@@ -102,6 +111,13 @@ MODE = (os.environ.get("HERDING_MODE")
        or _runtime_cfg.get("HERDING_MODE")
        or "bc").lower()

+# Diagnostic: bypass LiDAR tracker and use GT emitter positions directly.
+# Set HERDING_USE_GT=1 to isolate perception sim-to-real gap from policy quality.
+USE_GT_PERCEPTION = bool(int(
+    os.environ.get("HERDING_USE_GT")
+    or _runtime_cfg.get("HERDING_USE_GT", "0")
+))
+

 def _resolve_policy_dir(mode: str) -> str:
    """Where to look for the trained policy for the given mode.
@@ -141,7 +157,7 @@ def _resolve_policy_dir(mode: str) -> str:
    return env_dir or primary


-_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal")
+_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal", "calibrate")
 if MODE not in _VALID_MODES:
    print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
    MODE = "strombom"
@@ -173,7 +189,7 @@ print(f"[dog] drive mode={DRIVE_MODE}")
 # Control parameters
 # ---------------------------------------------------------------------------

-ACTION_SMOOTH = 0.55           # EMA on (vx, vy) — kills frame-to-frame jitter
+ACTION_SMOOTH = _ROBOT_CFG.action_smooth  # EMA on (vx, vy) — kills frame-to-frame jitter
 RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done")


@@ -264,7 +280,7 @@ receiver = robot.getDevice("receiver"); receiver.enable(timestep)
 emitter = robot.getDevice("emitter")
 lidar = robot.getDevice("lidar");       lidar.enable(timestep)

-tracker = SheepTracker()
+tracker = SheepTracker(tracker_cfg=HERDING_WEBOTS.tracker)

 # Cosmetic ear motors — animated; not used by control.
 left_ear = robot.getDevice("left ear motor")
@@ -300,6 +316,76 @@ _run_done = False
 prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0)
 step_count = 0

+# ---------------------------------------------------------------------------
+# Calibration mode — apply fixed action, measure GPS displacement, compare
+# against gym kinematics prediction, write results to calibrate_mecanum.log.
+# ---------------------------------------------------------------------------
+if MODE == "calibrate":
+    import json as _json
+    _calib_vx  = float(os.environ.get("CALIB_VX", "0.5"))
+    _calib_vy  = float(os.environ.get("CALIB_VY", "0.0"))
+    _calib_om  = float(os.environ.get("CALIB_OM", "0.0"))
+    _calib_n   = int(os.environ.get("CALIB_N_STEPS", "150"))
+    _log_path  = os.path.join(_PROJECT_ROOT, "calibrate_mecanum.log")
+    # Settle for 5 steps so GPS stabilises.
+    for _ in range(5):
+        robot.step(timestep)
+    _pos0 = gps.getValues(); _x0, _y0 = _pos0[0], _pos0[1]
+    _n_calib = compass.getValues(); _h0 = math.atan2(_n_calib[0], _n_calib[1])
+    # Gym-predicted displacement using shared kinematics.
+    from herding.world.diffdrive import velocity_to_mecanum_wheels, mecanum_kinematics_step
+    from herding.world.geometry import WEBOTS_DT as _DT
+    _xg, _yg, _hg = _x0, _y0, _h0
+    for _ in range(_calib_n):
+        _wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
+            _calib_vx, _calib_vy, _calib_om, _hg,
+            max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
+            lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
+            max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
+            wheel_base=DOG_WHEEL_BASE,
+        )
+        _xg, _yg, _hg = mecanum_kinematics_step(
+            _xg, _yg, _hg, _wfl, _wfr, _wrl, _wrr,
+            DOG_WHEEL_RADIUS, DOG_WHEEL_BASE_X / 2, DOG_WHEEL_BASE_Y / 2, _DT,
+        )
+    # Run actual Webots steps.
+    for _ in range(_calib_n):
+        _nv = compass.getValues(); _h = math.atan2(_nv[0], _nv[1])
+        _wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
+            _calib_vx, _calib_vy, _calib_om, _h,
+            max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
+            lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
+            max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
+            wheel_base=DOG_WHEEL_BASE,
+        )
+        if DRIVE_MODE == "mecanum":
+            drive_mecanum(_calib_vx, _calib_vy, _calib_om,
+                          fl_motor, fr_motor, rl_motor, rr_motor,
+                          compass, MOTOR_MAX)
+        robot.step(timestep)
+    _pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1]
+    _T = _calib_n * _DT
+    _vx_w = (_x1 - _x0) / _T;  _vy_w = (_y1 - _y0) / _T
+    _vx_g = (_xg - _x0) / _T;  _vy_g = (_yg - _y0) / _T
+    def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p)
+    _result = (
+        f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f})  "
+        f"steps={_calib_n}\n"
+        f"  gym    displacement: dx={_xg-_x0:.4f}  dy={_yg-_y0:.4f}  "
+        f"(vx={_vx_g:.3f}  vy={_vy_g:.3f} m/s)\n"
+        f"  webots displacement: dx={_x1-_x0:.4f}  dy={_y1-_y0:.4f}  "
+        f"(vx={_vx_w:.3f}  vy={_vy_w:.3f} m/s)\n"
+        f"  vx error: {_pct(_vx_w, _vx_g):.1f}%    "
+        f"vy error: {_pct(_vy_w, _vy_g):.1f}%\n"
+    )
+    print(_result)
+    with open(_log_path, "a") as _f:
+        _f.write(_result + "\n")
+    # Write the run-done sentinel so run_webots.sh closes Webots cleanly.
+    with open(RUN_DONE_FILE, "w") as _f:
+        _f.write("calibrate\n")
+    import sys as _sys; _sys.exit(0)
+
 while robot.step(timestep) != -1:
    step_count += 1

@@ -321,7 +407,17 @@ while robot.step(timestep) != -1:

    # ---- LiDAR perception → tracker → active sheep positions ----
    ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32)
-    detections = detections_from_scan(ranges, dog_xy[0], dog_xy[1], dog_heading)
+    detections = detections_from_scan(
+        ranges, dog_xy[0], dog_xy[1], dog_heading,
+        detection_cfg=HERDING_WEBOTS.detection,
+        lidar_cfg=HERDING_WEBOTS.lidar,
+    )
+    if USE_GT_PERCEPTION and _gt_sheep:
+        # Bypass tracker: feed GT emitter positions directly to policy/teacher.
+        sheep_positions = {k: v for k, v in _gt_sheep.items()
+                          if not is_penned_position(v[0], v[1])}
+        tracker.update(detections)  # still advance tracker for diagnostics
+    else:
        sheep_positions = tracker.update(detections)

    sheep_xy_list = list(sheep_positions.values())
@@ -331,6 +427,14 @@ while robot.step(timestep) != -1:
    # ---- Action selection ----
    omega = 0.0
    if MODE in ("bc", "rl") and policy_handle is not None:
+        if not sheep_positions:
+            # BC/RL never saw "empty obs during operation" in training (empty
+            # obs only happened at episode end), so the policy outputs ~zero
+            # and the dog gets stuck. Fall back to a fixed scan rotation
+            # until tracker recovers some sheep.
+            vx, vy = 0.0, 0.6
+            omega = 0.5 if DRIVE_MODE == "mecanum" else 0.0
+        else:
            action = policy_handle.predict(single_obs)
            vx, vy = float(action[0]), float(action[1])
            if DRIVE_MODE == "mecanum" and len(action) >= 3:
@@ -0,0 +1,335 @@
+"""Central configuration dataclasses for the herding simulation.
+
+Every tunable constant that previously lived as a module-level literal in
+perception/lidar_sim.py, perception/lidar_perception.py,
+perception/sheep_tracker.py, world/geometry.py, or training/herding_env.py
+is now represented here as a field with its original default value.
+
+Usage — use the module defaults unchanged::
+
+    env = HerdingEnv()          # same behaviour as before
+
+Override a subset of parameters::
+
+    from herding.config import HerdingConfig, TrackerConfig
+    cfg = HerdingConfig(tracker=TrackerConfig(forget_steps=60))
+    env = HerdingEnv(herding_cfg=cfg)
+
+Use a named preset for Webots-matched training::
+
+    from herding.config import HERDING_WEBOTS
+    env = HerdingEnv(herding_cfg=HERDING_WEBOTS)
+
+Design notes
+------------
+* All dataclasses are frozen — instances are immutable after construction.
+* This module must not import from other ``herding.*`` packages to avoid
+  import cycles.  Field-geometry constants (pen coordinates, field size)
+  stay in ``herding.world.geometry`` because they depend on the world
+  variant selected at runtime via ``HERDING_WORLD``.
+"""
+
+from __future__ import annotations
+
+import math
+from dataclasses import dataclass, field, replace
+
+
+# ---------------------------------------------------------------------------
+# LiDAR hardware spec
+# ---------------------------------------------------------------------------
+
+@dataclass(frozen=True)
+class LidarConfig:
+    """Parameters of the simulated / physical LiDAR sensor.
+
+    The two canonical presets are :data:`LIDAR_FULL` (360°, oracle mode)
+    and :data:`LIDAR_WEBOTS` (140°/180-ray, matches the ShepherdDog proto).
+    """
+
+    n_rays: int = 360
+    """Number of rays in the scan."""
+
+    fov_rad: float = 2.0 * math.pi
+    """Full field-of-view in radians, centred on the robot's forward axis."""
+
+    max_range: float = 12.0
+    """Maximum detectable range in metres."""
+
+    noise_std: float = 0.005
+    """Gaussian standard deviation (metres) applied to each hit reading."""
+
+    sheep_radius: float = 0.30
+    """Effective disc radius of a sheep in the 2-D LiDAR plane (metres)."""
+
+    post_radius: float = 0.25
+    """Effective disc radius of gate / corner posts (metres)."""
+
+    def __post_init__(self) -> None:
+        if self.n_rays < 1:
+            raise ValueError(f"n_rays must be ≥ 1, got {self.n_rays}")
+        if not (0.0 < self.fov_rad <= 2.0 * math.pi):
+            raise ValueError(f"fov_rad must be in (0, 2π], got {self.fov_rad:.4f}")
+        if self.max_range <= 0.0:
+            raise ValueError(f"max_range must be > 0, got {self.max_range}")
+
+
+# Named presets -----------------------------------------------------------
+
+LIDAR_FULL = LidarConfig(
+    n_rays=360,
+    fov_rad=2.0 * math.pi,
+)
+"""360° full-circle scan — oracle / ablation mode."""
+
+LIDAR_WEBOTS = LidarConfig(
+    n_rays=180,
+    fov_rad=math.radians(140.0),
+)
+"""Matches the ShepherdDog.proto Lidar device (180 rays, 140° FOV).
+
+Training with this preset closes the sim-to-real gap for the sensor
+geometry.  Because the observation is built from tracker output (not raw
+rays), a policy trained here can be deployed on a wider-FOV LiDAR (e.g.
+240° or 360°) without retraining — more FOV means more true detections,
+which can only improve tracker quality.
+"""
+
+
+# ---------------------------------------------------------------------------
+# Cluster-detection pipeline
+# ---------------------------------------------------------------------------
+
+@dataclass(frozen=True)
+class DetectionConfig:
+    """Parameters for the LiDAR-scan → detection clustering pipeline."""
+
+    gap_threshold: float = 0.6
+    """Adjacent hit-points farther apart than this (metres) start a new cluster."""
+
+    max_cluster_span: float = 1.5
+    """Clusters wider than this (metres) are rejected as walls / structures."""
+
+    range_hit_eps: float = 0.05
+    """A ray is considered a hit if ``range < max_range - range_hit_eps``."""
+
+    split_range_gap: float = 0.20
+    """Range increase within a cluster that triggers a multi-peak split."""
+
+    wall_reject: float = 0.5
+    """Drop detections within this distance (metres) of any field wall."""
+
+    static_reject: float = 0.8
+    """Drop detections within this distance (metres) of known static features
+    (gate posts, field corners)."""
+
+    def __post_init__(self) -> None:
+        if self.wall_reject < 0.0:
+            raise ValueError(f"wall_reject must be ≥ 0, got {self.wall_reject}")
+        if self.static_reject < 0.0:
+            raise ValueError(f"static_reject must be ≥ 0, got {self.static_reject}")
+
+
+# ---------------------------------------------------------------------------
+# Multi-target tracker
+# ---------------------------------------------------------------------------
+
+@dataclass(frozen=True)
+class TrackerConfig:
+    """Parameters for the nearest-neighbour sheep tracker."""
+
+    gate_m: float = 2.5
+    """Primary NN association gate in metres (recently observed tracks)."""
+
+    reacquire_gate_m: float = 4.5
+    """Wider gate used when re-acquiring tracks stale for ≥ ``reacquire_min_age`` steps."""
+
+    reacquire_min_age: int = 20
+    """Minimum staleness (steps) before the wider re-acquisition gate activates."""
+
+    penned_gate_m: float = 4.0
+    """Gate for matching new detections to already-penned tracks."""
+
+    forget_steps: int = 200
+    """Delete an active track that has not been observed for this many steps (~3.2 s)."""
+
+    predict_steps: int = 120
+    """Extrapolate a track's position using constant velocity for this many steps (~1.9 s)."""
+
+    velocity_clamp: float = 1.0
+    """Maximum predicted speed (m/s) used during extrapolation."""
+
+    max_new_tracks_per_step: int = 10
+    """Maximum number of new tracks that may be spawned in a single step.
+
+    Capping this limits the damage from LiDAR false-positive bursts (e.g.
+    wall reflections in Webots) that would otherwise flood the track set.
+    The default (10 = MAX_SHEEP) preserves the original behaviour; reduce
+    to 2–3 for Webots deployment robustness.
+    """
+
+    pen_latch_depth: float = 0.0
+    """Minimum depth past the gate line (metres) before a track is latched
+    as penned.  0.0 = original behaviour (latch at y ≤ GATE_Y).  Increase
+    to 0.5 for Webots to prevent gate-hardware LiDAR reflections near y=-15
+    from permanently consuming tracker slots as false "penned" sheep.
+    """
+
+    def __post_init__(self) -> None:
+        if self.forget_steps < 1:
+            raise ValueError(f"forget_steps must be ≥ 1, got {self.forget_steps}")
+        if self.max_new_tracks_per_step < 1:
+            raise ValueError(
+                f"max_new_tracks_per_step must be ≥ 1, got {self.max_new_tracks_per_step}"
+            )
+
+
+# ---------------------------------------------------------------------------
+# Robot physical specification
+# ---------------------------------------------------------------------------
+
+@dataclass(frozen=True)
+class RobotConfig:
+    """Physical parameters of the shepherd-dog robot.
+
+    Values mirror ``protos/ShepherdDog.proto`` and ``protos/ShepherdDogMecanum.proto``.
+    """
+
+    wheel_radius: float = 0.038
+    """Wheel radius in metres."""
+
+    wheel_base: float = 0.28
+    """Axle-to-axle distance for differential drive (metres)."""
+
+    wheel_base_x: float = 0.28
+    """Front-to-back axle distance for mecanum drive (metres)."""
+
+    wheel_base_y: float = 0.28
+    """Left-to-right axle distance for mecanum drive (metres)."""
+
+    max_wheel_omega: float = 70.0
+    """Maximum wheel angular velocity (rad/s)."""
+
+    action_smooth: float = 0.0
+    """Exponential moving-average coefficient applied to actions inside the env.
+
+    ``0.0`` means no smoothing (gym default).
+    ``0.55`` matches the hard-coded EMA in ``shepherd_dog.py`` — use this
+    when training so the policy learns to act through the same filter it
+    sees at deployment.
+    """
+
+    def __post_init__(self) -> None:
+        if not (0.0 <= self.action_smooth < 1.0):
+            raise ValueError(
+                f"action_smooth must be in [0, 1), got {self.action_smooth}"
+            )
+
+    @property
+    def max_linear(self) -> float:
+        """Maximum achievable linear speed (m/s)."""
+        return self.wheel_radius * self.max_wheel_omega
+
+
+# ---------------------------------------------------------------------------
+# Domain randomisation
+# ---------------------------------------------------------------------------
+
+@dataclass(frozen=True)
+class DomainRandomConfig:
+    """Parameters that inject physics / sensor noise for domain randomisation.
+
+    All values default to 0 (disabled) so the base env is deterministic and
+    backwards-compatible.  Enable them gradually to close the sim-to-real gap.
+    """
+
+    fp_rate: float = 0.0
+    """Mean number of false-positive detections injected per step (Poisson λ).
+
+    FPs are placed near static features (walls, posts) with positional
+    noise ``fp_std_pos``, mimicking the spurious clusters Webots' physical
+    LiDAR returns from 3D geometry.
+    """
+
+    fp_std_pos: float = 0.3
+    """Positional standard deviation (metres) of injected false-positive clusters."""
+
+    wheel_slip_std: float = 0.0
+    """Gaussian noise standard deviation (rad/s) added to each wheel speed
+    before kinematic integration.  Models real-world wheel slip and motor
+    variation.  Suggested starting value: 0.05.
+    """
+
+    compass_noise_std: float = 0.0
+    """Gaussian noise standard deviation (radians) added to the heading
+    reading each step.  Models magnetometer drift in Webots.
+    Suggested starting value: 0.02.
+    """
+
+    def __post_init__(self) -> None:
+        if self.fp_rate < 0.0:
+            raise ValueError(f"fp_rate must be ≥ 0, got {self.fp_rate}")
+        if self.wheel_slip_std < 0.0:
+            raise ValueError(f"wheel_slip_std must be ≥ 0, got {self.wheel_slip_std}")
+        if self.compass_noise_std < 0.0:
+            raise ValueError(f"compass_noise_std must be ≥ 0, got {self.compass_noise_std}")
+
+
+# ---------------------------------------------------------------------------
+# Aggregate config
+# ---------------------------------------------------------------------------
+
+@dataclass(frozen=True)
+class HerdingConfig:
+    """Root configuration object passed to :class:`~training.herding_env.HerdingEnv`.
+
+    Sub-configs default to the original simulation parameters so that
+    ``HerdingEnv()`` and ``HerdingEnv(herding_cfg=HerdingConfig())`` produce
+    identical behaviour.
+    """
+
+    lidar: LidarConfig = field(default_factory=LidarConfig)
+    detection: DetectionConfig = field(default_factory=DetectionConfig)
+    tracker: TrackerConfig = field(default_factory=TrackerConfig)
+    robot: RobotConfig = field(default_factory=RobotConfig)
+    domain_random: DomainRandomConfig = field(default_factory=DomainRandomConfig)
+
+    def replace(self, **kwargs) -> "HerdingConfig":
+        """Return a new config with selected top-level sub-configs replaced.
+
+        Example::
+
+            cfg = HERDING_WEBOTS.replace(
+                domain_random=DomainRandomConfig(fp_rate=2.0, wheel_slip_std=0.05)
+            )
+        """
+        return replace(self, **kwargs)
+
+
+# ---------------------------------------------------------------------------
+# Named full-pipeline presets
+# ---------------------------------------------------------------------------
+
+HERDING_DEFAULT = HerdingConfig()
+"""Original simulation defaults — zero behaviour change."""
+
+HERDING_WEBOTS = HerdingConfig(
+    lidar=LIDAR_WEBOTS,
+    detection=DetectionConfig(wall_reject=0.5, static_reject=1.2),
+    tracker=TrackerConfig(
+        forget_steps=120,
+        max_new_tracks_per_step=1,
+        pen_latch_depth=2.0,
+    ),
+    robot=RobotConfig(action_smooth=0.55),
+)
+"""Webots-matched training preset.
+
+Changes vs HERDING_DEFAULT:
+* LiDAR: 180 rays / 140° FOV matching ShepherdDog.proto hardware
+* Detection: wall_reject kept at 0.5 m (original default; static_reject
+             handles post FPs; 1.0 m was too aggressive near the south gate)
+* Tracker: forget_steps 200 → 60 (~1 s ghost-track lifetime)
+           max_new_tracks_per_step 10 → 3 (rate-caps FP flooding)
+* Robot: action_smooth 0.0 → 0.55 (matches Webots controller EMA)
+"""
@@ -1,9 +1,23 @@
-"""Sequential "pin-and-push" shepherd-dog controller.
+"""Adaptive sequential shepherd-dog controller.

-Single-target alternative to Strömbom: each step, target the sheep
-closest to the pen, park behind it, drive it through; once it latches
-penned the next-closest sheep becomes the target. Naturally queues
-the flock through a narrow gate.
+Three-phase strategy:
+
+  1. **Collect** (flock scattered): Strömbom collect — park behind the
+     furthest sheep and push it toward the CoM. Identical to the
+     Strömbom heuristic; keeps the flock together.
+
+  2. **Drive** (flock compact, >STRAGGLER_THRESHOLD active): Strömbom
+     drive — park behind the CoM relative to the pen and push the whole
+     group through the gate.
+
+  3. **Targeted** (≤STRAGGLER_THRESHOLD sheep remain active): single-
+     target push on the sheep closest to the pen entry.  Safe to isolate
+     individual sheep once the flock is nearly exhausted.
+
+The original pure pin-and-push (Phase 3 only) caused flock scatter in
+Webots physics whenever the dog tried to isolate a sheep while others
+were still spread across the field.  Phases 1–2 handle the bulk of
+herding with flock-aware Strömbom logic; Phase 3 cleans up stragglers.
 """

 import math
@@ -11,64 +25,103 @@ import math
 from herding.world.geometry import GATE_Y, PEN_ENTRY, in_pen


-DELTA_DRIVE = 1.5     # standoff behind the target sheep
-APPROACH_GAIN = 1.0   # action magnitude scale (1 = full speed)
+F_FACTOR = 4.0           # collect/drive threshold: radius > F_FACTOR·√n
+DELTA_COLLECT = 1.5      # standoff behind the furthest sheep (collect)
+DELTA_DRIVE = 2.0        # standoff behind CoM (drive)
+DELTA_TARGET = 1.5       # standoff behind single target sheep (targeted)
+STRAGGLER_THRESHOLD = 2  # switch to targeted push when ≤ this many active


-def _unit(x, y):
+def _unit(x: float, y: float):
    d = math.hypot(x, y)
    if d < 1e-6:
        return 0.0, 0.0
    return x / d, y / d


-def _is_active(x, y) -> bool:
+def _is_active(x: float, y: float) -> bool:
    return (not in_pen(x, y)) and y > GATE_Y


 def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
-    """Return ``(vx, vy, mode)`` — same call signature as Strömbom."""
-    active = [(name, x, y) for name, (x, y) in sheep_positions.items()
-              if _is_active(x, y)]
+    """Return ``(vx, vy, mode)`` — same signature as Strömbom."""
+    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle"

-    name, sx, sy = min(
-        active,
-        key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
-    )
+    n = len(active)
+    com_x = sum(p[0] for p in active) / n
+    com_y = sum(p[1] for p in active) / n
+    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
+    radius = max(dists)

+    if n <= STRAGGLER_THRESHOLD:
+        # Targeted: push the sheep closest to the pen entry individually.
+        sx, sy = min(active,
+                     key=lambda p: math.hypot(p[0] - pen_target[0],
+                                              p[1] - pen_target[1]))
        ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
-    tx = sx + DELTA_DRIVE * ux
-    ty = sy + DELTA_DRIVE * uy
+        tx, ty = sx + DELTA_TARGET * ux, sy + DELTA_TARGET * uy
+        mode = "targeted"
+
+    elif radius > F_FACTOR * math.sqrt(n):
+        # Collect: aim behind the furthest sheep from the CoM.
+        idx = max(range(n), key=lambda i: dists[i])
+        sx, sy = active[idx]
+        ux, uy = _unit(sx - com_x, sy - com_y)
+        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
+        mode = "collect"
+
+    else:
+        # Drive: push the whole compact flock toward the gate.
+        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
+        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
+        mode = "drive"

    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
-    return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}"
+    return ax, ay, mode


 def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
-    """``compute_action`` plus a debug dict (target, drive point)."""
-    active = [(name, x, y) for name, (x, y) in sheep_positions.items()
-              if _is_active(x, y)]
+    """``compute_action`` plus a debug dict."""
+    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle", {
-            "n_active": 0, "target_name": "",
-            "target_x": 0.0, "target_y": 0.0,
-            "drive_x": dog_xy[0], "drive_y": dog_xy[1],
+            "n_active": 0, "phase": "idle", "radius": 0.0, "threshold": 0.0,
+            "com_x": 0.0, "com_y": 0.0,
+            "target_x": dog_xy[0], "target_y": dog_xy[1],
        }

-    name, sx, sy = min(
-        active,
-        key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
-    )
+    n = len(active)
+    com_x = sum(p[0] for p in active) / n
+    com_y = sum(p[1] for p in active) / n
+    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
+    radius = max(dists)
+    threshold = F_FACTOR * math.sqrt(n)

+    if n <= STRAGGLER_THRESHOLD:
+        sx, sy = min(active,
+                     key=lambda p: math.hypot(p[0] - pen_target[0],
+                                              p[1] - pen_target[1]))
        ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
-    tx = sx + DELTA_DRIVE * ux
-    ty = sy + DELTA_DRIVE * uy
-    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
+        tx, ty = sx + DELTA_TARGET * ux, sy + DELTA_TARGET * uy
+        mode = "targeted"

-    return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}", {
-        "n_active": len(active), "target_name": name,
-        "target_x": sx, "target_y": sy,
-        "drive_x": tx, "drive_y": ty,
+    elif radius > threshold:
+        idx = max(range(n), key=lambda i: dists[i])
+        sx, sy = active[idx]
+        ux, uy = _unit(sx - com_x, sy - com_y)
+        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
+        mode = "collect"
+
+    else:
+        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
+        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
+        mode = "drive"
+
+    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
+    return ax, ay, mode, {
+        "n_active": n, "phase": mode, "radius": radius, "threshold": threshold,
+        "com_x": com_x, "com_y": com_y,
+        "target_x": tx, "target_y": ty,
    }
@@ -21,9 +21,13 @@ The downstream tracker handles association across frames.
 from __future__ import annotations

 import math
+from typing import TYPE_CHECKING

 import numpy as np

+if TYPE_CHECKING:
+    from herding.config import DetectionConfig, LidarConfig
+
 from herding.world.geometry import (
    FIELD_SHAPE, FIELD_ROUND_R,
    FIELD_X, FIELD_Y, GATE_X, GATE_Y,
@@ -79,21 +83,22 @@ def _in_field_region(cx: float, cy: float) -> bool:
            FIELD_Y[0] - 0.2 < cy < FIELD_Y[1] + 0.2)


-def _near_wall(cx: float, cy: float) -> bool:
+def _near_wall(cx: float, cy: float, wall_reject: float = WALL_REJECT) -> bool:
    """True if the detection is too close to a wall to be a sheep."""
    if FIELD_SHAPE == "field_round":
        r = math.hypot(cx, cy)
-        return r > FIELD_ROUND_R - WALL_REJECT
+        return r > FIELD_ROUND_R - wall_reject
    return (
-        cx > FIELD_X[1] - WALL_REJECT or cx < FIELD_X[0] + WALL_REJECT or
-        cy > FIELD_Y[1] - WALL_REJECT or
-        (cy < FIELD_Y[0] + WALL_REJECT and not (PEN_X[0] <= cx <= PEN_X[1]))
+        cx > FIELD_X[1] - wall_reject or cx < FIELD_X[0] + wall_reject or
+        cy > FIELD_Y[1] - wall_reject or
+        (cy < FIELD_Y[0] + wall_reject and not (PEN_X[0] <= cx <= PEN_X[1]))
    )


 def _split_cluster_by_range(
    points: list[tuple[float, float]],
    range_vals: list[float],
+    split_range_gap: float = SPLIT_RANGE_GAP,
 ) -> list[list[tuple[float, float]]]:
    """Split a cluster at range-profile local maxima (gaps between sheep).

@@ -108,7 +113,7 @@ def _split_cluster_by_range(
    # Find the maximum range (the dip/gap between sheep).
    r_max = max(range_vals)
    # If the range variation is small, it's a single target.
-    if r_max - r_min < SPLIT_RANGE_GAP:
+    if r_max - r_min < split_range_gap:
        return [points]
    # Find the split point: the index with the maximum range.
    split_idx = range_vals.index(r_max)
@@ -124,7 +129,7 @@ def _split_cluster_by_range(
        (right, range_vals[split_idx + 1:]),
    ]:
        if len(sub_pts) >= 1:
-            result.extend(_split_cluster_by_range(sub_pts, sub_ranges))
+            result.extend(_split_cluster_by_range(sub_pts, sub_ranges, split_range_gap))
    return result if result else [points]


@@ -132,14 +137,43 @@ def detections_from_scan(
    ranges: np.ndarray,
    dog_x: float, dog_y: float, dog_heading: float,
    max_range: float = LIDAR_MAX_RANGE,
+    detection_cfg: "DetectionConfig | None" = None,
+    lidar_cfg: "LidarConfig | None" = None,
 ) -> list[tuple[float, float]]:
-    """Return list of (x, y) world-frame sheep position estimates."""
+    """Return list of (x, y) world-frame sheep position estimates.
+
+    Pass ``detection_cfg`` to override clustering/filtering thresholds, or
+    ``lidar_cfg`` to inform the function of a non-default FOV (the number of
+    rays and FOV are inferred from the length of ``ranges`` and
+    ``lidar_cfg.fov_rad`` respectively).
+    """
+    # Resolve parameters — fall back to module-level constants when no cfg.
+    if detection_cfg is not None:
+        gap_thr = detection_cfg.gap_threshold
+        max_span = detection_cfg.max_cluster_span
+        hit_eps = detection_cfg.range_hit_eps
+        split_gap = detection_cfg.split_range_gap
+        wall_rej = detection_cfg.wall_reject
+        static_rej = detection_cfg.static_reject
+    else:
+        gap_thr = GAP_THRESHOLD
+        max_span = MAX_CLUSTER_SPAN
+        hit_eps = RANGE_HIT_EPS
+        split_gap = SPLIT_RANGE_GAP
+        wall_rej = WALL_REJECT
+        static_rej = STATIC_REJECT
+
+    sheep_r = lidar_cfg.sheep_radius if lidar_cfg is not None else SHEEP_RADIUS
+    fov = lidar_cfg.fov_rad if lidar_cfg is not None else LIDAR_FOV
+    if lidar_cfg is not None:
+        max_range = lidar_cfg.max_range
+
    ranges = np.asarray(ranges, dtype=np.float32)
    n_rays = ranges.shape[0]
    if n_rays == 0:
        return []
-    angles = ray_angles(n_rays, LIDAR_FOV)
-    hit = ranges < max_range - RANGE_HIT_EPS
+    angles = ray_angles(n_rays, fov)
+    hit = ranges < max_range - hit_eps

    world_a = dog_heading + angles
    px = dog_x + ranges * np.cos(world_a)
@@ -159,7 +193,7 @@ def detections_from_scan(
            prev_xy = None
            continue
        pt = (float(px[i]), float(py[i]), float(ranges[i]))
-        if prev_xy is not None and math.hypot(pt[0] - prev_xy[0], pt[1] - prev_xy[1]) > GAP_THRESHOLD:
+        if prev_xy is not None and math.hypot(pt[0] - prev_xy[0], pt[1] - prev_xy[1]) > gap_thr:
            clusters.append(current)
            current = []
        current.append(pt)
@@ -174,7 +208,7 @@ def detections_from_scan(

        # Multi-peak splitting.
        if len(cluster) >= 4:
-            sub_clusters = _split_cluster_by_range(points_xy, range_vals)
+            sub_clusters = _split_cluster_by_range(points_xy, range_vals, split_gap)
        else:
            sub_clusters = [points_xy]

@@ -185,24 +219,24 @@ def detections_from_scan(
            ys = [p[1] for p in sub]
            cx, cy = sum(xs) / len(xs), sum(ys) / len(ys)
            span = math.hypot(max(xs) - min(xs), max(ys) - min(ys))
-            if span > MAX_CLUSTER_SPAN:
+            if span > max_span:
                continue
            # Rays hit the front edge of the sheep; offset outward by
-            # SHEEP_RADIUS along the dog→cluster direction.
+            # sheep_radius along the dog→cluster direction.
            dx, dy = cx - dog_x, cy - dog_y
            d = math.hypot(dx, dy)
            if d > 1e-3:
-                cx += SHEEP_RADIUS * dx / d
-                cy += SHEEP_RADIUS * dy / d
+                cx += sheep_r * dx / d
+                cy += sheep_r * dy / d
            in_main = _in_field_region(cx, cy)
            in_gate_strip = (PEN_X[0] - 0.2 < cx < PEN_X[1] + 0.2 and
                             GATE_Y - 1.0 < cy < GATE_Y + 0.2)
            if not (in_main or in_gate_strip):
                continue
-            if any(math.hypot(cx - fx, cy - fy) < STATIC_REJECT
+            if any(math.hypot(cx - fx, cy - fy) < static_rej
                   for fx, fy in _STATIC_FEATURES):
                continue
-            if _near_wall(cx, cy):
+            if _near_wall(cx, cy, wall_rej):
                continue
            detections.append((cx, cy))
    return detections
@@ -2,20 +2,25 @@

 Raycasts against sheep (discs) and static world geometry. For rectangular
 fields this is axis-aligned walls + gate posts; for round fields it is a
-circular wall + gate posts. The env reproduces the false-positive cluster
-distribution Webots produces from real 3D geometry.
+circular wall + gate posts.

-Returns a range array matching the Webots Lidar device:
-180 rays, 140° FOV centred on forward, 12 m max range, 5 mm noise.
-See ``protos/ShepherdDog.proto``.
+The module-level constants (``LIDAR_N_RAYS``, ``LIDAR_FOV``, etc.) reflect
+the original 360°/360-ray oracle configuration.  Pass a
+:class:`~herding.config.LidarConfig` to :func:`simulate_scan` to use a
+different spec (e.g. :data:`~herding.config.LIDAR_WEBOTS` for 180-ray/140°
+matching the ShepherdDog.proto hardware).
 """

 from __future__ import annotations

 import math
+from typing import TYPE_CHECKING

 import numpy as np

+if TYPE_CHECKING:
+    from herding.config import LidarConfig
+
 from herding.world.geometry import (
    FIELD_SHAPE, FIELD_ROUND_R,
    FIELD_X, FIELD_Y,
@@ -192,11 +197,27 @@ def simulate_scan(
    noise: float = LIDAR_NOISE,
    max_range: float = LIDAR_MAX_RANGE,
    rng: np.random.Generator | None = None,
+    lidar_cfg: "LidarConfig | None" = None,
 ) -> np.ndarray:
    """Return a (N,) float32 range array. No-hit entries equal ``max_range``.

    ``sheep_xy`` is every sheep (penned or active) in the scene.
+
+    Pass ``lidar_cfg`` to override the module-level defaults for a single
+    call (e.g. to use :data:`~herding.config.LIDAR_WEBOTS`).
    """
+    if lidar_cfg is not None:
+        n_rays = lidar_cfg.n_rays
+        fov = lidar_cfg.fov_rad
+        max_range = lidar_cfg.max_range
+        noise = lidar_cfg.noise_std
+        sheep_r2 = lidar_cfg.sheep_radius ** 2
+        angles = ray_angles(n_rays, fov)
+        ch, sh = math.cos(dog_heading), math.sin(dog_heading)
+        cos_w = ch * np.cos(angles) - sh * np.sin(angles)
+        sin_w = sh * np.cos(angles) + ch * np.sin(angles)
+    else:
+        sheep_r2 = SHEEP_RADIUS ** 2
        ch, sh = math.cos(dog_heading), math.sin(dog_heading)
        cos_w = ch * _COS - sh * _SIN
        sin_w = sh * _COS + ch * _SIN
@@ -209,9 +230,8 @@ def simulate_scan(
        t = np.outer(sx, cos_w) + np.outer(sy, sin_w)
        s_dist2 = (sx ** 2 + sy ** 2)[:, None]
        perp2 = s_dist2 - t ** 2
-        R2 = SHEEP_RADIUS ** 2
-        hit = (perp2 < R2) & (t > 0.0)
-        half = np.sqrt(np.clip(R2 - perp2, 0.0, None))
+        hit = (perp2 < sheep_r2) & (t > 0.0)
+        half = np.sqrt(np.clip(sheep_r2 - perp2, 0.0, None))
        candidate = np.where(hit, t - half, np.inf)
        nearest = candidate.min(axis=0)
        np.minimum(best, nearest, out=best)
@@ -22,6 +22,10 @@ plane south (``is_penned_position``). Penned tracks are excluded from
 from __future__ import annotations

 import math
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from herding.config import TrackerConfig

 from herding.world.geometry import MAX_SHEEP, in_pen, is_penned_position

@@ -56,16 +60,21 @@ class Track:
        """Not-a-property in the hot loop — callers pass current step."""
        raise NotImplementedError

-    def predicted_position(self, current_step: int) -> tuple[float, float]:
+    def predicted_position(
+        self,
+        current_step: int,
+        predict_steps: int = PREDICT_STEPS,
+        velocity_clamp: float = VELOCITY_CLAMP,
+    ) -> tuple[float, float]:
        """Extrapolated position using constant velocity, clamped."""
        dt = current_step - self.last_seen
-        if dt <= 0 or dt > PREDICT_STEPS:
+        if dt <= 0 or dt > predict_steps:
            return self.x, self.y
        speed = math.hypot(self.vx, self.vy)
        if speed < 1e-4:
            return self.x, self.y
        # Clamp extrapolation distance.
-        max_d = VELOCITY_CLAMP * dt * 0.016  # steps → seconds
+        max_d = velocity_clamp * dt * 0.016  # steps → seconds
        d = min(speed * dt * 0.016, max_d)
        return (
            self.x + d * (self.vx / speed),
@@ -93,10 +102,36 @@ class SheepTracker:

    Each track is a :class:`Track` with position, velocity estimate,
    last-seen step, and penned flag.
+
+    Pass a :class:`~herding.config.TrackerConfig` to override any
+    module-level defaults without changing this file.
    """

-    def __init__(self, gate: float = GATE_M):
+    def __init__(
+        self,
+        gate: float = GATE_M,
+        tracker_cfg: "TrackerConfig | None" = None,
+    ):
+        if tracker_cfg is not None:
+            self.gate = tracker_cfg.gate_m
+            self._reacquire_gate = tracker_cfg.reacquire_gate_m
+            self._reacquire_min_age = tracker_cfg.reacquire_min_age
+            self._penned_gate = tracker_cfg.penned_gate_m
+            self._forget_steps = tracker_cfg.forget_steps
+            self._predict_steps = tracker_cfg.predict_steps
+            self._velocity_clamp = tracker_cfg.velocity_clamp
+            self._max_new_per_step = tracker_cfg.max_new_tracks_per_step
+            self._pen_latch_depth = tracker_cfg.pen_latch_depth
+        else:
            self.gate = gate
+            self._reacquire_gate = REACQUIRE_GATE_M
+            self._reacquire_min_age = REACQUIRE_MIN_AGE
+            self._penned_gate = PENNED_GATE_M
+            self._forget_steps = FORGET_STEPS
+            self._predict_steps = PREDICT_STEPS
+            self._velocity_clamp = VELOCITY_CLAMP
+            self._max_new_per_step = MAX_ACTIVE_TRACKS
+            self._pen_latch_depth = 0.0
        self._tracks: dict[int, Track] = {}
        self._next_id = 0
        self.step = 0
@@ -119,8 +154,8 @@ class SheepTracker:
        active_tids.sort(key=lambda tid: self._tracks[tid].last_seen)
        for tid in active_tids:
            track = self._tracks[tid]
-            # Use predicted position for matching.
-            tx, ty = track.predicted_position(self.step)
+            tx, ty = track.predicted_position(
+                self.step, self._predict_steps, self._velocity_clamp)
            best_j, best_d = -1, self.gate
            for j, (dx, dy) in enumerate(detections):
                if j in det_used:
@@ -140,10 +175,11 @@ class SheepTracker:
            if tid in updated_tids:
                continue
            track = self._tracks[tid]
-            if (self.step - track.last_seen) < REACQUIRE_MIN_AGE:
+            if (self.step - track.last_seen) < self._reacquire_min_age:
                continue
-            tx, ty = track.predicted_position(self.step)
-            best_j, best_d = -1, REACQUIRE_GATE_M
+            tx, ty = track.predicted_position(
+                self.step, self._predict_steps, self._velocity_clamp)
+            best_j, best_d = -1, self._reacquire_gate
            for j, (dx, dy) in enumerate(detections):
                if j in det_used:
                    continue
@@ -161,7 +197,7 @@ class SheepTracker:
        penned_tids = [tid for tid, t in self._tracks.items() if t.penned]
        for tid in penned_tids:
            track = self._tracks[tid]
-            best_j, best_d = -1, PENNED_GATE_M
+            best_j, best_d = -1, self._penned_gate
            for j, (dx, dy) in enumerate(detections):
                if j in det_used:
                    continue
@@ -174,25 +210,35 @@ class SheepTracker:
                track.update(dx, dy, self.step)
                det_used.add(best_j)

-        # Spawn new tracks for unmatched detections.
+        # Spawn new tracks for unmatched detections — rate-capped.
+        spawned = 0
        for j, (dx, dy) in enumerate(detections):
            if j in det_used:
                continue
-            penned = in_pen(dx, dy) or is_penned_position(dx, dy)
+            if spawned >= self._max_new_per_step:
+                break
+            penned = self._is_penned(dx, dy)
            self._tracks[self._next_id] = Track(dx, dy, self.step, penned)
            self._next_id += 1
+            spawned += 1

        # Promote active tracks whose current estimate crosses the gate.
        for track in self._tracks.values():
            if track.penned:
                continue
-            px, py = track.predicted_position(self.step)
-            if is_penned_position(px, py):
+            px, py = track.predicted_position(
+                self.step, self._predict_steps, self._velocity_clamp)
+            if self._is_penned(px, py):
                track.penned = True

-        # Forget stale active tracks; penned tracks live forever.
+        # Forget stale active tracks; penned tracks decay too but at a
+        # longer horizon (real penned sheep are still observed occasionally
+        # when the dog faces south; pure FPs at gate posts stop being
+        # detected once the dog drives away).
+        penned_forget = self._forget_steps * 8
        stale = [tid for tid, t in self._tracks.items()
-                 if not t.penned and (self.step - t.last_seen) > FORGET_STEPS]
+                 if (not t.penned and (self.step - t.last_seen) > self._forget_steps)
+                 or (t.penned and (self.step - t.last_seen) > penned_forget)]
        for tid in stale:
            del self._tracks[tid]

@@ -206,18 +252,42 @@ class SheepTracker:

        return self.get_positions()

-    def get_positions(self) -> dict[str, tuple[float, float]]:
+    def _is_penned(self, x: float, y: float) -> bool:
+        """Check whether a position should be considered penned.
+
+        Uses ``pen_latch_depth`` to require the position to be that many
+        metres past the gate line before latching.  Increasing the depth
+        prevents gate-area LiDAR false positives (gate hardware reflections
+        at y ≈ -15) from being permanently latched as penned tracks.
+        """
+        from herding.world.geometry import GATE_Y
+        # Apply depth threshold to both in_pen and is_penned_position so
+        # that any position in the gate column must clear GATE_Y - depth.
+        threshold = GATE_Y - self._pen_latch_depth
+        return (in_pen(x, y) or is_penned_position(x, y)) and y <= threshold
+
+    def get_positions(self, min_freshness: int | None = None) -> dict[str, tuple[float, float]]:
        """Active (not-penned) tracks as a ``{name: (x, y)}`` dict.

        For tracks currently being predicted (occluded but within
-        PREDICT_STEPS), returns the extrapolated position so the teacher
+        predict_steps), returns the extrapolated position so the teacher
        sees a smooth estimate.
+
+        ``min_freshness`` (optional, deploy-only): drop tracks whose
+        last_seen is older than ``step - min_freshness``. Real sheep in
+        FOV are detected nearly every step; phantom tracks from sporadic
+        Webots FPs stop being re-observed and decay. Default ``None``
+        preserves training behaviour (extrapolated tracks visible).
        """
        result = {}
        for tid, track in self._tracks.items():
            if track.penned:
                continue
-            px, py = track.predicted_position(self.step)
+            if (min_freshness is not None
+                    and self.step - track.last_seen > min_freshness):
+                continue
+            px, py = track.predicted_position(
+                self.step, self._predict_steps, self._velocity_clamp)
            result[f"t{tid}"] = (px, py)
        return result

@@ -234,4 +304,4 @@ class SheepTracker:
        """Number of active tracks currently being extrapolated (not directly observed)."""
        return sum(1 for t in self._tracks.values()
                   if not t.penned and (self.step - t.last_seen) > 0
-                   and (self.step - t.last_seen) <= PREDICT_STEPS)
+                   and (self.step - t.last_seen) <= self._predict_steps)
@@ -2,14 +2,22 @@
 controllers.

 First-order rigid-body model — no slip, wheel-accel limits, or contact
-forces. Webots' ODE physics handles those at inference; the env stays
-close enough to first order that a policy trained here transfers.
+forces by default.  Pass ``slip_std`` and an ``rng`` to
+:func:`kinematics_step` / :func:`mecanum_kinematics_step` to add
+per-wheel Gaussian speed noise for domain randomisation.
 """

+from __future__ import annotations
+
 import math
+from typing import Optional
+
+import numpy as np


-def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt):
+def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt,
+                    slip_std: float = 0.0,
+                    rng: Optional[np.random.Generator] = None):
    """Integrate one step of differential-drive forward kinematics.

    Inputs
@@ -19,9 +27,15 @@ def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt):
    w_left, w_right : wheel angular velocities (rad/s)
    wheel_radius, wheel_base : robot dimensions (m)
    dt   : timestep (s)
+    slip_std : optional Gaussian std (rad/s) added to each wheel speed
+    rng  : numpy Generator for slip noise; required when slip_std > 0

    Returns (new_x, new_y, new_h).
    """
+    if slip_std > 0.0 and rng is not None:
+        noise = rng.normal(0.0, slip_std, size=2)
+        w_left = w_left + noise[0]
+        w_right = w_right + noise[1]
    v = (w_right + w_left) * wheel_radius * 0.5
    omega = (w_right - w_left) * wheel_radius / wheel_base
    new_x = x + v * math.cos(h) * dt
@@ -67,7 +81,9 @@ def heading_speed_to_wheels(heading, speed_motor, h, max_wheel_omega,
 # ---------------------------------------------------------------------------

 def mecanum_kinematics_step(x, y, h, w_fl, w_fr, w_rl, w_rr,
-                             wheel_radius, lx, ly, dt):
+                             wheel_radius, lx, ly, dt,
+                             slip_std: float = 0.0,
+                             rng: Optional[np.random.Generator] = None):
    """Integrate one step of mecanum forward kinematics.

    Parameters
@@ -79,9 +95,15 @@ def mecanum_kinematics_step(x, y, h, w_fl, w_fr, w_rl, w_rr,
    lx   : half the front-to-back axle distance (m)
    ly   : half the left-to-right axle distance (m)
    dt   : timestep (s)
+    slip_std : optional Gaussian std (rad/s) added to each wheel speed
+    rng  : numpy Generator for slip noise; required when slip_std > 0

    Returns (new_x, new_y, new_h).
    """
+    if slip_std > 0.0 and rng is not None:
+        noise = rng.normal(0.0, slip_std, size=4)
+        w_fl, w_fr = w_fl + noise[0], w_fr + noise[1]
+        w_rl, w_rr = w_rl + noise[2], w_rr + noise[3]
    r = wheel_radius
    vx_body = (w_fl + w_fr + w_rl + w_rr) * r / 4.0
    vy_body = (-w_fl + w_fr + w_rl - w_rr) * r / 4.0
@@ -72,6 +72,36 @@ if FIELD_SHAPE == "field_round":
    GATE_Y = FIELD_ROUND_GATE_Y


+def configure_from_args(argv: list[str] | None = None) -> str:
+    """Parse ``--world`` from *argv* (or ``sys.argv[1:]``), call :func:`configure`,
+    and set ``HERDING_WORLD`` in the environment.
+
+    Returns the resolved world name (``"field"`` or ``"field_round"``).
+
+    Call this at the very top of any script that accepts a ``--world`` flag,
+    *before* importing anything from ``herding.*`` that depends on field
+    geometry.  This centralises the pre-parse logic that was previously
+    duplicated in ``bc/collect.py``, ``rl/train.py``, and ``eval.py``::
+
+        from herding.world.geometry import configure_from_args
+        configure_from_args()   # reads sys.argv
+    """
+    import os
+    import sys as _sys
+    args = argv if argv is not None else _sys.argv[1:]
+    world = "field"
+    for i, a in enumerate(args):
+        if a == "--world" and i + 1 < len(args):
+            world = args[i + 1]
+            break
+        if a.startswith("--world="):
+            world = a.split("=", 1)[1]
+            break
+    configure(world)
+    os.environ["HERDING_WORLD"] = world
+    return world
+
+
 def configure(shape: str) -> None:
    """Switch the active field geometry at runtime.

@@ -138,7 +138,8 @@ PROTO ShepherdDog [
              }
            ]
          }
-          # Lidar — front-facing 140° FOV, mounted at snout tip
+          # Lidar — front-facing 140° FOV (canonical hardware spec).
+          # See ShepherdDog360.proto for the 360° robustness-ablation variant.
          Lidar {
            translation 0.05 0 0.01
            name "lidar"
@@ -0,0 +1,691 @@
+#VRML_SIM R2025a utf8
+# Shepherd Dog Robot - wheeled base with dog character on top, tail-mounted 360 lidar
+
+EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/appearances/protos/TireRubber.proto"
+
+PROTO ShepherdDog360 [
+  field SFVec3f    translation     0 0 0
+  field SFRotation rotation        0 1 0 0
+  field SFString   name            "ShepherdDog"
+  field SFString   controller      "shepherd_dog"
+  field MFString   controllerArgs  []
+  field SFString   customData      ""
+  field SFBool     supervisor      FALSE
+  field SFBool     synchronization TRUE
+]
+{
+  Robot {
+    translation IS translation
+    rotation IS rotation
+    name IS name
+    controller IS controller
+    controllerArgs IS controllerArgs
+    customData IS customData
+    supervisor IS supervisor
+    synchronization IS synchronization
+    children [
+      # ========== CHASSIS / BASE ==========
+      DEF CHASSIS Transform {
+        translation 0 0 0.05
+        children [
+          Shape {
+            appearance DEF CHASSIS_APP PBRAppearance {
+              baseColor 0.2 0.2 0.2
+              roughness 0.6
+              metalness 0.3
+            }
+            geometry Box {
+              size 0.32 0.16 0.06
+            }
+          }
+        ]
+      }
+      # Front slope
+      DEF CHASSIS_FRONT Transform {
+        translation 0.14 0 0.07
+        children [
+          Shape {
+            appearance USE CHASSIS_APP
+            geometry Box {
+              size 0.06 0.14 0.04
+            }
+          }
+        ]
+      }
+      # Rear slope
+      DEF CHASSIS_REAR Transform {
+        translation -0.14 0 0.07
+        children [
+          Shape {
+            appearance USE CHASSIS_APP
+            geometry Box {
+              size 0.06 0.14 0.04
+            }
+          }
+        ]
+      }
+
+      # ========== DOG BODY on top of chassis ==========
+      DEF BODY Transform {
+        translation 0 0 0.11
+        children [
+          Shape {
+            appearance DEF FUR_BROWN PBRAppearance {
+              baseColor 0.55 0.35 0.17
+              roughness 0.85
+              metalness 0.0
+            }
+            geometry Box {
+              size 0.30 0.16 0.08
+            }
+          }
+        ]
+      }
+
+      # ========== CHEST ==========
+      DEF CHEST Transform {
+        translation 0.12 0 0.11
+        children [
+          Shape {
+            appearance DEF FUR_CREAM PBRAppearance {
+              baseColor 0.85 0.72 0.55
+              roughness 0.85
+              metalness 0.0
+            }
+            geometry Box {
+              size 0.08 0.18 0.08
+            }
+          }
+        ]
+      }
+
+      # ========== HEAD ==========
+      DEF HEAD Transform {
+        translation 0.20 0 0.17
+        children [
+          Shape {
+            appearance USE FUR_BROWN
+            geometry Box {
+              size 0.10 0.12 0.09
+            }
+          }
+        ]
+      }
+
+      # ========== SNOUT + LIDAR ==========
+      DEF SNOUT Transform {
+        translation 0.28 0 0.155
+        children [
+          Shape {
+            appearance USE FUR_CREAM
+            geometry Box {
+              size 0.08 0.07 0.05
+            }
+          }
+          # Nose
+          Transform {
+            translation 0.04 0 0.01
+            children [
+              Shape {
+                appearance PBRAppearance {
+                  baseColor 0.1 0.1 0.1
+                  roughness 0.4
+                }
+                geometry Sphere {
+                  radius 0.013
+                  subdivision 2
+                }
+              }
+            ]
+          }
+          # Lidar — 360° FOV (was 140°/2.44 rad). Wider FOV closes the
+          # FOV-loss perception gap so policies trained on 360° gym sim
+          # transfer cleanly without retraining.
+          Lidar {
+            translation 0.05 0 0.01
+            name "lidar"
+            horizontalResolution 360
+            fieldOfView 6.28
+            numberOfLayers 1
+            minRange 0.10
+            maxRange 15.0
+            noise 0.005
+          }
+        ]
+      }
+
+      # ========== LEFT EAR ==========
+      DEF LEFT_EAR HingeJoint {
+        jointParameters HingeJointParameters {
+          axis 0 0 1
+          anchor 0.19 0.055 0.21
+        }
+        device [
+          RotationalMotor {
+            name "left ear motor"
+            maxVelocity 10.0
+            minPosition -0.5
+            maxPosition 0.5
+          }
+        ]
+        endPoint Solid {
+          translation 0.19 0.055 0.21
+          rotation 0 0 1 0.2
+          name "left ear"
+          children [
+            Shape {
+              appearance DEF FUR_DARK PBRAppearance {
+                baseColor 0.35 0.20 0.10
+                roughness 0.85
+                metalness 0.0
+              }
+              geometry Box {
+                size 0.035 0.025 0.06
+              }
+            }
+          ]
+          boundingObject Box {
+            size 0.035 0.025 0.06
+          }
+          physics Physics {
+            density -1
+            mass 0.005
+          }
+        }
+      }
+
+      # ========== RIGHT EAR ==========
+      DEF RIGHT_EAR HingeJoint {
+        jointParameters HingeJointParameters {
+          axis 0 0 1
+          anchor 0.19 -0.055 0.21
+        }
+        device [
+          RotationalMotor {
+            name "right ear motor"
+            maxVelocity 10.0
+            minPosition -0.5
+            maxPosition 0.5
+          }
+        ]
+        endPoint Solid {
+          translation 0.19 -0.055 0.21
+          rotation 0 0 -1 0.2
+          name "right ear"
+          children [
+            Shape {
+              appearance USE FUR_DARK
+              geometry Box {
+                size 0.035 0.025 0.06
+              }
+            }
+          ]
+          boundingObject Box {
+            size 0.035 0.025 0.06
+          }
+          physics Physics {
+            density -1
+            mass 0.005
+          }
+        }
+      }
+
+      # ========== EYES ==========
+      DEF LEFT_EYE Transform {
+        translation 0.25 0.05 0.19
+        children [
+          Shape {
+            appearance PBRAppearance {
+              baseColor 0.95 0.95 0.95
+              roughness 0.3
+            }
+            geometry Sphere {
+              radius 0.016
+              subdivision 2
+            }
+          }
+          # Pupil
+          Transform {
+            translation 0.012 0 0.004
+            children [
+              Shape {
+                appearance PBRAppearance {
+                  baseColor 0.1 0.1 0.1
+                  roughness 0.2
+                }
+                geometry Sphere {
+                  radius 0.009
+                  subdivision 2
+                }
+              }
+            ]
+          }
+        ]
+      }
+      DEF RIGHT_EYE Transform {
+        translation 0.25 -0.05 0.19
+        children [
+          Shape {
+            appearance PBRAppearance {
+              baseColor 0.95 0.95 0.95
+              roughness 0.3
+            }
+            geometry Sphere {
+              radius 0.016
+              subdivision 2
+            }
+          }
+          # Pupil
+          Transform {
+            translation 0.012 0 0.004
+            children [
+              Shape {
+                appearance PBRAppearance {
+                  baseColor 0.1 0.1 0.1
+                  roughness 0.2
+                }
+                geometry Sphere {
+                  radius 0.009
+                  subdivision 2
+                }
+              }
+            ]
+          }
+        ]
+      }
+
+      # ========== COLLAR ==========
+      DEF COLLAR Transform {
+        translation 0.16 0 0.125
+        children [
+          Shape {
+            appearance PBRAppearance {
+              baseColor 0.8 0.1 0.1
+              roughness 0.5
+            }
+            geometry Cylinder {
+              height 0.02
+              radius 0.095
+              subdivision 16
+            }
+          }
+          # ID tag
+          Transform {
+            translation 0 0.10 0
+            rotation 1 0 0 1.5708
+            children [
+              Shape {
+                appearance PBRAppearance {
+                  baseColor 0.75 0.75 0.0
+                  metalness 0.8
+                  roughness 0.2
+                }
+                geometry Cylinder {
+                  height 0.003
+                  radius 0.018
+                  subdivision 8
+                }
+              }
+            ]
+          }
+        ]
+      }
+
+      # ========== TAIL (lidar inside tail tip ball) ==========
+      DEF TAIL HingeJoint {
+        jointParameters HingeJointParameters {
+          axis 0 1 0
+          anchor -0.15 0 0.11
+        }
+        device [
+          RotationalMotor {
+            name "tail motor"
+            maxVelocity 5.0
+            minPosition -1.0
+            maxPosition 1.0
+          }
+        ]
+        endPoint Solid {
+          translation -0.17 0 0.13
+          name "tail solid"
+          children [
+            Shape {
+              appearance USE FUR_BROWN
+              geometry Capsule {
+                height 0.12
+                radius 0.013
+                top FALSE
+              }
+            }
+            # Tail tip ball
+            Transform {
+              translation 0 0 0.08
+              children [
+                Shape {
+                  appearance PBRAppearance {
+                    baseColor 0.2 0.2 0.2
+                    roughness 0.3
+                    metalness 0.6
+                  }
+                  geometry Sphere {
+                    radius 0.028
+                    subdivision 4
+                  }
+                }
+              ]
+            }
+          ]
+          boundingObject Group {
+            children [
+              Capsule {
+                height 0.12
+                radius 0.013
+              }
+              Transform {
+                translation 0 0 0.08
+                children [
+                  Sphere {
+                    radius 0.028
+                  }
+                ]
+              }
+            ]
+          }
+          physics Physics {
+            density -1
+            mass 0.08
+          }
+        }
+      }
+
+      # ========== RIGHT AXLE ARM (horizontal bar from chassis to wheel) ==========
+      DEF RIGHT_AXLE Transform {
+        translation 0 -0.115 0.038
+        children [
+          Shape {
+            appearance PBRAppearance {
+              baseColor 0.5 0.5 0.5
+              roughness 0.3
+              metalness 0.8
+            }
+            geometry Box {
+              size 0.02 0.08 0.02
+            }
+          }
+        ]
+      }
+
+      # ========== LEFT AXLE ARM ==========
+      DEF LEFT_AXLE Transform {
+        translation 0 0.115 0.038
+        children [
+          Shape {
+            appearance PBRAppearance {
+              baseColor 0.5 0.5 0.5
+              roughness 0.3
+              metalness 0.8
+            }
+            geometry Box {
+              size 0.02 0.08 0.02
+            }
+          }
+        ]
+      }
+
+      # ========== RIGHT WHEEL ==========
+      DEF RIGHT_WHEEL_JOINT HingeJoint {
+        jointParameters HingeJointParameters {
+          axis 0 1 0
+          anchor 0 -0.14 0.038
+        }
+        device [
+          RotationalMotor {
+            name "right wheel motor"
+            maxVelocity 70.0
+            maxTorque 20.0
+          }
+          PositionSensor {
+            name "right wheel sensor"
+            resolution 0.00628
+          }
+        ]
+        endPoint Solid {
+          translation 0 -0.14 0.038
+          rotation 0 -1 0 1.570796
+          children [
+            DEF WHEEL_VIS Pose {
+              rotation 1 0 0 -1.5708
+              children [
+                Shape {
+                  appearance PBRAppearance {
+                    baseColor 0.15 0.15 0.15
+                    roughness 0.4
+                    metalness 0.5
+                  }
+                  geometry Cylinder {
+                    height 0.016
+                    radius 0.035
+                    subdivision 24
+                  }
+                }
+                Shape {
+                  appearance PBRAppearance {
+                    baseColor 0.6 0.6 0.6
+                    roughness 0.3
+                    metalness 0.7
+                  }
+                  geometry Cylinder {
+                    height 0.018
+                    radius 0.014
+                    subdivision 12
+                  }
+                }
+                Shape {
+                  appearance TireRubber {
+                    textureTransform TextureTransform {
+                      scale 1.5 0.6
+                    }
+                    type "bike"
+                  }
+                  geometry Cylinder {
+                    height 0.022
+                    radius 0.038
+                    subdivision 24
+                  }
+                }
+              ]
+            }
+          ]
+          name "right wheel"
+          boundingObject Pose {
+            rotation 1 0 0 -1.5708
+            children [
+              Cylinder {
+                height 0.022
+                radius 0.038
+              }
+            ]
+          }
+          physics Physics {
+            density -1
+            mass 0.06
+            centerOfMass [
+              0 0 0
+            ]
+          }
+        }
+      }
+
+      # ========== LEFT WHEEL ==========
+      DEF LEFT_WHEEL_JOINT HingeJoint {
+        jointParameters HingeJointParameters {
+          axis 0 1 0
+          anchor 0 0.14 0.038
+        }
+        device [
+          RotationalMotor {
+            name "left wheel motor"
+            maxVelocity 70.0
+            maxTorque 20.0
+          }
+          PositionSensor {
+            name "left wheel sensor"
+            resolution 0.00628
+          }
+        ]
+        endPoint Solid {
+          translation 0 0.14 0.038
+          rotation 0.707105 0 0.707109 -3.14159
+          children [
+            USE WHEEL_VIS
+          ]
+          name "left wheel"
+          boundingObject Pose {
+            rotation 1 0 0 -1.5708
+            children [
+              Cylinder {
+                height 0.022
+                radius 0.038
+              }
+            ]
+          }
+          physics Physics {
+            density -1
+            mass 0.12
+            centerOfMass [
+              0 0 0
+            ]
+          }
+        }
+      }
+
+      # ========== FRONT CASTER ==========
+      DEF FRONT_CASTER BallJoint {
+        jointParameters BallJointParameters {
+          anchor 0.14 0 0.02
+        }
+        endPoint Solid {
+          translation 0.14 0 0.02
+          name "front caster"
+          children [
+            Shape {
+              appearance PBRAppearance {
+                baseColor 0.2 0.2 0.2
+                roughness 0.3
+                metalness 0.5
+              }
+              geometry Sphere {
+                radius 0.02
+                subdivision 2
+              }
+            }
+          ]
+          boundingObject Sphere {
+            radius 0.02
+          }
+          physics Physics {
+            density -1
+            mass 0.03
+          }
+        }
+      }
+
+      # ========== REAR CASTER ==========
+      DEF REAR_CASTER BallJoint {
+        jointParameters BallJointParameters {
+          anchor -0.14 0 0.02
+        }
+        endPoint Solid {
+          translation -0.14 0 0.02
+          name "rear caster"
+          children [
+            Shape {
+              appearance PBRAppearance {
+                baseColor 0.2 0.2 0.2
+                roughness 0.3
+                metalness 0.5
+              }
+              geometry Sphere {
+                radius 0.02
+                subdivision 2
+              }
+            }
+          ]
+          boundingObject Sphere {
+            radius 0.02
+          }
+          physics Physics {
+            density -1
+            mass 0.03
+          }
+        }
+      }
+
+      # ========== IMU SENSORS ==========
+      Accelerometer {
+        translation 0 0 0.10
+        name "accelerometer"
+      }
+      Gyro {
+        translation 0 0 0.10
+        name "gyro"
+      }
+      Compass {
+        translation 0 0 0.10
+        name "compass"
+      }
+
+      # ========== GPS ==========
+      GPS {
+        translation 0 0 0.17
+        name "gps"
+      }
+
+      # ========== RECEIVER ==========
+      Receiver {
+        name "receiver"
+        channel 1
+      }
+
+      # ========== EMITTER ==========
+      Emitter {
+        name "emitter"
+        channel 1
+        range 50.0
+      }
+    ]
+
+    # ========== BOUNDING OBJECT ==========
+    boundingObject Group {
+      children [
+        # Chassis box
+        Transform {
+          translation 0 0 0.05
+          children [
+            Box {
+              size 0.32 0.16 0.06
+            }
+          ]
+        }
+        # Body box
+        Transform {
+          translation 0 0 0.11
+          children [
+            Box {
+              size 0.30 0.16 0.08
+            }
+          ]
+        }
+      ]
+    }
+
+    # ========== PHYSICS ==========
+    physics Physics {
+      density -1
+      mass 5.0
+      centerOfMass [
+        0 0 0.03
+      ]
+    }
+  }
+}
@@ -0,0 +1,240 @@
+"""Tests for herding/config.py — dataclass construction, defaults, overrides."""
+
+import math
+import pytest
+
+from herding.config import (
+    DetectionConfig,
+    DomainRandomConfig,
+    HerdingConfig,
+    HERDING_DEFAULT,
+    HERDING_WEBOTS,
+    LidarConfig,
+    LIDAR_FULL,
+    LIDAR_WEBOTS,
+    RobotConfig,
+    TrackerConfig,
+)
+
+
+# ---------------------------------------------------------------------------
+# LidarConfig
+# ---------------------------------------------------------------------------
+
+class TestLidarConfig:
+    def test_defaults_match_full_circle_preset(self):
+        assert LidarConfig() == LIDAR_FULL
+
+    def test_webots_preset(self):
+        assert LIDAR_WEBOTS.n_rays == 180
+        assert abs(LIDAR_WEBOTS.fov_rad - math.radians(140.0)) < 1e-9
+
+    def test_frozen(self):
+        cfg = LidarConfig()
+        with pytest.raises((AttributeError, TypeError)):
+            cfg.n_rays = 42  # type: ignore[misc]
+
+    def test_invalid_n_rays(self):
+        with pytest.raises(ValueError):
+            LidarConfig(n_rays=0)
+
+    def test_invalid_fov(self):
+        with pytest.raises(ValueError):
+            LidarConfig(fov_rad=0.0)
+        with pytest.raises(ValueError):
+            LidarConfig(fov_rad=math.pi * 3)
+
+    def test_invalid_max_range(self):
+        with pytest.raises(ValueError):
+            LidarConfig(max_range=-1.0)
+
+
+# ---------------------------------------------------------------------------
+# TrackerConfig
+# ---------------------------------------------------------------------------
+
+class TestTrackerConfig:
+    def test_defaults(self):
+        cfg = TrackerConfig()
+        assert cfg.forget_steps == 200
+        assert cfg.max_new_tracks_per_step == 10
+
+    def test_webots_preset_tighter(self):
+        cfg = HERDING_WEBOTS.tracker
+        assert cfg.forget_steps == 120
+        assert cfg.max_new_tracks_per_step == 1
+        assert cfg.pen_latch_depth == 2.0
+
+    def test_invalid_forget_steps(self):
+        with pytest.raises(ValueError):
+            TrackerConfig(forget_steps=0)
+
+    def test_invalid_max_new_tracks(self):
+        with pytest.raises(ValueError):
+            TrackerConfig(max_new_tracks_per_step=0)
+
+
+# ---------------------------------------------------------------------------
+# DetectionConfig
+# ---------------------------------------------------------------------------
+
+class TestDetectionConfig:
+    def test_defaults(self):
+        cfg = DetectionConfig()
+        assert cfg.wall_reject == 0.5
+
+    def test_webots_preset_wall_reject(self):
+        # wall_reject stays at 0.5 m — 1.0 m was too aggressive near the south gate
+        cfg = HERDING_WEBOTS.detection
+        assert cfg.wall_reject == 0.5
+
+    def test_invalid_wall_reject(self):
+        with pytest.raises(ValueError):
+            DetectionConfig(wall_reject=-0.1)
+
+
+# ---------------------------------------------------------------------------
+# RobotConfig
+# ---------------------------------------------------------------------------
+
+class TestRobotConfig:
+    def test_max_linear_derived(self):
+        cfg = RobotConfig()
+        assert abs(cfg.max_linear - cfg.wheel_radius * cfg.max_wheel_omega) < 1e-9
+
+    def test_default_action_smooth_zero(self):
+        assert RobotConfig().action_smooth == 0.0
+
+    def test_webots_action_smooth(self):
+        assert HERDING_WEBOTS.robot.action_smooth == 0.55
+
+    def test_invalid_action_smooth(self):
+        with pytest.raises(ValueError):
+            RobotConfig(action_smooth=1.0)
+        with pytest.raises(ValueError):
+            RobotConfig(action_smooth=-0.1)
+
+
+# ---------------------------------------------------------------------------
+# DomainRandomConfig
+# ---------------------------------------------------------------------------
+
+class TestDomainRandomConfig:
+    def test_all_zeros_by_default(self):
+        cfg = DomainRandomConfig()
+        assert cfg.fp_rate == 0.0
+        assert cfg.wheel_slip_std == 0.0
+        assert cfg.compass_noise_std == 0.0
+
+    def test_invalid_fp_rate(self):
+        with pytest.raises(ValueError):
+            DomainRandomConfig(fp_rate=-1.0)
+
+    def test_invalid_slip_std(self):
+        with pytest.raises(ValueError):
+            DomainRandomConfig(wheel_slip_std=-0.01)
+
+
+# ---------------------------------------------------------------------------
+# HerdingConfig
+# ---------------------------------------------------------------------------
+
+class TestHerdingConfig:
+    def test_default_is_herding_default(self):
+        assert HerdingConfig() == HERDING_DEFAULT
+
+    def test_replace_sub_config(self):
+        new_cfg = HERDING_WEBOTS.replace(
+            domain_random=DomainRandomConfig(fp_rate=2.0)
+        )
+        assert new_cfg.domain_random.fp_rate == 2.0
+        # Other sub-configs unchanged
+        assert new_cfg.tracker == HERDING_WEBOTS.tracker
+        assert new_cfg.lidar == HERDING_WEBOTS.lidar
+
+    def test_herding_default_matches_original_module_constants(self):
+        """Verify the default config reproduces the original hardcoded values."""
+        from herding.perception.lidar_sim import (
+            LIDAR_N_RAYS, LIDAR_FOV, LIDAR_MAX_RANGE, LIDAR_NOISE,
+            SHEEP_RADIUS, POST_RADIUS,
+        )
+        from herding.perception.lidar_perception import (
+            GAP_THRESHOLD, MAX_CLUSTER_SPAN, RANGE_HIT_EPS,
+            SPLIT_RANGE_GAP, WALL_REJECT, STATIC_REJECT,
+        )
+        from herding.perception.sheep_tracker import (
+            GATE_M, REACQUIRE_GATE_M, REACQUIRE_MIN_AGE, PENNED_GATE_M,
+            FORGET_STEPS, PREDICT_STEPS, VELOCITY_CLAMP,
+        )
+        cfg = HERDING_DEFAULT
+        assert cfg.lidar.n_rays == LIDAR_N_RAYS
+        assert cfg.lidar.fov_rad == LIDAR_FOV
+        assert cfg.lidar.max_range == LIDAR_MAX_RANGE
+        assert cfg.lidar.noise_std == LIDAR_NOISE
+        assert cfg.lidar.sheep_radius == SHEEP_RADIUS
+        assert cfg.lidar.post_radius == POST_RADIUS
+        assert cfg.detection.gap_threshold == GAP_THRESHOLD
+        assert cfg.detection.max_cluster_span == MAX_CLUSTER_SPAN
+        assert cfg.detection.range_hit_eps == RANGE_HIT_EPS
+        assert cfg.detection.split_range_gap == SPLIT_RANGE_GAP
+        assert cfg.detection.wall_reject == WALL_REJECT
+        assert cfg.detection.static_reject == STATIC_REJECT
+        assert cfg.tracker.gate_m == GATE_M
+        assert cfg.tracker.reacquire_gate_m == REACQUIRE_GATE_M
+        assert cfg.tracker.reacquire_min_age == REACQUIRE_MIN_AGE
+        assert cfg.tracker.penned_gate_m == PENNED_GATE_M
+        assert cfg.tracker.forget_steps == FORGET_STEPS
+        assert cfg.tracker.predict_steps == PREDICT_STEPS
+        assert cfg.tracker.velocity_clamp == VELOCITY_CLAMP
+
+
+# ---------------------------------------------------------------------------
+# Integration: HerdingEnv honours the config
+# ---------------------------------------------------------------------------
+
+class TestHerdingEnvConfig:
+    def test_default_env_unchanged(self):
+        """HerdingEnv() still works with no config — zero behaviour change."""
+        from training.herding_env import HerdingEnv
+        env = HerdingEnv(n_sheep=1, max_steps=5, difficulty=1.0, seed=0)
+        obs, info = env.reset()
+        assert obs.shape == (32,)
+        obs2, *_ = env.step(env.action_space.sample())
+        assert obs2.shape == (32,)
+
+    def test_webots_config_propagates_action_smooth(self):
+        from training.herding_env import HerdingEnv
+        env = HerdingEnv(herding_cfg=HERDING_WEBOTS)
+        assert env.ACTION_SMOOTH == 0.55
+
+    def test_webots_config_runs(self):
+        from training.herding_env import HerdingEnv
+        env = HerdingEnv(
+            n_sheep=2, max_steps=10, difficulty=1.0, seed=42,
+            herding_cfg=HERDING_WEBOTS,
+        )
+        obs, _ = env.reset()
+        for _ in range(5):
+            obs, _, terminated, truncated, _ = env.step(env.action_space.sample())
+        assert obs.shape == (32,)
+
+    def test_domain_random_fp_runs(self):
+        from training.herding_env import HerdingEnv
+        cfg = HERDING_WEBOTS.replace(
+            domain_random=DomainRandomConfig(fp_rate=3.0, fp_std_pos=0.2)
+        )
+        env = HerdingEnv(n_sheep=2, max_steps=10, difficulty=1.0, seed=7, herding_cfg=cfg)
+        env.reset()
+        for _ in range(5):
+            env.step(env.action_space.sample())
+
+    def test_domain_random_slip_runs(self):
+        from training.herding_env import HerdingEnv
+        cfg = HERDING_WEBOTS.replace(
+            domain_random=DomainRandomConfig(wheel_slip_std=0.05, compass_noise_std=0.02)
+        )
+        env = HerdingEnv(n_sheep=1, max_steps=10, difficulty=1.0, seed=3,
+                         drive_mode="mecanum", herding_cfg=cfg)
+        env.reset()
+        for _ in range(5):
+            env.step(env.action_space.sample())
@@ -106,11 +106,34 @@ def test_sequential_empty_input_idle():


 def test_sequential_targets_closest_to_pen():
+    # With 2 sheep (≤ STRAGGLER_THRESHOLD), sequential goes straight to
+    # "targeted" phase and pushes the sheep nearest to the pen.
    near = (10.0, -5.0)       # closer to pen entry (11.5, -15)
    far = (-10.0, 10.0)
    sheep = {"near": near, "far": far}
+    vx, vy, mode = sequential_action((0.0, 0.0), sheep, PEN_ENTRY)
+    assert mode == "targeted"
+    # Dog should be directed toward near sheep (south-east), not far (north-west).
+    assert vx > 0 and vy < 0
+
+
+def test_sequential_collects_when_scattered():
+    # With >STRAGGLER_THRESHOLD sheep and radius > F_FACTOR*sqrt(n):
+    # should use collect (Strombom) not targeted.
+    sheep = {f"s{i}": pos for i, pos in enumerate([
+        (12.0, 10.0), (-12.0, 10.0), (0.0, 12.0),
+        (12.0, -12.0), (-10.0, -8.0),
+    ])}
    _vx, _vy, mode = sequential_action((0.0, 0.0), sheep, PEN_ENTRY)
-    assert mode.startswith("drive:near")
+    assert mode in ("collect", "drive")
+
+
+def test_sequential_drives_when_compact():
+    # Compact flock of 5 sheep near centre — should drive, not collect.
+    sheep = {f"s{i}": (float(i) * 0.3, float(i) * 0.3)
+             for i in range(5)}
+    _vx, _vy, mode = sequential_action((0.0, 5.0), sheep, PEN_ENTRY)
+    assert mode == "drive"


 # ---------------------------------------------------------------------------
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# Measure the actual velocity response of the Webots mecanum robot and
+# compare against the gym's first-order kinematics prediction.
+#
+# Uses HERDING_MODE=calibrate in the shepherd_dog controller, which applies
+# a known fixed action for N steps, records GPS displacement, and computes
+# the relative error vs gym prediction.
+#
+# Usage:
+#   bash tools/calibrate_mecanum.sh [N_STEPS]
+#     N_STEPS : steps to hold each action (default 150, ≈ 2.4 s real-time)
+#
+# Output:
+#   calibrate_mecanum.log  — per-axis results printed and written here
+#
+# Target: < 10% relative error on each axis.
+# If errors are high, tune coulombFriction / forceDependentSlip in
+# tools/run_webots.sh (mecanum contactProperties block).
+
+set -euo pipefail
+N_STEPS="${1:-150}"
+ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
+LOG="$ROOT/calibrate_mecanum.log"
+export PATH="/home/jalf/miniconda3/envs/tir/bin:$PATH"
+
+echo "Running mecanum calibration (N_STEPS=$N_STEPS)..."
+echo "Results will be written to: $LOG"
+truncate -s 0 "$LOG"
+
+run_calib() {
+    local vx="$1" vy="$2" om="$3"
+    echo "  Testing vx=$vx vy=$vy om=$om ..."
+    rm -f "$ROOT/training/.run_done"
+    timeout --kill-after=15s 60 \
+    xvfb-run -a \
+    env WEBOTS_HEADLESS=1 WEBOTS_EXTRA_ARGS="--stdout --stderr" \
+        HERDING_MODE=calibrate HERDING_DRIVE=mecanum HERDING_WORLD=field \
+        CALIB_VX="$vx" CALIB_VY="$vy" CALIB_OM="$om" \
+        CALIB_N_STEPS="$N_STEPS" \
+    bash "$ROOT/tools/run_webots.sh" 0 calibrate mecanum field \
+    2>&1 | grep -E "cmd=|gym|webots|error" || true
+    pkill -9 -f "webots-bin|Xvfb" 2>/dev/null || true
+    sleep 1
+}
+
+# Three test vectors: pure-x, pure-y, diagonal
+run_calib 0.5  0.0 0.0
+run_calib 0.0  0.5 0.0
+run_calib 0.35 0.35 0.0
+
+echo ""
+echo "=== Calibration results ==="
+cat "$LOG" 2>/dev/null || echo "(no results written — check controller output above)"
+echo ""
+echo "Target: <10% error on each axis."
+echo "If errors are high, tune coulombFriction / forceDependentSlip in"
+echo "tools/run_webots.sh (mecanum contactProperties block)."
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# Run one DAgger round on a (drive, world) combo.
+#
+# Usage: tools/dagger_round.sh <drive> <world> [seeds_per_n] [round_idx]
+#
+# Collects DAgger demos using the current BC policy as the actor and the
+# universal teacher as the labeller, in the HERDING_WEBOTS preset env
+# (140° FOV, tight tracker — matches deployment). Concatenates with the
+# original BC demos, re-trains BC, and saves to runs/bc_dagger_<combo>/.
+
+set -euo pipefail
+ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
+cd "$ROOT"
+
+DRIVE="${1:-differential}"
+WORLD="${2:-field}"
+SEEDS="${3:-15}"
+ROUND="${4:-1}"
+
+TAG="${DRIVE}_${WORLD}"
+ORIG_DEMOS="training/bc/demos_${TAG}.npz"
+DAGGER_DEMOS="training/bc/dagger${ROUND}_${TAG}.npz"
+COMBINED_DEMOS="training/bc/combined${ROUND}_${TAG}.npz"
+BC_DIR="training/runs/bc_${TAG}"
+OUT_DIR="training/runs/bc_dagger${ROUND}_${TAG}"
+
+case "$WORLD" in
+    field_round)
+        EPOCHS=150
+        ;;
+    *)
+        EPOCHS=60
+        ;;
+esac
+
+echo "=== DAgger round ${ROUND}: ${DRIVE}/${WORLD} ==="
+echo "    Actor policy: ${BC_DIR}/policy.zip"
+echo "    Output:       ${OUT_DIR}/policy.zip"
+
+# 1. Collect DAgger demos: BC drives, teacher labels (privileged + HERDING_WEBOTS).
+python -m training.bc.collect \
+    --teacher universal --out "$DAGGER_DEMOS" \
+    --seeds-per-n "$SEEDS" --subsample 3 \
+    --frame-stack 4 --drive-mode "$DRIVE" --world "$WORLD" \
+    --max-steps 30000 \
+    --privileged --use-webots-preset \
+    --fp-rate 0.0 --action-smooth 0.55 --wheel-slip-std 0.05 \
+    --dagger-policy "$BC_DIR"
+
+# 2. Concatenate original demos + dagger demos.
+python - <<PY
+import numpy as np
+orig = np.load("${ORIG_DEMOS}")
+dag  = np.load("${DAGGER_DEMOS}")
+obs = np.concatenate([orig["obs"], dag["obs"]], axis=0)
+act = np.concatenate([orig["actions"], dag["actions"]], axis=0)
+np.savez("${COMBINED_DEMOS}", obs=obs, actions=act,
+         meta=np.concatenate([orig["meta"], dag["meta"]], axis=0))
+print(f"[combine] orig={orig['obs'].shape[0]} + dagger={dag['obs'].shape[0]} = {obs.shape[0]}")
+PY
+
+# 3. Re-train BC on combined demos.
+python -m training.bc.pretrain \
+    --demos "$COMBINED_DEMOS" --out "$OUT_DIR" \
+    --epochs "$EPOCHS" --net-arch 512,512
+
+echo "=== DAgger round ${ROUND} done: ${OUT_DIR}/policy.zip ==="
@@ -38,12 +38,12 @@ MODE=${2:-bc}
 DRIVE=${3:-differential}
 WORLD=${4:-field}

-if (( N < 1 || N > 10 )); then
-    echo "N must be 1..10, got $N" >&2; exit 1
+if (( N < 0 || N > 10 )); then
+    echo "N must be 0..10, got $N" >&2; exit 1
 fi
 case "$MODE" in
-    bc|rl|strombom|sequential|universal) ;;
-    *) echo "MODE must be bc|rl|strombom|sequential|universal, got '$MODE'" >&2; exit 1 ;;
+    bc|rl|strombom|sequential|universal|calibrate) ;;
+    *) echo "MODE must be bc|rl|strombom|sequential|universal|calibrate, got '$MODE'" >&2; exit 1 ;;
 esac
 case "$DRIVE" in
    differential|mecanum) ;;
@@ -83,29 +83,31 @@ cp "$SRC" "$DST"
 if [[ "$DRIVE" == "mecanum" ]]; then
    sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum.proto"|' "$DST"
    sed -i 's|^ShepherdDog {|ShepherdDogMecanum {|' "$DST"
-    # Inject mecanum contact properties after the existing contactProperties block.
+    # Inject mecanum contact properties into the contactProperties array.
+    # Strategy: find the closing '  ]' that ends the contactProperties block
+    # (it sits at 2-space indent, immediately before the WorldInfo closing brace)
+    # and insert just before it.
    python3 -c "
-import re, sys
-with open(sys.argv[1], 'r') as f:
+with open('$DST', 'r') as f:
    txt = f.read()
-# Find the closing ']' of contactProperties and insert before it.
-mec = '''
-    ContactProperties {
+mec = '''    ContactProperties {
      material1 \"MecanumWheel\"
      coulombFriction [
-        2
+        1.0
      ]
      bounce 0
      forceDependentSlip [
-        10
+        0.01
      ]
      softCFM 0.0001
-    }'''
-# Insert before the first ']' that closes contactProperties [...]
-txt = re.sub(r'(contactProperties\s*\[[^\]]*)(\])', r'\1' + mec + r'\2', txt, count=1)
-with open(sys.argv[1], 'w') as f:
+    }
+'''
+# The contactProperties array closes with '  ]\n}' (2-space indent ] then WorldInfo }).
+# Insert the new block just before that closing ].
+txt = txt.replace('\n  ]\n}', '\n' + mec + '  ]\n}', 1)
+with open('$DST', 'w') as f:
    f.write(txt)
-" "$DST"
+"
 fi

 # Comment out sheep N+1..10 by prefixing the matching Sheep { ... } line.
@@ -129,6 +131,7 @@ HERDING_MODE=$MODE
 HERDING_POLICY_DIR=$RESOLVED_POLICY_DIR
 HERDING_DRIVE=$DRIVE
 HERDING_WORLD=$WORLD
+HERDING_USE_GT=${HERDING_USE_GT:-0}
 EOF

 export HERDING_MODE="$MODE"
@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+# Headless Webots sweep across modes, drives, worlds, and flock sizes.
+# Runs sequentially; each trial gets a hard 150s wall-clock timeout.
+# Results are written to webots_sweep.log (tab-separated) and printed.
+#
+# Usage: bash tools/webots_sweep.sh [output_log]
+
+set -euo pipefail
+ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
+OUT="${1:-$ROOT/webots_sweep.log}"
+TIMEOUT_S=120  # ~80k steps in fast headless mode — covers slow-converging physics
+
+# Webots uses its own python3; put the conda env first so all deps resolve.
+export PATH="/home/jalf/miniconda3/envs/tir/bin:$PATH"
+
+# Columns: mode  drive  world  n_sheep  success  steps
+printf "%-12s %-14s %-12s %7s  %7s  %s\n" \
+    "mode" "drive" "world" "n_sheep" "success" "steps" | tee "$OUT"
+printf '%s\n' "$(printf '%-12s %-14s %-12s %7s  %7s  %s' \
+    '----' '-----' '-----' '-------' '-------' '-----')" | tee -a "$OUT"
+
+run_trial() {
+    local mode="$1" drive="$2" world="$3" n="$4" policy_dir="${5:-}"
+
+    local done_file="$ROOT/training/.run_done"
+    rm -f "$done_file"
+
+    local extra_env=()
+    extra_env+=(WEBOTS_HEADLESS=1)
+    extra_env+=(WEBOTS_EXTRA_ARGS="--stdout --stderr")
+    extra_env+=(HERDING_USE_GT=0)
+    if [[ -n "$policy_dir" ]]; then
+        extra_env+=(HERDING_POLICY_DIR="$ROOT/$policy_dir")
+    fi
+
+    local raw
+    raw=$(
+        timeout --kill-after=15s "$TIMEOUT_S" \
+        xvfb-run -a \
+        env "${extra_env[@]}" \
+        bash "$ROOT/tools/run_webots.sh" "$n" "$mode" "$drive" "$world" \
+        2>&1
+    ) || true
+    # Webots-bin and Xvfb can survive the timeout; kill any orphans now.
+    pkill -9 -f "webots-bin|Xvfb" 2>/dev/null || true
+    sleep 1
+
+    local success="FAIL"
+    local steps="timeout"
+
+    if echo "$raw" | grep -q "\[dog\] all .* sheep penned at step"; then
+        success="OK"
+        steps=$(echo "$raw" | grep "\[dog\] all .* sheep penned at step" \
+                | grep -oP 'step \K[0-9]+')
+    fi
+
+    printf "%-12s %-14s %-12s %7s  %7s  %s\n" \
+        "$mode" "$drive" "$world" "$n" "$success" "$steps" | tee -a "$OUT"
+}
+
+# ---------------------------------------------------------------------------
+# Analytic baselines (differential only — that's the story context)
+# strombom / sequential: canonical baselines
+# universal: the actual teacher used to collect BC demos
+# ---------------------------------------------------------------------------
+for mode in strombom sequential universal; do
+    for world in field field_round; do
+        for n in 5 10; do
+            run_trial "$mode" differential "$world" "$n"
+        done
+    done
+done
+
+# ---------------------------------------------------------------------------
+# BC — world-specific policies
+# ---------------------------------------------------------------------------
+for drive in differential mecanum; do
+    for world in field field_round; do
+        for n in 5 10; do
+            run_trial bc "$drive" "$world" "$n" \
+                "training/runs/bc_${drive}_${world}"
+        done
+    done
+done
+
+# ---------------------------------------------------------------------------
+# RL_FAST — MODE=rl with explicit HERDING_POLICY_DIR pointing to rl_fast dirs
+# (run_webots.sh rejects "rl_fast" as a mode; "rl" + policy override is correct)
+# ---------------------------------------------------------------------------
+for drive in differential mecanum; do
+    for world in field field_round; do
+        for n in 5 10; do
+            run_trial rl "$drive" "$world" "$n" \
+                "training/runs/rl_fast_${drive}_${world}"
+        done
+    done
+done
+
+echo ""
+echo "Sweep complete. Results saved to: $OUT"
@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+# Headless Webots sweep across modes, drives, worlds, and flock sizes.
+# Runs sequentially; each trial gets a hard 150s wall-clock timeout.
+# Results are written to webots_sweep.log (tab-separated) and printed.
+#
+# Usage: bash tools/webots_sweep.sh [output_log]
+
+set -euo pipefail
+ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
+OUT="${1:-$ROOT/webots_sweep.log}"
+TIMEOUT_S=120  # ~80k steps in fast headless mode — covers slow-converging physics
+
+# Webots uses its own python3; put the conda env first so all deps resolve.
+export PATH="/home/jalf/miniconda3/envs/tir/bin:$PATH"
+
+# Columns: mode  drive  world  n_sheep  success  steps
+printf "%-12s %-14s %-12s %7s  %7s  %s\n" \
+    "mode" "drive" "world" "n_sheep" "success" "steps" | tee "$OUT"
+printf '%s\n' "$(printf '%-12s %-14s %-12s %7s  %7s  %s' \
+    '----' '-----' '-----' '-------' '-------' '-----')" | tee -a "$OUT"
+
+run_trial() {
+    local mode="$1" drive="$2" world="$3" n="$4" policy_dir="${5:-}"
+
+    local done_file="$ROOT/training/.run_done"
+    rm -f "$done_file"
+
+    local extra_env=()
+    extra_env+=(WEBOTS_HEADLESS=1)
+    extra_env+=(WEBOTS_EXTRA_ARGS="--stdout --stderr")
+    extra_env+=(HERDING_USE_GT=1)
+    if [[ -n "$policy_dir" ]]; then
+        extra_env+=(HERDING_POLICY_DIR="$ROOT/$policy_dir")
+    fi
+
+    local raw
+    raw=$(
+        timeout --kill-after=15s "$TIMEOUT_S" \
+        xvfb-run -a \
+        env "${extra_env[@]}" \
+        bash "$ROOT/tools/run_webots.sh" "$n" "$mode" "$drive" "$world" \
+        2>&1
+    ) || true
+    # Webots-bin and Xvfb can survive the timeout; kill any orphans now.
+    pkill -9 -f "webots-bin|Xvfb" 2>/dev/null || true
+    sleep 1
+
+    local success="FAIL"
+    local steps="timeout"
+
+    if echo "$raw" | grep -q "\[dog\] all .* sheep penned at step"; then
+        success="OK"
+        steps=$(echo "$raw" | grep "\[dog\] all .* sheep penned at step" \
+                | grep -oP 'step \K[0-9]+')
+    fi
+
+    printf "%-12s %-14s %-12s %7s  %7s  %s\n" \
+        "$mode" "$drive" "$world" "$n" "$success" "$steps" | tee -a "$OUT"
+}
+
+# ---------------------------------------------------------------------------
+# Analytic baselines (differential only — that's the story context)
+# strombom / sequential: canonical baselines
+# universal: the actual teacher used to collect BC demos
+# ---------------------------------------------------------------------------
+for mode in strombom sequential universal; do
+    for world in field field_round; do
+        for n in 5 10; do
+            run_trial "$mode" differential "$world" "$n"
+        done
+    done
+done
+
+# ---------------------------------------------------------------------------
+# BC — world-specific policies
+# ---------------------------------------------------------------------------
+for drive in differential mecanum; do
+    for world in field field_round; do
+        for n in 5 10; do
+            run_trial bc "$drive" "$world" "$n" \
+                "training/runs/bc_${drive}_${world}"
+        done
+    done
+done
+
+# ---------------------------------------------------------------------------
+# RL_FAST — MODE=rl with explicit HERDING_POLICY_DIR pointing to rl_fast dirs
+# (run_webots.sh rejects "rl_fast" as a mode; "rl" + policy override is correct)
+# ---------------------------------------------------------------------------
+for drive in differential mecanum; do
+    for world in field field_round; do
+        for n in 5 10; do
+            run_trial rl "$drive" "$world" "$n" \
+                "training/runs/rl_fast_${drive}_${world}"
+        done
+    done
+done
+
+echo ""
+echo "Sweep complete. Results saved to: $OUT"
@@ -21,22 +21,9 @@ from pathlib import Path

 import numpy as np

-# Early CLI parse so we can configure geometry before heavy imports.
-# (argparse is used again below for the full parse; this is a lightweight
-# pre-pass that only reads --world.)
-_pre_argv = [a for a in os.sys.argv[1:]]
-_pre_world = None
-for i, a in enumerate(_pre_argv):
-    if a == "--world" and i + 1 < len(_pre_argv):
-        _pre_world = _pre_argv[i + 1]
-        break
-    if a.startswith("--world="):
-        _pre_world = a.split("=", 1)[1]
-        break
-if _pre_world is not None:
-    from herding.world.geometry import configure as _geo_configure
-    _geo_configure(_pre_world)
-    os.environ["HERDING_WORLD"] = _pre_world
+# Configure field geometry before other herding imports read it at module level.
+from herding.world.geometry import configure_from_args as _configure_from_args
+_configure_from_args()

 from herding.control.active_scan import ActiveScanTeacher
 from herding.world.geometry import PEN_ENTRY, FIELD_SHAPE
@@ -83,10 +70,17 @@ def _call_teacher(fn, dog_xy, dog_heading, sheep_positions, pen_target,

 def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int,
                teacher_fn, frame_stack: int = 1, privileged: bool = False,
-                drive_mode: str = "differential"):
+                drive_mode: str = "differential", herding_cfg=None,
+                actor_policy=None):
+    """Collect (obs, teacher_action) pairs from one episode.
+
+    ``actor_policy`` (DAgger mode): a callable ``policy(obs) -> action`` that
+    drives the env. The teacher still labels each visited state. If ``None``
+    (default), the teacher drives.
+    """
    env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
                    difficulty=1.0, seed=seed, frame_stack=frame_stack,
-                    drive_mode=drive_mode)
+                    drive_mode=drive_mode, herding_cfg=herding_cfg)
    obs, _ = env.reset(seed=seed)
    obs_list, action_list = [], []
    scan_teacher = ActiveScanTeacher(teacher_fn)
@@ -108,13 +102,16 @@ def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int,
            )
            vx, vy, omega, _mode = result
        if drive_mode == "mecanum":
-            action = np.array([vx, vy, omega], dtype=np.float32)
+            teacher_action = np.array([vx, vy, omega], dtype=np.float32)
        else:
-            action = np.array([vx, vy], dtype=np.float32)
+            teacher_action = np.array([vx, vy], dtype=np.float32)
        if step % subsample == 0:
            obs_list.append(obs.copy())
-            action_list.append(action.copy())
-        obs, _r, term, trunc, _info = env.step(action)
+            action_list.append(teacher_action.copy())
+        # In DAgger mode the policy drives; otherwise the teacher does.
+        step_action = (actor_policy(obs) if actor_policy is not None
+                       else teacher_action)
+        obs, _r, term, trunc, _info = env.step(step_action)
        if term or trunc:
            break
    success = bool(env.sheep_penned.all())
@@ -153,6 +150,24 @@ def main():
                        help="World shape. If not set, uses HERDING_WORLD "
                             "env var or defaults to 'field'. Must be set "
                             "before geometry is imported.")
+    # Domain randomisation — applied to the gym env during collection so
+    # the teacher demonstrates under the same noise the policy will face.
+    parser.add_argument("--fp-rate", type=float, default=0.0,
+                        help="Mean false-positive detections injected per "
+                             "step (Poisson λ). 0 = clean sim (default).")
+    parser.add_argument("--action-smooth", type=float, default=0.0,
+                        help="EMA coefficient on dog actions (0 = none). "
+                             "Set to 0.55 to match the Webots controller.")
+    parser.add_argument("--wheel-slip-std", type=float, default=0.0,
+                        help="Gaussian noise (rad/s) on wheel speeds for "
+                             "mecanum dynamics domain randomisation.")
+    parser.add_argument("--dagger-policy", default=None,
+                        help="Path to a BC/PPO policy directory. When set, "
+                             "the policy drives the env (DAgger) while the "
+                             "teacher labels every visited state.")
+    parser.add_argument("--use-webots-preset", action="store_true",
+                        help="Use HERDING_WEBOTS preset (140° FOV + tight "
+                             "tracker). Match this to deployment for DAgger.")
    args = parser.parse_args()

    # Validate --world matches geometry (already configured by the
@@ -161,6 +176,53 @@ def main():
        print(f"[demos] WARNING: --world={args.world} but geometry is "
              f"'{FIELD_SHAPE}'. This should not happen — file a bug.")

+    from herding.config import HerdingConfig, HERDING_WEBOTS, DomainRandomConfig, RobotConfig
+    if args.use_webots_preset:
+        herding_cfg = HERDING_WEBOTS.replace(
+            domain_random=DomainRandomConfig(
+                fp_rate=args.fp_rate,
+                wheel_slip_std=args.wheel_slip_std,
+            ),
+            robot=RobotConfig(action_smooth=args.action_smooth),
+        )
+        print(f"[demos] HERDING_WEBOTS preset + DR: fp_rate={args.fp_rate} "
+              f"action_smooth={args.action_smooth} wheel_slip_std={args.wheel_slip_std}")
+    else:
+        herding_cfg = None
+        if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
+            herding_cfg = HerdingConfig(
+                domain_random=DomainRandomConfig(
+                    fp_rate=args.fp_rate,
+                    wheel_slip_std=args.wheel_slip_std,
+                ),
+                robot=RobotConfig(action_smooth=args.action_smooth),
+            )
+            print(f"[demos] domain-random: fp_rate={args.fp_rate}  "
+                  f"action_smooth={args.action_smooth}  "
+                  f"wheel_slip_std={args.wheel_slip_std}")
+
+    actor_policy = None
+    if args.dagger_policy is not None:
+        # DAgger: failures are the most valuable data (off-policy states
+        # where the student needs teacher correction). Always keep them.
+        args.keep_failures = True
+        from stable_baselines3 import PPO
+        from pathlib import Path as _P
+        run = _P(args.dagger_policy)
+        for name in ("policy.zip", "final.zip"):
+            if (run / name).exists():
+                zip_path = run / name
+                break
+        else:
+            raise FileNotFoundError(
+                f"No policy found in {run} (tried policy.zip, final.zip)")
+        _model = PPO.load(str(zip_path), device="auto")
+        print(f"[demos] DAgger mode: actor = {zip_path}")
+        def actor_policy(obs):
+            obs_b = np.asarray(obs, dtype=np.float32).reshape(1, -1)
+            a, _ = _model.predict(obs_b, deterministic=True)
+            return a[0]
+
    teacher_fn = TEACHERS[args.teacher]
    print(f"[demos] teacher: {args.teacher}  world: {FIELD_SHAPE}")

@@ -177,7 +239,8 @@ def main():
            obs, actions, success, total_steps = collect_one(
                n, seed, args.max_steps, args.subsample, teacher_fn,
                frame_stack=args.frame_stack, privileged=args.privileged,
-                drive_mode=args.drive_mode,
+                drive_mode=args.drive_mode, herding_cfg=herding_cfg,
+                actor_policy=actor_policy,
            )
            n_total += 1
            if success:
@@ -18,21 +18,9 @@ from statistics import mean

 import numpy as np

-# Early CLI pre-parse for --world so geometry is configured before
-# other herding.* modules are imported.
-_pre_argv = [a for a in os.sys.argv[1:]]
-_pre_world = None
-for i, a in enumerate(_pre_argv):
-    if a == "--world" and i + 1 < len(_pre_argv):
-        _pre_world = _pre_argv[i + 1]
-        break
-    if a.startswith("--world="):
-        _pre_world = a.split("=", 1)[1]
-        break
-if _pre_world is not None:
-    from herding.world.geometry import configure as _geo_configure
-    _geo_configure(_pre_world)
-    os.environ["HERDING_WORLD"] = _pre_world
+# Configure field geometry before other herding imports read it at module level.
+from herding.world.geometry import configure_from_args as _configure_from_args
+_configure_from_args()

 from herding.world.geometry import MAX_SHEEP, PEN_ENTRY
 from herding.control.sequential import compute_action as sequential_action
@@ -47,6 +47,7 @@ from herding.perception.lidar_sim import simulate_scan
 from herding.perception.obs import OBS_DIM, build_obs
 from herding.perception.sheep_tracker import SheepTracker
 from herding.control.strombom import compute_action as strombom_action
+from herding.config import HerdingConfig


 class HerdingEnv(gym.Env):
@@ -87,13 +88,24 @@ class HerdingEnv(gym.Env):
        use_lidar: bool = True,
        frame_stack: int = 1,
        drive_mode: str = "differential",
+        herding_cfg: Optional[HerdingConfig] = None,
    ):
        super().__init__()
+        # Store the config; fall back to defaults when None.
+        self._herding_cfg = herding_cfg
+
+        # Apply robot config overrides — these shadow the class attributes
+        # so that per-instance configuration is possible without touching
+        # the class-level defaults used by unconfigured instances.
+        if herding_cfg is not None:
+            self.ACTION_SMOOTH = herding_cfg.robot.action_smooth
+
        # ``use_lidar=True`` (default): obs and imitation-reward teacher
        # see only LiDAR-perceived positions via a tracker, matching the
        # Webots controller. ``False`` exposes ground truth for ablation.
        self._use_lidar = bool(use_lidar)
-        self._tracker = SheepTracker() if self._use_lidar else None
+        tracker_cfg = herding_cfg.tracker if herding_cfg is not None else None
+        self._tracker = SheepTracker(tracker_cfg=tracker_cfg) if self._use_lidar else None
        self._np_rng_lidar: Optional[np.random.Generator] = None

        # Frame stacking: the policy receives the last K obs concatenated,
@@ -261,6 +273,14 @@ class HerdingEnv(gym.Env):
        vx, vy = float(self.smoothed_action[0]), float(self.smoothed_action[1])
        omega = float(self.smoothed_action[2]) if self._action_dim >= 3 else 0.0

+        # Domain randomisation: compass (heading) noise.
+        dr = (self._herding_cfg.domain_random
+              if self._herding_cfg is not None else None)
+        slip_std = dr.wheel_slip_std if dr is not None else 0.0
+        if dr is not None and dr.compass_noise_std > 0.0 and self._np_rng_lidar is not None:
+            self.dog_heading = float(self.dog_heading + self._np_rng_lidar.normal(
+                0.0, dr.compass_noise_std))
+
        # Safety supervisor — dog stays north of the gate.
        if self.dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
            vx, vy = 0.0, 1.0
@@ -282,6 +302,8 @@ class HerdingEnv(gym.Env):
                DOG_WHEEL_RADIUS,
                DOG_WHEEL_BASE_X / 2.0, DOG_WHEEL_BASE_Y / 2.0,
                WEBOTS_DT,
+                slip_std=slip_std,
+                rng=self._np_rng_lidar,
            )
        else:
            wL, wR = velocity_to_wheels(
@@ -294,6 +316,8 @@ class HerdingEnv(gym.Env):
            self.dog_x, self.dog_y, self.dog_heading = kinematics_step(
                self.dog_x, self.dog_y, self.dog_heading,
                wL, wR, DOG_WHEEL_RADIUS, DOG_WHEEL_BASE, WEBOTS_DT,
+                slip_std=slip_std,
+                rng=self._np_rng_lidar,
            )
        self.dog_x, self.dog_y = clip_to_field(self.dog_x, self.dog_y, margin=0.3)
        # Extra constraint: dog stays north of the gate area.
@@ -460,16 +484,68 @@ class HerdingEnv(gym.Env):
                for i in range(self.n_sheep)]

    def _update_tracker(self) -> None:
+        lidar_cfg = (self._herding_cfg.lidar
+                     if self._herding_cfg is not None else None)
+        detection_cfg = (self._herding_cfg.detection
+                         if self._herding_cfg is not None else None)
        ranges = simulate_scan(
            self.dog_x, self.dog_y, self.dog_heading,
            self._all_sheep_xy(),
            rng=self._np_rng_lidar,
+            lidar_cfg=lidar_cfg,
        )
        detections = detections_from_scan(
            ranges, self.dog_x, self.dog_y, self.dog_heading,
+            detection_cfg=detection_cfg,
+            lidar_cfg=lidar_cfg,
        )
+        # Domain randomisation: inject false-positive detections near static
+        # features to mimic the spurious clusters Webots' physical LiDAR
+        # produces from real 3D geometry (walls, posts, fence rails).
+        dr = (self._herding_cfg.domain_random
+              if self._herding_cfg is not None else None)
+        if dr is not None and dr.fp_rate > 0.0 and self._np_rng_lidar is not None:
+            detections = list(detections)
+            detections.extend(
+                self._sample_false_positives(dr.fp_rate, dr.fp_std_pos))
        self._tracker.update(detections)

+    # Static feature anchor points used for FP injection.
+    # The rectangular list covers gate posts and field corners; the round
+    # list covers just the gate posts (the circular wall is handled separately).
+    _FP_ANCHORS_RECT = (
+        (10.0, -15.0), (13.0, -15.0),           # gate posts
+        (15.0,  15.0), (15.0, -15.0),
+        (-15.0,  15.0), (-15.0, -15.0),          # field corners
+        (15.0, 0.0), (-15.0, 0.0),               # mid-wall returns
+        (0.0, 15.0), (0.0, -15.0),
+    )
+    _FP_ANCHORS_ROUND = (
+        (0.0, -15.0),                            # gate centre
+        (-1.5, -15.0), (1.5, -15.0),             # gate posts
+        (0.0, 15.0),                              # north wall
+        (10.6, -10.6), (-10.6, -10.6),           # circular wall quadrants
+    )
+
+    def _sample_false_positives(
+        self, fp_rate: float, fp_std: float,
+    ) -> list[tuple[float, float]]:
+        """Return a list of spurious (x, y) detections for this step."""
+        from herding.world.geometry import FIELD_SHAPE
+        anchors = (self._FP_ANCHORS_ROUND
+                   if FIELD_SHAPE == "field_round"
+                   else self._FP_ANCHORS_RECT)
+        n_fps = int(self._np_rng_lidar.poisson(fp_rate))
+        if n_fps == 0:
+            return []
+        fps = []
+        chosen = self._np_rng_lidar.integers(0, len(anchors), size=n_fps)
+        noise = self._np_rng_lidar.normal(0.0, fp_std, size=(n_fps, 2))
+        for k in range(n_fps):
+            ax, ay = anchors[chosen[k]]
+            fps.append((float(ax + noise[k, 0]), float(ay + noise[k, 1])))
+        return fps
+
    def perceived_positions(self) -> dict[str, tuple[float, float]]:
        """What the controller would "see" this step: tracker output in
        LiDAR mode, ground truth in privileged mode. Used by demo
@@ -23,22 +23,9 @@ import argparse
 import os
 from pathlib import Path

-# Early CLI pre-parse for --world so geometry is configured before any
-# herding.* / training.* import binds geometry constants. Matches the
-# pattern used by training.bc.collect and training.eval.
-_pre_argv = [a for a in os.sys.argv[1:]]
-_pre_world = None
-for i, a in enumerate(_pre_argv):
-    if a == "--world" and i + 1 < len(_pre_argv):
-        _pre_world = _pre_argv[i + 1]
-        break
-    if a.startswith("--world="):
-        _pre_world = a.split("=", 1)[1]
-        break
-if _pre_world is not None:
-    from herding.world.geometry import configure as _geo_configure
-    _geo_configure(_pre_world)
-    os.environ["HERDING_WORLD"] = _pre_world
+# Configure field geometry before other herding imports read it at module level.
+from herding.world.geometry import configure_from_args as _configure_from_args
+_configure_from_args()

 import numpy as np
 import torch as th
@@ -59,11 +46,12 @@ from training.herding_env import HerdingEnv
 def _make_env(rank: int, seed: int, frame_stack: int,
              drive_mode: str = "differential",
              difficulty: float = 1.0,
-              max_n_sheep: int = 10):
+              max_n_sheep: int = 10,
+              herding_cfg=None):
    def _thunk():
        env = HerdingEnv(seed=seed + rank, frame_stack=frame_stack,
                         drive_mode=drive_mode, difficulty=difficulty,
-                         max_n_sheep=max_n_sheep)
+                         max_n_sheep=max_n_sheep, herding_cfg=herding_cfg)
        env = Monitor(env, info_keywords=("is_success", "n_sheep", "n_penned"))
        return env
    return _thunk
@@ -241,6 +229,13 @@ def main() -> None:
                        choices=["field", "field_round"],
                        help="World shape. If not set, uses HERDING_WORLD "
                             "env var or defaults to 'field'.")
+    # Domain randomisation
+    parser.add_argument("--fp-rate", type=float, default=0.0,
+                        help="Mean false-positive detections per step (Poisson λ).")
+    parser.add_argument("--action-smooth", type=float, default=0.0,
+                        help="EMA on dog actions (0=none, 0.55=Webots match).")
+    parser.add_argument("--wheel-slip-std", type=float, default=0.0,
+                        help="Gaussian wheel-speed noise std (rad/s).")
    args = parser.parse_args()
    # --world was already honoured in the early pre-parse above; here we
    # just sanity-check that the final argparse view agrees.
@@ -280,15 +275,31 @@ def main() -> None:
        drive_mode = "differential"
    print(f"[rl] drive_mode={drive_mode} (BC action_dim={bc_action_dim})")

+    from herding.config import HerdingConfig, DomainRandomConfig, RobotConfig
+    herding_cfg = None
+    if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
+        herding_cfg = HerdingConfig(
+            domain_random=DomainRandomConfig(
+                fp_rate=args.fp_rate,
+                wheel_slip_std=args.wheel_slip_std,
+            ),
+            robot=RobotConfig(action_smooth=args.action_smooth),
+        )
+        print(f"[rl] domain-random: fp_rate={args.fp_rate}  "
+              f"action_smooth={args.action_smooth}  "
+              f"wheel_slip_std={args.wheel_slip_std}")
+
    env_fns = [_make_env(i, args.seed, frame_stack, drive_mode,
                         difficulty=args.difficulty,
-                         max_n_sheep=args.max_n_sheep)
+                         max_n_sheep=args.max_n_sheep,
+                         herding_cfg=herding_cfg)
               for i in range(args.n_envs)]
    venv = SubprocVecEnv(env_fns) if args.n_envs > 1 else DummyVecEnv(env_fns)
    eval_venv = DummyVecEnv([_make_env(99, args.seed + 999, frame_stack,
                                       drive_mode,
                                       difficulty=args.difficulty,
-                                       max_n_sheep=args.max_n_sheep)])
+                                       max_n_sheep=args.max_n_sheep,
+                                       herding_cfg=herding_cfg)])
    print(f"[rl] difficulty={args.difficulty} max_n_sheep={args.max_n_sheep}")

    # Reward-shaping overrides (broadcast to every env instance).