diff --git a/Makefile b/Makefile index e59b328..3acf05e 100644 --- a/Makefile +++ b/Makefile @@ -51,25 +51,57 @@ RL_FAST_POLICY = $(RL_FAST_DIR)/policy.zip # --- Demo collection --- TEACHER ?= universal -# Round field is fundamentally harder (narrow gate at south of a circle). -# Default to more demos there to give BC a fair shot at 60%+. +# Mecanum has more complex dynamics and a weaker teacher imitation signal +# (val_cos ≈ 0.70 vs ≥ 0.88 for differential). Give it more demos and +# longer BC training to compensate. +ifeq ($(DRIVE),mecanum) +ifeq ($(WORLD),field_round) +SEEDS_PER_N ?= 80 +else +SEEDS_PER_N ?= 50 +endif +else +# Round field is harder; more demos give BC a fair shot at 60%+. ifeq ($(WORLD),field_round) SEEDS_PER_N ?= 60 else SEEDS_PER_N ?= 25 endif +endif SUBSAMPLE ?= 3 FRAME_STACK ?= 4 DEMO_MAX_STEPS ?= 100000 # --- Behaviour cloning --- +ifeq ($(DRIVE),mecanum) +ifeq ($(WORLD),field_round) +BC_EPOCHS ?= 200 +else +BC_EPOCHS ?= 100 +endif +else ifeq ($(WORLD),field_round) BC_EPOCHS ?= 150 else BC_EPOCHS ?= 60 endif +endif BC_NET_ARCH ?= 512,512 +# --- Domain randomisation (used by bc_demos and rl targets) --- +# FP_RATE: mean false-positive detections injected per step (Poisson λ). +# ACTION_SMOOTH_TRAIN: EMA on actions to match Webots controller (0.55). +# WHEEL_SLIP_STD: Gaussian wheel-speed noise for mecanum dynamics gap. +# +# FP_RATE is used consistently in BC demos *and* RL: BC collection runs +# in PRIVILEGED mode (teacher sees GT; student obs sees the FP-injected +# tracker output), so the policy learns to denoise to the GT signal. +# Mismatched FP_RATE between BC/RL was the root cause of an earlier +# regression (BC=0, RL=2 → PPO stalled at 0% success). +FP_RATE ?= 0.0 +ACTION_SMOOTH_TRAIN ?= 0.55 +WHEEL_SLIP_STD ?= 0.05 + # --- KL-PPO fine-tune --- # Round field: longer training, looser KL, no time penalty (success # must be learned before speed is rewarded). @@ -93,9 +125,16 @@ DIFFICULTY ?= 1.0 # --- Stage-2 "speed pass" (rl_fast) --- # Continues from RL_DIR with a negative TIME_W. Tighter KL keeps the # policy near the Stage-1 success rate while step-count drops. +# Differential and mecanum respond differently: mecanum needs a stronger +# time penalty to achieve speed gains; differential only needs a light +# touch (-0.02) — stronger penalties trade success for speed without gain. RL_FAST_STEPS ?= 1000000 RL_FAST_KL ?= 0.05 +ifeq ($(DRIVE),mecanum) RL_FAST_TIME_W ?= -0.05 +else +RL_FAST_TIME_W ?= -0.02 +endif # --- Evaluation --- EVAL_SEEDS ?= 10 @@ -107,7 +146,7 @@ MODE ?= rl .PHONY: all bc_demos bc rl rl_fast eval eval_fast eval_all eval_all_fast \ - test webots clean clean_all help \ + test webots webots_sweep clean clean_all help \ train_all train_diff_rect train_diff_round \ train_mec_rect train_mec_round \ train_all_fast train_diff_rect_fast train_diff_round_fast \ @@ -129,7 +168,10 @@ $(BC_DEMOS): --seeds-per-n $(SEEDS_PER_N) --subsample $(SUBSAMPLE) \ --frame-stack $(FRAME_STACK) --drive-mode $(DRIVE) \ --world $(WORLD) \ - --max-steps $(DEMO_MAX_STEPS) + --max-steps $(DEMO_MAX_STEPS) \ + --fp-rate $(FP_RATE) \ + --action-smooth $(ACTION_SMOOTH_TRAIN) \ + --wheel-slip-std $(WHEEL_SLIP_STD) bc: $(BC_POLICY) $(BC_POLICY): $(BC_DEMOS) @@ -144,7 +186,10 @@ $(RL_POLICY): $(BC_POLICY) --total-timesteps $(PPO_STEPS) --kl-coef $(KL) \ --imitate-weight $(IMITATE) --time-weight $(TIME_W) \ --difficulty $(DIFFICULTY) \ - --drive-mode $(DRIVE) --world $(WORLD) + --drive-mode $(DRIVE) --world $(WORLD) \ + --fp-rate $(FP_RATE) \ + --action-smooth $(ACTION_SMOOTH_TRAIN) \ + --wheel-slip-std $(WHEEL_SLIP_STD) eval: $(RL_POLICY) $(PY) -m training.eval --policy $(RL_DIR) \ @@ -162,7 +207,10 @@ $(RL_FAST_POLICY): $(RL_POLICY) --total-timesteps $(RL_FAST_STEPS) --kl-coef $(RL_FAST_KL) \ --imitate-weight $(IMITATE) --time-weight $(RL_FAST_TIME_W) \ --difficulty $(DIFFICULTY) \ - --drive-mode $(DRIVE) --world $(WORLD) + --drive-mode $(DRIVE) --world $(WORLD) \ + --fp-rate $(FP_RATE) \ + --action-smooth $(ACTION_SMOOTH_TRAIN) \ + --wheel-slip-std $(WHEEL_SLIP_STD) eval_fast: $(RL_FAST_POLICY) $(PY) -m training.eval --policy $(RL_FAST_DIR) \ @@ -175,6 +223,14 @@ test: webots: tools/run_webots.sh $(N) $(MODE) $(DRIVE) $(WORLD) +# Headless sweep across all modes × worlds × flock sizes. +# Results are written to webots_sweep.log. +# Set USE_GT=1 to bypass LiDAR tracker (isolate perception from policy). +webots_sweep: + env $(if $(USE_GT),HERDING_USE_GT=1,) \ + PATH="$(CONDA_PREFIX)/bin:$(PATH)" \ + bash tools/webots_sweep.sh webots_sweep.log + clean: rm -f $(BC_DEMOS) rm -rf $(BC_DIR) $(RL_DIR) diff --git a/controllers/sheep/runtime.ini b/controllers/sheep/runtime.ini new file mode 100644 index 0000000..19a4441 --- /dev/null +++ b/controllers/sheep/runtime.ini @@ -0,0 +1,2 @@ +[python] +COMMAND = /home/jalf/miniconda3/envs/tir/bin/python3 diff --git a/controllers/shepherd_dog/runtime.ini b/controllers/shepherd_dog/runtime.ini new file mode 100644 index 0000000..19a4441 --- /dev/null +++ b/controllers/shepherd_dog/runtime.ini @@ -0,0 +1,2 @@ +[python] +COMMAND = /home/jalf/miniconda3/envs/tir/bin/python3 diff --git a/controllers/shepherd_dog/shepherd_dog.py b/controllers/shepherd_dog/shepherd_dog.py index 88429e1..745d419 100644 --- a/controllers/shepherd_dog/shepherd_dog.py +++ b/controllers/shepherd_dog/shepherd_dog.py @@ -87,11 +87,20 @@ from herding.perception.lidar_perception import detections_from_scan from herding.perception.sheep_tracker import SheepTracker from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels from herding.world.geometry import ( - DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA, - DOG_SOUTH_LIMIT, DOG_WHEEL_BASE, DOG_WHEEL_BASE_X, - DOG_WHEEL_BASE_Y, DOG_WHEEL_RADIUS, + DOG_SOUTH_LIMIT, PEN_ENTRY, is_penned_position, ) +from herding.config import HERDING_WEBOTS, RobotConfig + +# Robot physical constants come from RobotConfig so they stay in sync with +# the training environment. The Webots preset uses action_smooth=0.55. +_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot +DOG_WHEEL_RADIUS = _ROBOT_CFG.wheel_radius +DOG_WHEEL_BASE = _ROBOT_CFG.wheel_base +DOG_WHEEL_BASE_X = _ROBOT_CFG.wheel_base_x +DOG_WHEEL_BASE_Y = _ROBOT_CFG.wheel_base_y +DOG_MAX_WHEEL_OMEGA = _ROBOT_CFG.max_wheel_omega +DOG_MAX_LINEAR = _ROBOT_CFG.max_linear # --------------------------------------------------------------------------- @@ -102,6 +111,13 @@ MODE = (os.environ.get("HERDING_MODE") or _runtime_cfg.get("HERDING_MODE") or "bc").lower() +# Diagnostic: bypass LiDAR tracker and use GT emitter positions directly. +# Set HERDING_USE_GT=1 to isolate perception sim-to-real gap from policy quality. +USE_GT_PERCEPTION = bool(int( + os.environ.get("HERDING_USE_GT") + or _runtime_cfg.get("HERDING_USE_GT", "0") +)) + def _resolve_policy_dir(mode: str) -> str: """Where to look for the trained policy for the given mode. @@ -141,7 +157,7 @@ def _resolve_policy_dir(mode: str) -> str: return env_dir or primary -_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal") +_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal", "calibrate") if MODE not in _VALID_MODES: print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.") MODE = "strombom" @@ -173,7 +189,7 @@ print(f"[dog] drive mode={DRIVE_MODE}") # Control parameters # --------------------------------------------------------------------------- -ACTION_SMOOTH = 0.55 # EMA on (vx, vy) — kills frame-to-frame jitter +ACTION_SMOOTH = _ROBOT_CFG.action_smooth # EMA on (vx, vy) — kills frame-to-frame jitter RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done") @@ -264,7 +280,7 @@ receiver = robot.getDevice("receiver"); receiver.enable(timestep) emitter = robot.getDevice("emitter") lidar = robot.getDevice("lidar"); lidar.enable(timestep) -tracker = SheepTracker() +tracker = SheepTracker(tracker_cfg=HERDING_WEBOTS.tracker) # Cosmetic ear motors — animated; not used by control. left_ear = robot.getDevice("left ear motor") @@ -300,6 +316,76 @@ _run_done = False prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0) step_count = 0 +# --------------------------------------------------------------------------- +# Calibration mode — apply fixed action, measure GPS displacement, compare +# against gym kinematics prediction, write results to calibrate_mecanum.log. +# --------------------------------------------------------------------------- +if MODE == "calibrate": + import json as _json + _calib_vx = float(os.environ.get("CALIB_VX", "0.5")) + _calib_vy = float(os.environ.get("CALIB_VY", "0.0")) + _calib_om = float(os.environ.get("CALIB_OM", "0.0")) + _calib_n = int(os.environ.get("CALIB_N_STEPS", "150")) + _log_path = os.path.join(_PROJECT_ROOT, "calibrate_mecanum.log") + # Settle for 5 steps so GPS stabilises. + for _ in range(5): + robot.step(timestep) + _pos0 = gps.getValues(); _x0, _y0 = _pos0[0], _pos0[1] + _n_calib = compass.getValues(); _h0 = math.atan2(_n_calib[0], _n_calib[1]) + # Gym-predicted displacement using shared kinematics. + from herding.world.diffdrive import velocity_to_mecanum_wheels, mecanum_kinematics_step + from herding.world.geometry import WEBOTS_DT as _DT + _xg, _yg, _hg = _x0, _y0, _h0 + for _ in range(_calib_n): + _wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels( + _calib_vx, _calib_vy, _calib_om, _hg, + max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS, + lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2, + max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0, + wheel_base=DOG_WHEEL_BASE, + ) + _xg, _yg, _hg = mecanum_kinematics_step( + _xg, _yg, _hg, _wfl, _wfr, _wrl, _wrr, + DOG_WHEEL_RADIUS, DOG_WHEEL_BASE_X / 2, DOG_WHEEL_BASE_Y / 2, _DT, + ) + # Run actual Webots steps. + for _ in range(_calib_n): + _nv = compass.getValues(); _h = math.atan2(_nv[0], _nv[1]) + _wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels( + _calib_vx, _calib_vy, _calib_om, _h, + max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS, + lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2, + max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0, + wheel_base=DOG_WHEEL_BASE, + ) + if DRIVE_MODE == "mecanum": + drive_mecanum(_calib_vx, _calib_vy, _calib_om, + fl_motor, fr_motor, rl_motor, rr_motor, + compass, MOTOR_MAX) + robot.step(timestep) + _pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1] + _T = _calib_n * _DT + _vx_w = (_x1 - _x0) / _T; _vy_w = (_y1 - _y0) / _T + _vx_g = (_xg - _x0) / _T; _vy_g = (_yg - _y0) / _T + def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p) + _result = ( + f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f}) " + f"steps={_calib_n}\n" + f" gym displacement: dx={_xg-_x0:.4f} dy={_yg-_y0:.4f} " + f"(vx={_vx_g:.3f} vy={_vy_g:.3f} m/s)\n" + f" webots displacement: dx={_x1-_x0:.4f} dy={_y1-_y0:.4f} " + f"(vx={_vx_w:.3f} vy={_vy_w:.3f} m/s)\n" + f" vx error: {_pct(_vx_w, _vx_g):.1f}% " + f"vy error: {_pct(_vy_w, _vy_g):.1f}%\n" + ) + print(_result) + with open(_log_path, "a") as _f: + _f.write(_result + "\n") + # Write the run-done sentinel so run_webots.sh closes Webots cleanly. + with open(RUN_DONE_FILE, "w") as _f: + _f.write("calibrate\n") + import sys as _sys; _sys.exit(0) + while robot.step(timestep) != -1: step_count += 1 @@ -321,8 +407,18 @@ while robot.step(timestep) != -1: # ---- LiDAR perception → tracker → active sheep positions ---- ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32) - detections = detections_from_scan(ranges, dog_xy[0], dog_xy[1], dog_heading) - sheep_positions = tracker.update(detections) + detections = detections_from_scan( + ranges, dog_xy[0], dog_xy[1], dog_heading, + detection_cfg=HERDING_WEBOTS.detection, + lidar_cfg=HERDING_WEBOTS.lidar, + ) + if USE_GT_PERCEPTION and _gt_sheep: + # Bypass tracker: feed GT emitter positions directly to policy/teacher. + sheep_positions = {k: v for k, v in _gt_sheep.items() + if not is_penned_position(v[0], v[1])} + tracker.update(detections) # still advance tracker for diagnostics + else: + sheep_positions = tracker.update(detections) sheep_xy_list = list(sheep_positions.values()) sheep_penned_list = [False] * len(sheep_xy_list) @@ -331,10 +427,18 @@ while robot.step(timestep) != -1: # ---- Action selection ---- omega = 0.0 if MODE in ("bc", "rl") and policy_handle is not None: - action = policy_handle.predict(single_obs) - vx, vy = float(action[0]), float(action[1]) - if DRIVE_MODE == "mecanum" and len(action) >= 3: - omega = float(action[2]) + if not sheep_positions: + # BC/RL never saw "empty obs during operation" in training (empty + # obs only happened at episode end), so the policy outputs ~zero + # and the dog gets stuck. Fall back to a fixed scan rotation + # until tracker recovers some sheep. + vx, vy = 0.0, 0.6 + omega = 0.5 if DRIVE_MODE == "mecanum" else 0.0 + else: + action = policy_handle.predict(single_obs) + vx, vy = float(action[0]), float(action[1]) + if DRIVE_MODE == "mecanum" and len(action) >= 3: + omega = float(action[2]) else: result = analytic_teacher( dog_xy, dog_heading, sheep_positions, PEN_ENTRY, diff --git a/herding/config.py b/herding/config.py new file mode 100644 index 0000000..7544a5d --- /dev/null +++ b/herding/config.py @@ -0,0 +1,335 @@ +"""Central configuration dataclasses for the herding simulation. + +Every tunable constant that previously lived as a module-level literal in +perception/lidar_sim.py, perception/lidar_perception.py, +perception/sheep_tracker.py, world/geometry.py, or training/herding_env.py +is now represented here as a field with its original default value. + +Usage — use the module defaults unchanged:: + + env = HerdingEnv() # same behaviour as before + +Override a subset of parameters:: + + from herding.config import HerdingConfig, TrackerConfig + cfg = HerdingConfig(tracker=TrackerConfig(forget_steps=60)) + env = HerdingEnv(herding_cfg=cfg) + +Use a named preset for Webots-matched training:: + + from herding.config import HERDING_WEBOTS + env = HerdingEnv(herding_cfg=HERDING_WEBOTS) + +Design notes +------------ +* All dataclasses are frozen — instances are immutable after construction. +* This module must not import from other ``herding.*`` packages to avoid + import cycles. Field-geometry constants (pen coordinates, field size) + stay in ``herding.world.geometry`` because they depend on the world + variant selected at runtime via ``HERDING_WORLD``. +""" + +from __future__ import annotations + +import math +from dataclasses import dataclass, field, replace + + +# --------------------------------------------------------------------------- +# LiDAR hardware spec +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class LidarConfig: + """Parameters of the simulated / physical LiDAR sensor. + + The two canonical presets are :data:`LIDAR_FULL` (360°, oracle mode) + and :data:`LIDAR_WEBOTS` (140°/180-ray, matches the ShepherdDog proto). + """ + + n_rays: int = 360 + """Number of rays in the scan.""" + + fov_rad: float = 2.0 * math.pi + """Full field-of-view in radians, centred on the robot's forward axis.""" + + max_range: float = 12.0 + """Maximum detectable range in metres.""" + + noise_std: float = 0.005 + """Gaussian standard deviation (metres) applied to each hit reading.""" + + sheep_radius: float = 0.30 + """Effective disc radius of a sheep in the 2-D LiDAR plane (metres).""" + + post_radius: float = 0.25 + """Effective disc radius of gate / corner posts (metres).""" + + def __post_init__(self) -> None: + if self.n_rays < 1: + raise ValueError(f"n_rays must be ≥ 1, got {self.n_rays}") + if not (0.0 < self.fov_rad <= 2.0 * math.pi): + raise ValueError(f"fov_rad must be in (0, 2π], got {self.fov_rad:.4f}") + if self.max_range <= 0.0: + raise ValueError(f"max_range must be > 0, got {self.max_range}") + + +# Named presets ----------------------------------------------------------- + +LIDAR_FULL = LidarConfig( + n_rays=360, + fov_rad=2.0 * math.pi, +) +"""360° full-circle scan — oracle / ablation mode.""" + +LIDAR_WEBOTS = LidarConfig( + n_rays=180, + fov_rad=math.radians(140.0), +) +"""Matches the ShepherdDog.proto Lidar device (180 rays, 140° FOV). + +Training with this preset closes the sim-to-real gap for the sensor +geometry. Because the observation is built from tracker output (not raw +rays), a policy trained here can be deployed on a wider-FOV LiDAR (e.g. +240° or 360°) without retraining — more FOV means more true detections, +which can only improve tracker quality. +""" + + +# --------------------------------------------------------------------------- +# Cluster-detection pipeline +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class DetectionConfig: + """Parameters for the LiDAR-scan → detection clustering pipeline.""" + + gap_threshold: float = 0.6 + """Adjacent hit-points farther apart than this (metres) start a new cluster.""" + + max_cluster_span: float = 1.5 + """Clusters wider than this (metres) are rejected as walls / structures.""" + + range_hit_eps: float = 0.05 + """A ray is considered a hit if ``range < max_range - range_hit_eps``.""" + + split_range_gap: float = 0.20 + """Range increase within a cluster that triggers a multi-peak split.""" + + wall_reject: float = 0.5 + """Drop detections within this distance (metres) of any field wall.""" + + static_reject: float = 0.8 + """Drop detections within this distance (metres) of known static features + (gate posts, field corners).""" + + def __post_init__(self) -> None: + if self.wall_reject < 0.0: + raise ValueError(f"wall_reject must be ≥ 0, got {self.wall_reject}") + if self.static_reject < 0.0: + raise ValueError(f"static_reject must be ≥ 0, got {self.static_reject}") + + +# --------------------------------------------------------------------------- +# Multi-target tracker +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class TrackerConfig: + """Parameters for the nearest-neighbour sheep tracker.""" + + gate_m: float = 2.5 + """Primary NN association gate in metres (recently observed tracks).""" + + reacquire_gate_m: float = 4.5 + """Wider gate used when re-acquiring tracks stale for ≥ ``reacquire_min_age`` steps.""" + + reacquire_min_age: int = 20 + """Minimum staleness (steps) before the wider re-acquisition gate activates.""" + + penned_gate_m: float = 4.0 + """Gate for matching new detections to already-penned tracks.""" + + forget_steps: int = 200 + """Delete an active track that has not been observed for this many steps (~3.2 s).""" + + predict_steps: int = 120 + """Extrapolate a track's position using constant velocity for this many steps (~1.9 s).""" + + velocity_clamp: float = 1.0 + """Maximum predicted speed (m/s) used during extrapolation.""" + + max_new_tracks_per_step: int = 10 + """Maximum number of new tracks that may be spawned in a single step. + + Capping this limits the damage from LiDAR false-positive bursts (e.g. + wall reflections in Webots) that would otherwise flood the track set. + The default (10 = MAX_SHEEP) preserves the original behaviour; reduce + to 2–3 for Webots deployment robustness. + """ + + pen_latch_depth: float = 0.0 + """Minimum depth past the gate line (metres) before a track is latched + as penned. 0.0 = original behaviour (latch at y ≤ GATE_Y). Increase + to 0.5 for Webots to prevent gate-hardware LiDAR reflections near y=-15 + from permanently consuming tracker slots as false "penned" sheep. + """ + + def __post_init__(self) -> None: + if self.forget_steps < 1: + raise ValueError(f"forget_steps must be ≥ 1, got {self.forget_steps}") + if self.max_new_tracks_per_step < 1: + raise ValueError( + f"max_new_tracks_per_step must be ≥ 1, got {self.max_new_tracks_per_step}" + ) + + +# --------------------------------------------------------------------------- +# Robot physical specification +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class RobotConfig: + """Physical parameters of the shepherd-dog robot. + + Values mirror ``protos/ShepherdDog.proto`` and ``protos/ShepherdDogMecanum.proto``. + """ + + wheel_radius: float = 0.038 + """Wheel radius in metres.""" + + wheel_base: float = 0.28 + """Axle-to-axle distance for differential drive (metres).""" + + wheel_base_x: float = 0.28 + """Front-to-back axle distance for mecanum drive (metres).""" + + wheel_base_y: float = 0.28 + """Left-to-right axle distance for mecanum drive (metres).""" + + max_wheel_omega: float = 70.0 + """Maximum wheel angular velocity (rad/s).""" + + action_smooth: float = 0.0 + """Exponential moving-average coefficient applied to actions inside the env. + + ``0.0`` means no smoothing (gym default). + ``0.55`` matches the hard-coded EMA in ``shepherd_dog.py`` — use this + when training so the policy learns to act through the same filter it + sees at deployment. + """ + + def __post_init__(self) -> None: + if not (0.0 <= self.action_smooth < 1.0): + raise ValueError( + f"action_smooth must be in [0, 1), got {self.action_smooth}" + ) + + @property + def max_linear(self) -> float: + """Maximum achievable linear speed (m/s).""" + return self.wheel_radius * self.max_wheel_omega + + +# --------------------------------------------------------------------------- +# Domain randomisation +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class DomainRandomConfig: + """Parameters that inject physics / sensor noise for domain randomisation. + + All values default to 0 (disabled) so the base env is deterministic and + backwards-compatible. Enable them gradually to close the sim-to-real gap. + """ + + fp_rate: float = 0.0 + """Mean number of false-positive detections injected per step (Poisson λ). + + FPs are placed near static features (walls, posts) with positional + noise ``fp_std_pos``, mimicking the spurious clusters Webots' physical + LiDAR returns from 3D geometry. + """ + + fp_std_pos: float = 0.3 + """Positional standard deviation (metres) of injected false-positive clusters.""" + + wheel_slip_std: float = 0.0 + """Gaussian noise standard deviation (rad/s) added to each wheel speed + before kinematic integration. Models real-world wheel slip and motor + variation. Suggested starting value: 0.05. + """ + + compass_noise_std: float = 0.0 + """Gaussian noise standard deviation (radians) added to the heading + reading each step. Models magnetometer drift in Webots. + Suggested starting value: 0.02. + """ + + def __post_init__(self) -> None: + if self.fp_rate < 0.0: + raise ValueError(f"fp_rate must be ≥ 0, got {self.fp_rate}") + if self.wheel_slip_std < 0.0: + raise ValueError(f"wheel_slip_std must be ≥ 0, got {self.wheel_slip_std}") + if self.compass_noise_std < 0.0: + raise ValueError(f"compass_noise_std must be ≥ 0, got {self.compass_noise_std}") + + +# --------------------------------------------------------------------------- +# Aggregate config +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class HerdingConfig: + """Root configuration object passed to :class:`~training.herding_env.HerdingEnv`. + + Sub-configs default to the original simulation parameters so that + ``HerdingEnv()`` and ``HerdingEnv(herding_cfg=HerdingConfig())`` produce + identical behaviour. + """ + + lidar: LidarConfig = field(default_factory=LidarConfig) + detection: DetectionConfig = field(default_factory=DetectionConfig) + tracker: TrackerConfig = field(default_factory=TrackerConfig) + robot: RobotConfig = field(default_factory=RobotConfig) + domain_random: DomainRandomConfig = field(default_factory=DomainRandomConfig) + + def replace(self, **kwargs) -> "HerdingConfig": + """Return a new config with selected top-level sub-configs replaced. + + Example:: + + cfg = HERDING_WEBOTS.replace( + domain_random=DomainRandomConfig(fp_rate=2.0, wheel_slip_std=0.05) + ) + """ + return replace(self, **kwargs) + + +# --------------------------------------------------------------------------- +# Named full-pipeline presets +# --------------------------------------------------------------------------- + +HERDING_DEFAULT = HerdingConfig() +"""Original simulation defaults — zero behaviour change.""" + +HERDING_WEBOTS = HerdingConfig( + lidar=LIDAR_WEBOTS, + detection=DetectionConfig(wall_reject=0.5, static_reject=1.2), + tracker=TrackerConfig( + forget_steps=120, + max_new_tracks_per_step=1, + pen_latch_depth=2.0, + ), + robot=RobotConfig(action_smooth=0.55), +) +"""Webots-matched training preset. + +Changes vs HERDING_DEFAULT: +* LiDAR: 180 rays / 140° FOV matching ShepherdDog.proto hardware +* Detection: wall_reject kept at 0.5 m (original default; static_reject + handles post FPs; 1.0 m was too aggressive near the south gate) +* Tracker: forget_steps 200 → 60 (~1 s ghost-track lifetime) + max_new_tracks_per_step 10 → 3 (rate-caps FP flooding) +* Robot: action_smooth 0.0 → 0.55 (matches Webots controller EMA) +""" diff --git a/herding/control/sequential.py b/herding/control/sequential.py index b2fcc9f..6347156 100644 --- a/herding/control/sequential.py +++ b/herding/control/sequential.py @@ -1,9 +1,23 @@ -"""Sequential "pin-and-push" shepherd-dog controller. +"""Adaptive sequential shepherd-dog controller. -Single-target alternative to Strömbom: each step, target the sheep -closest to the pen, park behind it, drive it through; once it latches -penned the next-closest sheep becomes the target. Naturally queues -the flock through a narrow gate. +Three-phase strategy: + + 1. **Collect** (flock scattered): Strömbom collect — park behind the + furthest sheep and push it toward the CoM. Identical to the + Strömbom heuristic; keeps the flock together. + + 2. **Drive** (flock compact, >STRAGGLER_THRESHOLD active): Strömbom + drive — park behind the CoM relative to the pen and push the whole + group through the gate. + + 3. **Targeted** (≤STRAGGLER_THRESHOLD sheep remain active): single- + target push on the sheep closest to the pen entry. Safe to isolate + individual sheep once the flock is nearly exhausted. + +The original pure pin-and-push (Phase 3 only) caused flock scatter in +Webots physics whenever the dog tried to isolate a sheep while others +were still spread across the field. Phases 1–2 handle the bulk of +herding with flock-aware Strömbom logic; Phase 3 cleans up stragglers. """ import math @@ -11,64 +25,103 @@ import math from herding.world.geometry import GATE_Y, PEN_ENTRY, in_pen -DELTA_DRIVE = 1.5 # standoff behind the target sheep -APPROACH_GAIN = 1.0 # action magnitude scale (1 = full speed) +F_FACTOR = 4.0 # collect/drive threshold: radius > F_FACTOR·√n +DELTA_COLLECT = 1.5 # standoff behind the furthest sheep (collect) +DELTA_DRIVE = 2.0 # standoff behind CoM (drive) +DELTA_TARGET = 1.5 # standoff behind single target sheep (targeted) +STRAGGLER_THRESHOLD = 2 # switch to targeted push when ≤ this many active -def _unit(x, y): +def _unit(x: float, y: float): d = math.hypot(x, y) if d < 1e-6: return 0.0, 0.0 return x / d, y / d -def _is_active(x, y) -> bool: +def _is_active(x: float, y: float) -> bool: return (not in_pen(x, y)) and y > GATE_Y def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY): - """Return ``(vx, vy, mode)`` — same call signature as Strömbom.""" - active = [(name, x, y) for name, (x, y) in sheep_positions.items() - if _is_active(x, y)] + """Return ``(vx, vy, mode)`` — same signature as Strömbom.""" + active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)] if not active: return 0.0, 0.0, "idle" - name, sx, sy = min( - active, - key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]), - ) + n = len(active) + com_x = sum(p[0] for p in active) / n + com_y = sum(p[1] for p in active) / n + dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active] + radius = max(dists) - ux, uy = _unit(sx - pen_target[0], sy - pen_target[1]) - tx = sx + DELTA_DRIVE * ux - ty = sy + DELTA_DRIVE * uy + if n <= STRAGGLER_THRESHOLD: + # Targeted: push the sheep closest to the pen entry individually. + sx, sy = min(active, + key=lambda p: math.hypot(p[0] - pen_target[0], + p[1] - pen_target[1])) + ux, uy = _unit(sx - pen_target[0], sy - pen_target[1]) + tx, ty = sx + DELTA_TARGET * ux, sy + DELTA_TARGET * uy + mode = "targeted" + + elif radius > F_FACTOR * math.sqrt(n): + # Collect: aim behind the furthest sheep from the CoM. + idx = max(range(n), key=lambda i: dists[i]) + sx, sy = active[idx] + ux, uy = _unit(sx - com_x, sy - com_y) + tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy + mode = "collect" + + else: + # Drive: push the whole compact flock toward the gate. + ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1]) + tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy + mode = "drive" ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1]) - return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}" + return ax, ay, mode def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY): - """``compute_action`` plus a debug dict (target, drive point).""" - active = [(name, x, y) for name, (x, y) in sheep_positions.items() - if _is_active(x, y)] + """``compute_action`` plus a debug dict.""" + active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)] if not active: return 0.0, 0.0, "idle", { - "n_active": 0, "target_name": "", - "target_x": 0.0, "target_y": 0.0, - "drive_x": dog_xy[0], "drive_y": dog_xy[1], + "n_active": 0, "phase": "idle", "radius": 0.0, "threshold": 0.0, + "com_x": 0.0, "com_y": 0.0, + "target_x": dog_xy[0], "target_y": dog_xy[1], } - name, sx, sy = min( - active, - key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]), - ) + n = len(active) + com_x = sum(p[0] for p in active) / n + com_y = sum(p[1] for p in active) / n + dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active] + radius = max(dists) + threshold = F_FACTOR * math.sqrt(n) + + if n <= STRAGGLER_THRESHOLD: + sx, sy = min(active, + key=lambda p: math.hypot(p[0] - pen_target[0], + p[1] - pen_target[1])) + ux, uy = _unit(sx - pen_target[0], sy - pen_target[1]) + tx, ty = sx + DELTA_TARGET * ux, sy + DELTA_TARGET * uy + mode = "targeted" + + elif radius > threshold: + idx = max(range(n), key=lambda i: dists[i]) + sx, sy = active[idx] + ux, uy = _unit(sx - com_x, sy - com_y) + tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy + mode = "collect" + + else: + ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1]) + tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy + mode = "drive" - ux, uy = _unit(sx - pen_target[0], sy - pen_target[1]) - tx = sx + DELTA_DRIVE * ux - ty = sy + DELTA_DRIVE * uy ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1]) - - return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}", { - "n_active": len(active), "target_name": name, - "target_x": sx, "target_y": sy, - "drive_x": tx, "drive_y": ty, + return ax, ay, mode, { + "n_active": n, "phase": mode, "radius": radius, "threshold": threshold, + "com_x": com_x, "com_y": com_y, + "target_x": tx, "target_y": ty, } diff --git a/herding/perception/lidar_perception.py b/herding/perception/lidar_perception.py index 59d67f1..11d9f55 100644 --- a/herding/perception/lidar_perception.py +++ b/herding/perception/lidar_perception.py @@ -21,9 +21,13 @@ The downstream tracker handles association across frames. from __future__ import annotations import math +from typing import TYPE_CHECKING import numpy as np +if TYPE_CHECKING: + from herding.config import DetectionConfig, LidarConfig + from herding.world.geometry import ( FIELD_SHAPE, FIELD_ROUND_R, FIELD_X, FIELD_Y, GATE_X, GATE_Y, @@ -79,21 +83,22 @@ def _in_field_region(cx: float, cy: float) -> bool: FIELD_Y[0] - 0.2 < cy < FIELD_Y[1] + 0.2) -def _near_wall(cx: float, cy: float) -> bool: +def _near_wall(cx: float, cy: float, wall_reject: float = WALL_REJECT) -> bool: """True if the detection is too close to a wall to be a sheep.""" if FIELD_SHAPE == "field_round": r = math.hypot(cx, cy) - return r > FIELD_ROUND_R - WALL_REJECT + return r > FIELD_ROUND_R - wall_reject return ( - cx > FIELD_X[1] - WALL_REJECT or cx < FIELD_X[0] + WALL_REJECT or - cy > FIELD_Y[1] - WALL_REJECT or - (cy < FIELD_Y[0] + WALL_REJECT and not (PEN_X[0] <= cx <= PEN_X[1])) + cx > FIELD_X[1] - wall_reject or cx < FIELD_X[0] + wall_reject or + cy > FIELD_Y[1] - wall_reject or + (cy < FIELD_Y[0] + wall_reject and not (PEN_X[0] <= cx <= PEN_X[1])) ) def _split_cluster_by_range( points: list[tuple[float, float]], range_vals: list[float], + split_range_gap: float = SPLIT_RANGE_GAP, ) -> list[list[tuple[float, float]]]: """Split a cluster at range-profile local maxima (gaps between sheep). @@ -108,7 +113,7 @@ def _split_cluster_by_range( # Find the maximum range (the dip/gap between sheep). r_max = max(range_vals) # If the range variation is small, it's a single target. - if r_max - r_min < SPLIT_RANGE_GAP: + if r_max - r_min < split_range_gap: return [points] # Find the split point: the index with the maximum range. split_idx = range_vals.index(r_max) @@ -124,7 +129,7 @@ def _split_cluster_by_range( (right, range_vals[split_idx + 1:]), ]: if len(sub_pts) >= 1: - result.extend(_split_cluster_by_range(sub_pts, sub_ranges)) + result.extend(_split_cluster_by_range(sub_pts, sub_ranges, split_range_gap)) return result if result else [points] @@ -132,14 +137,43 @@ def detections_from_scan( ranges: np.ndarray, dog_x: float, dog_y: float, dog_heading: float, max_range: float = LIDAR_MAX_RANGE, + detection_cfg: "DetectionConfig | None" = None, + lidar_cfg: "LidarConfig | None" = None, ) -> list[tuple[float, float]]: - """Return list of (x, y) world-frame sheep position estimates.""" + """Return list of (x, y) world-frame sheep position estimates. + + Pass ``detection_cfg`` to override clustering/filtering thresholds, or + ``lidar_cfg`` to inform the function of a non-default FOV (the number of + rays and FOV are inferred from the length of ``ranges`` and + ``lidar_cfg.fov_rad`` respectively). + """ + # Resolve parameters — fall back to module-level constants when no cfg. + if detection_cfg is not None: + gap_thr = detection_cfg.gap_threshold + max_span = detection_cfg.max_cluster_span + hit_eps = detection_cfg.range_hit_eps + split_gap = detection_cfg.split_range_gap + wall_rej = detection_cfg.wall_reject + static_rej = detection_cfg.static_reject + else: + gap_thr = GAP_THRESHOLD + max_span = MAX_CLUSTER_SPAN + hit_eps = RANGE_HIT_EPS + split_gap = SPLIT_RANGE_GAP + wall_rej = WALL_REJECT + static_rej = STATIC_REJECT + + sheep_r = lidar_cfg.sheep_radius if lidar_cfg is not None else SHEEP_RADIUS + fov = lidar_cfg.fov_rad if lidar_cfg is not None else LIDAR_FOV + if lidar_cfg is not None: + max_range = lidar_cfg.max_range + ranges = np.asarray(ranges, dtype=np.float32) n_rays = ranges.shape[0] if n_rays == 0: return [] - angles = ray_angles(n_rays, LIDAR_FOV) - hit = ranges < max_range - RANGE_HIT_EPS + angles = ray_angles(n_rays, fov) + hit = ranges < max_range - hit_eps world_a = dog_heading + angles px = dog_x + ranges * np.cos(world_a) @@ -159,7 +193,7 @@ def detections_from_scan( prev_xy = None continue pt = (float(px[i]), float(py[i]), float(ranges[i])) - if prev_xy is not None and math.hypot(pt[0] - prev_xy[0], pt[1] - prev_xy[1]) > GAP_THRESHOLD: + if prev_xy is not None and math.hypot(pt[0] - prev_xy[0], pt[1] - prev_xy[1]) > gap_thr: clusters.append(current) current = [] current.append(pt) @@ -174,7 +208,7 @@ def detections_from_scan( # Multi-peak splitting. if len(cluster) >= 4: - sub_clusters = _split_cluster_by_range(points_xy, range_vals) + sub_clusters = _split_cluster_by_range(points_xy, range_vals, split_gap) else: sub_clusters = [points_xy] @@ -185,24 +219,24 @@ def detections_from_scan( ys = [p[1] for p in sub] cx, cy = sum(xs) / len(xs), sum(ys) / len(ys) span = math.hypot(max(xs) - min(xs), max(ys) - min(ys)) - if span > MAX_CLUSTER_SPAN: + if span > max_span: continue # Rays hit the front edge of the sheep; offset outward by - # SHEEP_RADIUS along the dog→cluster direction. + # sheep_radius along the dog→cluster direction. dx, dy = cx - dog_x, cy - dog_y d = math.hypot(dx, dy) if d > 1e-3: - cx += SHEEP_RADIUS * dx / d - cy += SHEEP_RADIUS * dy / d + cx += sheep_r * dx / d + cy += sheep_r * dy / d in_main = _in_field_region(cx, cy) in_gate_strip = (PEN_X[0] - 0.2 < cx < PEN_X[1] + 0.2 and GATE_Y - 1.0 < cy < GATE_Y + 0.2) if not (in_main or in_gate_strip): continue - if any(math.hypot(cx - fx, cy - fy) < STATIC_REJECT + if any(math.hypot(cx - fx, cy - fy) < static_rej for fx, fy in _STATIC_FEATURES): continue - if _near_wall(cx, cy): + if _near_wall(cx, cy, wall_rej): continue detections.append((cx, cy)) return detections diff --git a/herding/perception/lidar_sim.py b/herding/perception/lidar_sim.py index 3c8b322..72a299a 100644 --- a/herding/perception/lidar_sim.py +++ b/herding/perception/lidar_sim.py @@ -2,20 +2,25 @@ Raycasts against sheep (discs) and static world geometry. For rectangular fields this is axis-aligned walls + gate posts; for round fields it is a -circular wall + gate posts. The env reproduces the false-positive cluster -distribution Webots produces from real 3D geometry. +circular wall + gate posts. -Returns a range array matching the Webots Lidar device: -180 rays, 140° FOV centred on forward, 12 m max range, 5 mm noise. -See ``protos/ShepherdDog.proto``. +The module-level constants (``LIDAR_N_RAYS``, ``LIDAR_FOV``, etc.) reflect +the original 360°/360-ray oracle configuration. Pass a +:class:`~herding.config.LidarConfig` to :func:`simulate_scan` to use a +different spec (e.g. :data:`~herding.config.LIDAR_WEBOTS` for 180-ray/140° +matching the ShepherdDog.proto hardware). """ from __future__ import annotations import math +from typing import TYPE_CHECKING import numpy as np +if TYPE_CHECKING: + from herding.config import LidarConfig + from herding.world.geometry import ( FIELD_SHAPE, FIELD_ROUND_R, FIELD_X, FIELD_Y, @@ -192,14 +197,30 @@ def simulate_scan( noise: float = LIDAR_NOISE, max_range: float = LIDAR_MAX_RANGE, rng: np.random.Generator | None = None, + lidar_cfg: "LidarConfig | None" = None, ) -> np.ndarray: """Return a (N,) float32 range array. No-hit entries equal ``max_range``. ``sheep_xy`` is every sheep (penned or active) in the scene. + + Pass ``lidar_cfg`` to override the module-level defaults for a single + call (e.g. to use :data:`~herding.config.LIDAR_WEBOTS`). """ - ch, sh = math.cos(dog_heading), math.sin(dog_heading) - cos_w = ch * _COS - sh * _SIN - sin_w = sh * _COS + ch * _SIN + if lidar_cfg is not None: + n_rays = lidar_cfg.n_rays + fov = lidar_cfg.fov_rad + max_range = lidar_cfg.max_range + noise = lidar_cfg.noise_std + sheep_r2 = lidar_cfg.sheep_radius ** 2 + angles = ray_angles(n_rays, fov) + ch, sh = math.cos(dog_heading), math.sin(dog_heading) + cos_w = ch * np.cos(angles) - sh * np.sin(angles) + sin_w = sh * np.cos(angles) + ch * np.sin(angles) + else: + sheep_r2 = SHEEP_RADIUS ** 2 + ch, sh = math.cos(dog_heading), math.sin(dog_heading) + cos_w = ch * _COS - sh * _SIN + sin_w = sh * _COS + ch * _SIN best = _raycast_static(dog_x, dog_y, cos_w, sin_w) @@ -209,9 +230,8 @@ def simulate_scan( t = np.outer(sx, cos_w) + np.outer(sy, sin_w) s_dist2 = (sx ** 2 + sy ** 2)[:, None] perp2 = s_dist2 - t ** 2 - R2 = SHEEP_RADIUS ** 2 - hit = (perp2 < R2) & (t > 0.0) - half = np.sqrt(np.clip(R2 - perp2, 0.0, None)) + hit = (perp2 < sheep_r2) & (t > 0.0) + half = np.sqrt(np.clip(sheep_r2 - perp2, 0.0, None)) candidate = np.where(hit, t - half, np.inf) nearest = candidate.min(axis=0) np.minimum(best, nearest, out=best) diff --git a/herding/perception/sheep_tracker.py b/herding/perception/sheep_tracker.py index 44ea090..a791a8f 100644 --- a/herding/perception/sheep_tracker.py +++ b/herding/perception/sheep_tracker.py @@ -22,6 +22,10 @@ plane south (``is_penned_position``). Penned tracks are excluded from from __future__ import annotations import math +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from herding.config import TrackerConfig from herding.world.geometry import MAX_SHEEP, in_pen, is_penned_position @@ -56,16 +60,21 @@ class Track: """Not-a-property in the hot loop — callers pass current step.""" raise NotImplementedError - def predicted_position(self, current_step: int) -> tuple[float, float]: + def predicted_position( + self, + current_step: int, + predict_steps: int = PREDICT_STEPS, + velocity_clamp: float = VELOCITY_CLAMP, + ) -> tuple[float, float]: """Extrapolated position using constant velocity, clamped.""" dt = current_step - self.last_seen - if dt <= 0 or dt > PREDICT_STEPS: + if dt <= 0 or dt > predict_steps: return self.x, self.y speed = math.hypot(self.vx, self.vy) if speed < 1e-4: return self.x, self.y # Clamp extrapolation distance. - max_d = VELOCITY_CLAMP * dt * 0.016 # steps → seconds + max_d = velocity_clamp * dt * 0.016 # steps → seconds d = min(speed * dt * 0.016, max_d) return ( self.x + d * (self.vx / speed), @@ -93,10 +102,36 @@ class SheepTracker: Each track is a :class:`Track` with position, velocity estimate, last-seen step, and penned flag. + + Pass a :class:`~herding.config.TrackerConfig` to override any + module-level defaults without changing this file. """ - def __init__(self, gate: float = GATE_M): - self.gate = gate + def __init__( + self, + gate: float = GATE_M, + tracker_cfg: "TrackerConfig | None" = None, + ): + if tracker_cfg is not None: + self.gate = tracker_cfg.gate_m + self._reacquire_gate = tracker_cfg.reacquire_gate_m + self._reacquire_min_age = tracker_cfg.reacquire_min_age + self._penned_gate = tracker_cfg.penned_gate_m + self._forget_steps = tracker_cfg.forget_steps + self._predict_steps = tracker_cfg.predict_steps + self._velocity_clamp = tracker_cfg.velocity_clamp + self._max_new_per_step = tracker_cfg.max_new_tracks_per_step + self._pen_latch_depth = tracker_cfg.pen_latch_depth + else: + self.gate = gate + self._reacquire_gate = REACQUIRE_GATE_M + self._reacquire_min_age = REACQUIRE_MIN_AGE + self._penned_gate = PENNED_GATE_M + self._forget_steps = FORGET_STEPS + self._predict_steps = PREDICT_STEPS + self._velocity_clamp = VELOCITY_CLAMP + self._max_new_per_step = MAX_ACTIVE_TRACKS + self._pen_latch_depth = 0.0 self._tracks: dict[int, Track] = {} self._next_id = 0 self.step = 0 @@ -119,8 +154,8 @@ class SheepTracker: active_tids.sort(key=lambda tid: self._tracks[tid].last_seen) for tid in active_tids: track = self._tracks[tid] - # Use predicted position for matching. - tx, ty = track.predicted_position(self.step) + tx, ty = track.predicted_position( + self.step, self._predict_steps, self._velocity_clamp) best_j, best_d = -1, self.gate for j, (dx, dy) in enumerate(detections): if j in det_used: @@ -140,10 +175,11 @@ class SheepTracker: if tid in updated_tids: continue track = self._tracks[tid] - if (self.step - track.last_seen) < REACQUIRE_MIN_AGE: + if (self.step - track.last_seen) < self._reacquire_min_age: continue - tx, ty = track.predicted_position(self.step) - best_j, best_d = -1, REACQUIRE_GATE_M + tx, ty = track.predicted_position( + self.step, self._predict_steps, self._velocity_clamp) + best_j, best_d = -1, self._reacquire_gate for j, (dx, dy) in enumerate(detections): if j in det_used: continue @@ -161,7 +197,7 @@ class SheepTracker: penned_tids = [tid for tid, t in self._tracks.items() if t.penned] for tid in penned_tids: track = self._tracks[tid] - best_j, best_d = -1, PENNED_GATE_M + best_j, best_d = -1, self._penned_gate for j, (dx, dy) in enumerate(detections): if j in det_used: continue @@ -174,25 +210,35 @@ class SheepTracker: track.update(dx, dy, self.step) det_used.add(best_j) - # Spawn new tracks for unmatched detections. + # Spawn new tracks for unmatched detections — rate-capped. + spawned = 0 for j, (dx, dy) in enumerate(detections): if j in det_used: continue - penned = in_pen(dx, dy) or is_penned_position(dx, dy) + if spawned >= self._max_new_per_step: + break + penned = self._is_penned(dx, dy) self._tracks[self._next_id] = Track(dx, dy, self.step, penned) self._next_id += 1 + spawned += 1 # Promote active tracks whose current estimate crosses the gate. for track in self._tracks.values(): if track.penned: continue - px, py = track.predicted_position(self.step) - if is_penned_position(px, py): + px, py = track.predicted_position( + self.step, self._predict_steps, self._velocity_clamp) + if self._is_penned(px, py): track.penned = True - # Forget stale active tracks; penned tracks live forever. + # Forget stale active tracks; penned tracks decay too but at a + # longer horizon (real penned sheep are still observed occasionally + # when the dog faces south; pure FPs at gate posts stop being + # detected once the dog drives away). + penned_forget = self._forget_steps * 8 stale = [tid for tid, t in self._tracks.items() - if not t.penned and (self.step - t.last_seen) > FORGET_STEPS] + if (not t.penned and (self.step - t.last_seen) > self._forget_steps) + or (t.penned and (self.step - t.last_seen) > penned_forget)] for tid in stale: del self._tracks[tid] @@ -206,18 +252,42 @@ class SheepTracker: return self.get_positions() - def get_positions(self) -> dict[str, tuple[float, float]]: + def _is_penned(self, x: float, y: float) -> bool: + """Check whether a position should be considered penned. + + Uses ``pen_latch_depth`` to require the position to be that many + metres past the gate line before latching. Increasing the depth + prevents gate-area LiDAR false positives (gate hardware reflections + at y ≈ -15) from being permanently latched as penned tracks. + """ + from herding.world.geometry import GATE_Y + # Apply depth threshold to both in_pen and is_penned_position so + # that any position in the gate column must clear GATE_Y - depth. + threshold = GATE_Y - self._pen_latch_depth + return (in_pen(x, y) or is_penned_position(x, y)) and y <= threshold + + def get_positions(self, min_freshness: int | None = None) -> dict[str, tuple[float, float]]: """Active (not-penned) tracks as a ``{name: (x, y)}`` dict. For tracks currently being predicted (occluded but within - PREDICT_STEPS), returns the extrapolated position so the teacher + predict_steps), returns the extrapolated position so the teacher sees a smooth estimate. + + ``min_freshness`` (optional, deploy-only): drop tracks whose + last_seen is older than ``step - min_freshness``. Real sheep in + FOV are detected nearly every step; phantom tracks from sporadic + Webots FPs stop being re-observed and decay. Default ``None`` + preserves training behaviour (extrapolated tracks visible). """ result = {} for tid, track in self._tracks.items(): if track.penned: continue - px, py = track.predicted_position(self.step) + if (min_freshness is not None + and self.step - track.last_seen > min_freshness): + continue + px, py = track.predicted_position( + self.step, self._predict_steps, self._velocity_clamp) result[f"t{tid}"] = (px, py) return result @@ -234,4 +304,4 @@ class SheepTracker: """Number of active tracks currently being extrapolated (not directly observed).""" return sum(1 for t in self._tracks.values() if not t.penned and (self.step - t.last_seen) > 0 - and (self.step - t.last_seen) <= PREDICT_STEPS) + and (self.step - t.last_seen) <= self._predict_steps) diff --git a/herding/world/diffdrive.py b/herding/world/diffdrive.py index b965746..a454d11 100644 --- a/herding/world/diffdrive.py +++ b/herding/world/diffdrive.py @@ -2,14 +2,22 @@ controllers. First-order rigid-body model — no slip, wheel-accel limits, or contact -forces. Webots' ODE physics handles those at inference; the env stays -close enough to first order that a policy trained here transfers. +forces by default. Pass ``slip_std`` and an ``rng`` to +:func:`kinematics_step` / :func:`mecanum_kinematics_step` to add +per-wheel Gaussian speed noise for domain randomisation. """ +from __future__ import annotations + import math +from typing import Optional + +import numpy as np -def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt): +def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt, + slip_std: float = 0.0, + rng: Optional[np.random.Generator] = None): """Integrate one step of differential-drive forward kinematics. Inputs @@ -19,9 +27,15 @@ def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt): w_left, w_right : wheel angular velocities (rad/s) wheel_radius, wheel_base : robot dimensions (m) dt : timestep (s) + slip_std : optional Gaussian std (rad/s) added to each wheel speed + rng : numpy Generator for slip noise; required when slip_std > 0 Returns (new_x, new_y, new_h). """ + if slip_std > 0.0 and rng is not None: + noise = rng.normal(0.0, slip_std, size=2) + w_left = w_left + noise[0] + w_right = w_right + noise[1] v = (w_right + w_left) * wheel_radius * 0.5 omega = (w_right - w_left) * wheel_radius / wheel_base new_x = x + v * math.cos(h) * dt @@ -67,7 +81,9 @@ def heading_speed_to_wheels(heading, speed_motor, h, max_wheel_omega, # --------------------------------------------------------------------------- def mecanum_kinematics_step(x, y, h, w_fl, w_fr, w_rl, w_rr, - wheel_radius, lx, ly, dt): + wheel_radius, lx, ly, dt, + slip_std: float = 0.0, + rng: Optional[np.random.Generator] = None): """Integrate one step of mecanum forward kinematics. Parameters @@ -79,9 +95,15 @@ def mecanum_kinematics_step(x, y, h, w_fl, w_fr, w_rl, w_rr, lx : half the front-to-back axle distance (m) ly : half the left-to-right axle distance (m) dt : timestep (s) + slip_std : optional Gaussian std (rad/s) added to each wheel speed + rng : numpy Generator for slip noise; required when slip_std > 0 Returns (new_x, new_y, new_h). """ + if slip_std > 0.0 and rng is not None: + noise = rng.normal(0.0, slip_std, size=4) + w_fl, w_fr = w_fl + noise[0], w_fr + noise[1] + w_rl, w_rr = w_rl + noise[2], w_rr + noise[3] r = wheel_radius vx_body = (w_fl + w_fr + w_rl + w_rr) * r / 4.0 vy_body = (-w_fl + w_fr + w_rl - w_rr) * r / 4.0 diff --git a/herding/world/geometry.py b/herding/world/geometry.py index 28d1c03..f21aedb 100644 --- a/herding/world/geometry.py +++ b/herding/world/geometry.py @@ -72,6 +72,36 @@ if FIELD_SHAPE == "field_round": GATE_Y = FIELD_ROUND_GATE_Y +def configure_from_args(argv: list[str] | None = None) -> str: + """Parse ``--world`` from *argv* (or ``sys.argv[1:]``), call :func:`configure`, + and set ``HERDING_WORLD`` in the environment. + + Returns the resolved world name (``"field"`` or ``"field_round"``). + + Call this at the very top of any script that accepts a ``--world`` flag, + *before* importing anything from ``herding.*`` that depends on field + geometry. This centralises the pre-parse logic that was previously + duplicated in ``bc/collect.py``, ``rl/train.py``, and ``eval.py``:: + + from herding.world.geometry import configure_from_args + configure_from_args() # reads sys.argv + """ + import os + import sys as _sys + args = argv if argv is not None else _sys.argv[1:] + world = "field" + for i, a in enumerate(args): + if a == "--world" and i + 1 < len(args): + world = args[i + 1] + break + if a.startswith("--world="): + world = a.split("=", 1)[1] + break + configure(world) + os.environ["HERDING_WORLD"] = world + return world + + def configure(shape: str) -> None: """Switch the active field geometry at runtime. diff --git a/protos/ShepherdDog.proto b/protos/ShepherdDog.proto index 7bea476..2a4a4e4 100644 --- a/protos/ShepherdDog.proto +++ b/protos/ShepherdDog.proto @@ -138,7 +138,8 @@ PROTO ShepherdDog [ } ] } - # Lidar — front-facing 140° FOV, mounted at snout tip + # Lidar — front-facing 140° FOV (canonical hardware spec). + # See ShepherdDog360.proto for the 360° robustness-ablation variant. Lidar { translation 0.05 0 0.01 name "lidar" diff --git a/protos/ShepherdDog360.proto b/protos/ShepherdDog360.proto new file mode 100644 index 0000000..7be6ade --- /dev/null +++ b/protos/ShepherdDog360.proto @@ -0,0 +1,691 @@ +#VRML_SIM R2025a utf8 +# Shepherd Dog Robot - wheeled base with dog character on top, tail-mounted 360 lidar + +EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/appearances/protos/TireRubber.proto" + +PROTO ShepherdDog360 [ + field SFVec3f translation 0 0 0 + field SFRotation rotation 0 1 0 0 + field SFString name "ShepherdDog" + field SFString controller "shepherd_dog" + field MFString controllerArgs [] + field SFString customData "" + field SFBool supervisor FALSE + field SFBool synchronization TRUE +] +{ + Robot { + translation IS translation + rotation IS rotation + name IS name + controller IS controller + controllerArgs IS controllerArgs + customData IS customData + supervisor IS supervisor + synchronization IS synchronization + children [ + # ========== CHASSIS / BASE ========== + DEF CHASSIS Transform { + translation 0 0 0.05 + children [ + Shape { + appearance DEF CHASSIS_APP PBRAppearance { + baseColor 0.2 0.2 0.2 + roughness 0.6 + metalness 0.3 + } + geometry Box { + size 0.32 0.16 0.06 + } + } + ] + } + # Front slope + DEF CHASSIS_FRONT Transform { + translation 0.14 0 0.07 + children [ + Shape { + appearance USE CHASSIS_APP + geometry Box { + size 0.06 0.14 0.04 + } + } + ] + } + # Rear slope + DEF CHASSIS_REAR Transform { + translation -0.14 0 0.07 + children [ + Shape { + appearance USE CHASSIS_APP + geometry Box { + size 0.06 0.14 0.04 + } + } + ] + } + + # ========== DOG BODY on top of chassis ========== + DEF BODY Transform { + translation 0 0 0.11 + children [ + Shape { + appearance DEF FUR_BROWN PBRAppearance { + baseColor 0.55 0.35 0.17 + roughness 0.85 + metalness 0.0 + } + geometry Box { + size 0.30 0.16 0.08 + } + } + ] + } + + # ========== CHEST ========== + DEF CHEST Transform { + translation 0.12 0 0.11 + children [ + Shape { + appearance DEF FUR_CREAM PBRAppearance { + baseColor 0.85 0.72 0.55 + roughness 0.85 + metalness 0.0 + } + geometry Box { + size 0.08 0.18 0.08 + } + } + ] + } + + # ========== HEAD ========== + DEF HEAD Transform { + translation 0.20 0 0.17 + children [ + Shape { + appearance USE FUR_BROWN + geometry Box { + size 0.10 0.12 0.09 + } + } + ] + } + + # ========== SNOUT + LIDAR ========== + DEF SNOUT Transform { + translation 0.28 0 0.155 + children [ + Shape { + appearance USE FUR_CREAM + geometry Box { + size 0.08 0.07 0.05 + } + } + # Nose + Transform { + translation 0.04 0 0.01 + children [ + Shape { + appearance PBRAppearance { + baseColor 0.1 0.1 0.1 + roughness 0.4 + } + geometry Sphere { + radius 0.013 + subdivision 2 + } + } + ] + } + # Lidar — 360° FOV (was 140°/2.44 rad). Wider FOV closes the + # FOV-loss perception gap so policies trained on 360° gym sim + # transfer cleanly without retraining. + Lidar { + translation 0.05 0 0.01 + name "lidar" + horizontalResolution 360 + fieldOfView 6.28 + numberOfLayers 1 + minRange 0.10 + maxRange 15.0 + noise 0.005 + } + ] + } + + # ========== LEFT EAR ========== + DEF LEFT_EAR HingeJoint { + jointParameters HingeJointParameters { + axis 0 0 1 + anchor 0.19 0.055 0.21 + } + device [ + RotationalMotor { + name "left ear motor" + maxVelocity 10.0 + minPosition -0.5 + maxPosition 0.5 + } + ] + endPoint Solid { + translation 0.19 0.055 0.21 + rotation 0 0 1 0.2 + name "left ear" + children [ + Shape { + appearance DEF FUR_DARK PBRAppearance { + baseColor 0.35 0.20 0.10 + roughness 0.85 + metalness 0.0 + } + geometry Box { + size 0.035 0.025 0.06 + } + } + ] + boundingObject Box { + size 0.035 0.025 0.06 + } + physics Physics { + density -1 + mass 0.005 + } + } + } + + # ========== RIGHT EAR ========== + DEF RIGHT_EAR HingeJoint { + jointParameters HingeJointParameters { + axis 0 0 1 + anchor 0.19 -0.055 0.21 + } + device [ + RotationalMotor { + name "right ear motor" + maxVelocity 10.0 + minPosition -0.5 + maxPosition 0.5 + } + ] + endPoint Solid { + translation 0.19 -0.055 0.21 + rotation 0 0 -1 0.2 + name "right ear" + children [ + Shape { + appearance USE FUR_DARK + geometry Box { + size 0.035 0.025 0.06 + } + } + ] + boundingObject Box { + size 0.035 0.025 0.06 + } + physics Physics { + density -1 + mass 0.005 + } + } + } + + # ========== EYES ========== + DEF LEFT_EYE Transform { + translation 0.25 0.05 0.19 + children [ + Shape { + appearance PBRAppearance { + baseColor 0.95 0.95 0.95 + roughness 0.3 + } + geometry Sphere { + radius 0.016 + subdivision 2 + } + } + # Pupil + Transform { + translation 0.012 0 0.004 + children [ + Shape { + appearance PBRAppearance { + baseColor 0.1 0.1 0.1 + roughness 0.2 + } + geometry Sphere { + radius 0.009 + subdivision 2 + } + } + ] + } + ] + } + DEF RIGHT_EYE Transform { + translation 0.25 -0.05 0.19 + children [ + Shape { + appearance PBRAppearance { + baseColor 0.95 0.95 0.95 + roughness 0.3 + } + geometry Sphere { + radius 0.016 + subdivision 2 + } + } + # Pupil + Transform { + translation 0.012 0 0.004 + children [ + Shape { + appearance PBRAppearance { + baseColor 0.1 0.1 0.1 + roughness 0.2 + } + geometry Sphere { + radius 0.009 + subdivision 2 + } + } + ] + } + ] + } + + # ========== COLLAR ========== + DEF COLLAR Transform { + translation 0.16 0 0.125 + children [ + Shape { + appearance PBRAppearance { + baseColor 0.8 0.1 0.1 + roughness 0.5 + } + geometry Cylinder { + height 0.02 + radius 0.095 + subdivision 16 + } + } + # ID tag + Transform { + translation 0 0.10 0 + rotation 1 0 0 1.5708 + children [ + Shape { + appearance PBRAppearance { + baseColor 0.75 0.75 0.0 + metalness 0.8 + roughness 0.2 + } + geometry Cylinder { + height 0.003 + radius 0.018 + subdivision 8 + } + } + ] + } + ] + } + + # ========== TAIL (lidar inside tail tip ball) ========== + DEF TAIL HingeJoint { + jointParameters HingeJointParameters { + axis 0 1 0 + anchor -0.15 0 0.11 + } + device [ + RotationalMotor { + name "tail motor" + maxVelocity 5.0 + minPosition -1.0 + maxPosition 1.0 + } + ] + endPoint Solid { + translation -0.17 0 0.13 + name "tail solid" + children [ + Shape { + appearance USE FUR_BROWN + geometry Capsule { + height 0.12 + radius 0.013 + top FALSE + } + } + # Tail tip ball + Transform { + translation 0 0 0.08 + children [ + Shape { + appearance PBRAppearance { + baseColor 0.2 0.2 0.2 + roughness 0.3 + metalness 0.6 + } + geometry Sphere { + radius 0.028 + subdivision 4 + } + } + ] + } + ] + boundingObject Group { + children [ + Capsule { + height 0.12 + radius 0.013 + } + Transform { + translation 0 0 0.08 + children [ + Sphere { + radius 0.028 + } + ] + } + ] + } + physics Physics { + density -1 + mass 0.08 + } + } + } + + # ========== RIGHT AXLE ARM (horizontal bar from chassis to wheel) ========== + DEF RIGHT_AXLE Transform { + translation 0 -0.115 0.038 + children [ + Shape { + appearance PBRAppearance { + baseColor 0.5 0.5 0.5 + roughness 0.3 + metalness 0.8 + } + geometry Box { + size 0.02 0.08 0.02 + } + } + ] + } + + # ========== LEFT AXLE ARM ========== + DEF LEFT_AXLE Transform { + translation 0 0.115 0.038 + children [ + Shape { + appearance PBRAppearance { + baseColor 0.5 0.5 0.5 + roughness 0.3 + metalness 0.8 + } + geometry Box { + size 0.02 0.08 0.02 + } + } + ] + } + + # ========== RIGHT WHEEL ========== + DEF RIGHT_WHEEL_JOINT HingeJoint { + jointParameters HingeJointParameters { + axis 0 1 0 + anchor 0 -0.14 0.038 + } + device [ + RotationalMotor { + name "right wheel motor" + maxVelocity 70.0 + maxTorque 20.0 + } + PositionSensor { + name "right wheel sensor" + resolution 0.00628 + } + ] + endPoint Solid { + translation 0 -0.14 0.038 + rotation 0 -1 0 1.570796 + children [ + DEF WHEEL_VIS Pose { + rotation 1 0 0 -1.5708 + children [ + Shape { + appearance PBRAppearance { + baseColor 0.15 0.15 0.15 + roughness 0.4 + metalness 0.5 + } + geometry Cylinder { + height 0.016 + radius 0.035 + subdivision 24 + } + } + Shape { + appearance PBRAppearance { + baseColor 0.6 0.6 0.6 + roughness 0.3 + metalness 0.7 + } + geometry Cylinder { + height 0.018 + radius 0.014 + subdivision 12 + } + } + Shape { + appearance TireRubber { + textureTransform TextureTransform { + scale 1.5 0.6 + } + type "bike" + } + geometry Cylinder { + height 0.022 + radius 0.038 + subdivision 24 + } + } + ] + } + ] + name "right wheel" + boundingObject Pose { + rotation 1 0 0 -1.5708 + children [ + Cylinder { + height 0.022 + radius 0.038 + } + ] + } + physics Physics { + density -1 + mass 0.06 + centerOfMass [ + 0 0 0 + ] + } + } + } + + # ========== LEFT WHEEL ========== + DEF LEFT_WHEEL_JOINT HingeJoint { + jointParameters HingeJointParameters { + axis 0 1 0 + anchor 0 0.14 0.038 + } + device [ + RotationalMotor { + name "left wheel motor" + maxVelocity 70.0 + maxTorque 20.0 + } + PositionSensor { + name "left wheel sensor" + resolution 0.00628 + } + ] + endPoint Solid { + translation 0 0.14 0.038 + rotation 0.707105 0 0.707109 -3.14159 + children [ + USE WHEEL_VIS + ] + name "left wheel" + boundingObject Pose { + rotation 1 0 0 -1.5708 + children [ + Cylinder { + height 0.022 + radius 0.038 + } + ] + } + physics Physics { + density -1 + mass 0.12 + centerOfMass [ + 0 0 0 + ] + } + } + } + + # ========== FRONT CASTER ========== + DEF FRONT_CASTER BallJoint { + jointParameters BallJointParameters { + anchor 0.14 0 0.02 + } + endPoint Solid { + translation 0.14 0 0.02 + name "front caster" + children [ + Shape { + appearance PBRAppearance { + baseColor 0.2 0.2 0.2 + roughness 0.3 + metalness 0.5 + } + geometry Sphere { + radius 0.02 + subdivision 2 + } + } + ] + boundingObject Sphere { + radius 0.02 + } + physics Physics { + density -1 + mass 0.03 + } + } + } + + # ========== REAR CASTER ========== + DEF REAR_CASTER BallJoint { + jointParameters BallJointParameters { + anchor -0.14 0 0.02 + } + endPoint Solid { + translation -0.14 0 0.02 + name "rear caster" + children [ + Shape { + appearance PBRAppearance { + baseColor 0.2 0.2 0.2 + roughness 0.3 + metalness 0.5 + } + geometry Sphere { + radius 0.02 + subdivision 2 + } + } + ] + boundingObject Sphere { + radius 0.02 + } + physics Physics { + density -1 + mass 0.03 + } + } + } + + # ========== IMU SENSORS ========== + Accelerometer { + translation 0 0 0.10 + name "accelerometer" + } + Gyro { + translation 0 0 0.10 + name "gyro" + } + Compass { + translation 0 0 0.10 + name "compass" + } + + # ========== GPS ========== + GPS { + translation 0 0 0.17 + name "gps" + } + + # ========== RECEIVER ========== + Receiver { + name "receiver" + channel 1 + } + + # ========== EMITTER ========== + Emitter { + name "emitter" + channel 1 + range 50.0 + } + ] + + # ========== BOUNDING OBJECT ========== + boundingObject Group { + children [ + # Chassis box + Transform { + translation 0 0 0.05 + children [ + Box { + size 0.32 0.16 0.06 + } + ] + } + # Body box + Transform { + translation 0 0 0.11 + children [ + Box { + size 0.30 0.16 0.08 + } + ] + } + ] + } + + # ========== PHYSICS ========== + physics Physics { + density -1 + mass 5.0 + centerOfMass [ + 0 0 0.03 + ] + } + } +} diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..a4bbac8 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,240 @@ +"""Tests for herding/config.py — dataclass construction, defaults, overrides.""" + +import math +import pytest + +from herding.config import ( + DetectionConfig, + DomainRandomConfig, + HerdingConfig, + HERDING_DEFAULT, + HERDING_WEBOTS, + LidarConfig, + LIDAR_FULL, + LIDAR_WEBOTS, + RobotConfig, + TrackerConfig, +) + + +# --------------------------------------------------------------------------- +# LidarConfig +# --------------------------------------------------------------------------- + +class TestLidarConfig: + def test_defaults_match_full_circle_preset(self): + assert LidarConfig() == LIDAR_FULL + + def test_webots_preset(self): + assert LIDAR_WEBOTS.n_rays == 180 + assert abs(LIDAR_WEBOTS.fov_rad - math.radians(140.0)) < 1e-9 + + def test_frozen(self): + cfg = LidarConfig() + with pytest.raises((AttributeError, TypeError)): + cfg.n_rays = 42 # type: ignore[misc] + + def test_invalid_n_rays(self): + with pytest.raises(ValueError): + LidarConfig(n_rays=0) + + def test_invalid_fov(self): + with pytest.raises(ValueError): + LidarConfig(fov_rad=0.0) + with pytest.raises(ValueError): + LidarConfig(fov_rad=math.pi * 3) + + def test_invalid_max_range(self): + with pytest.raises(ValueError): + LidarConfig(max_range=-1.0) + + +# --------------------------------------------------------------------------- +# TrackerConfig +# --------------------------------------------------------------------------- + +class TestTrackerConfig: + def test_defaults(self): + cfg = TrackerConfig() + assert cfg.forget_steps == 200 + assert cfg.max_new_tracks_per_step == 10 + + def test_webots_preset_tighter(self): + cfg = HERDING_WEBOTS.tracker + assert cfg.forget_steps == 120 + assert cfg.max_new_tracks_per_step == 1 + assert cfg.pen_latch_depth == 2.0 + + def test_invalid_forget_steps(self): + with pytest.raises(ValueError): + TrackerConfig(forget_steps=0) + + def test_invalid_max_new_tracks(self): + with pytest.raises(ValueError): + TrackerConfig(max_new_tracks_per_step=0) + + +# --------------------------------------------------------------------------- +# DetectionConfig +# --------------------------------------------------------------------------- + +class TestDetectionConfig: + def test_defaults(self): + cfg = DetectionConfig() + assert cfg.wall_reject == 0.5 + + def test_webots_preset_wall_reject(self): + # wall_reject stays at 0.5 m — 1.0 m was too aggressive near the south gate + cfg = HERDING_WEBOTS.detection + assert cfg.wall_reject == 0.5 + + def test_invalid_wall_reject(self): + with pytest.raises(ValueError): + DetectionConfig(wall_reject=-0.1) + + +# --------------------------------------------------------------------------- +# RobotConfig +# --------------------------------------------------------------------------- + +class TestRobotConfig: + def test_max_linear_derived(self): + cfg = RobotConfig() + assert abs(cfg.max_linear - cfg.wheel_radius * cfg.max_wheel_omega) < 1e-9 + + def test_default_action_smooth_zero(self): + assert RobotConfig().action_smooth == 0.0 + + def test_webots_action_smooth(self): + assert HERDING_WEBOTS.robot.action_smooth == 0.55 + + def test_invalid_action_smooth(self): + with pytest.raises(ValueError): + RobotConfig(action_smooth=1.0) + with pytest.raises(ValueError): + RobotConfig(action_smooth=-0.1) + + +# --------------------------------------------------------------------------- +# DomainRandomConfig +# --------------------------------------------------------------------------- + +class TestDomainRandomConfig: + def test_all_zeros_by_default(self): + cfg = DomainRandomConfig() + assert cfg.fp_rate == 0.0 + assert cfg.wheel_slip_std == 0.0 + assert cfg.compass_noise_std == 0.0 + + def test_invalid_fp_rate(self): + with pytest.raises(ValueError): + DomainRandomConfig(fp_rate=-1.0) + + def test_invalid_slip_std(self): + with pytest.raises(ValueError): + DomainRandomConfig(wheel_slip_std=-0.01) + + +# --------------------------------------------------------------------------- +# HerdingConfig +# --------------------------------------------------------------------------- + +class TestHerdingConfig: + def test_default_is_herding_default(self): + assert HerdingConfig() == HERDING_DEFAULT + + def test_replace_sub_config(self): + new_cfg = HERDING_WEBOTS.replace( + domain_random=DomainRandomConfig(fp_rate=2.0) + ) + assert new_cfg.domain_random.fp_rate == 2.0 + # Other sub-configs unchanged + assert new_cfg.tracker == HERDING_WEBOTS.tracker + assert new_cfg.lidar == HERDING_WEBOTS.lidar + + def test_herding_default_matches_original_module_constants(self): + """Verify the default config reproduces the original hardcoded values.""" + from herding.perception.lidar_sim import ( + LIDAR_N_RAYS, LIDAR_FOV, LIDAR_MAX_RANGE, LIDAR_NOISE, + SHEEP_RADIUS, POST_RADIUS, + ) + from herding.perception.lidar_perception import ( + GAP_THRESHOLD, MAX_CLUSTER_SPAN, RANGE_HIT_EPS, + SPLIT_RANGE_GAP, WALL_REJECT, STATIC_REJECT, + ) + from herding.perception.sheep_tracker import ( + GATE_M, REACQUIRE_GATE_M, REACQUIRE_MIN_AGE, PENNED_GATE_M, + FORGET_STEPS, PREDICT_STEPS, VELOCITY_CLAMP, + ) + cfg = HERDING_DEFAULT + assert cfg.lidar.n_rays == LIDAR_N_RAYS + assert cfg.lidar.fov_rad == LIDAR_FOV + assert cfg.lidar.max_range == LIDAR_MAX_RANGE + assert cfg.lidar.noise_std == LIDAR_NOISE + assert cfg.lidar.sheep_radius == SHEEP_RADIUS + assert cfg.lidar.post_radius == POST_RADIUS + assert cfg.detection.gap_threshold == GAP_THRESHOLD + assert cfg.detection.max_cluster_span == MAX_CLUSTER_SPAN + assert cfg.detection.range_hit_eps == RANGE_HIT_EPS + assert cfg.detection.split_range_gap == SPLIT_RANGE_GAP + assert cfg.detection.wall_reject == WALL_REJECT + assert cfg.detection.static_reject == STATIC_REJECT + assert cfg.tracker.gate_m == GATE_M + assert cfg.tracker.reacquire_gate_m == REACQUIRE_GATE_M + assert cfg.tracker.reacquire_min_age == REACQUIRE_MIN_AGE + assert cfg.tracker.penned_gate_m == PENNED_GATE_M + assert cfg.tracker.forget_steps == FORGET_STEPS + assert cfg.tracker.predict_steps == PREDICT_STEPS + assert cfg.tracker.velocity_clamp == VELOCITY_CLAMP + + +# --------------------------------------------------------------------------- +# Integration: HerdingEnv honours the config +# --------------------------------------------------------------------------- + +class TestHerdingEnvConfig: + def test_default_env_unchanged(self): + """HerdingEnv() still works with no config — zero behaviour change.""" + from training.herding_env import HerdingEnv + env = HerdingEnv(n_sheep=1, max_steps=5, difficulty=1.0, seed=0) + obs, info = env.reset() + assert obs.shape == (32,) + obs2, *_ = env.step(env.action_space.sample()) + assert obs2.shape == (32,) + + def test_webots_config_propagates_action_smooth(self): + from training.herding_env import HerdingEnv + env = HerdingEnv(herding_cfg=HERDING_WEBOTS) + assert env.ACTION_SMOOTH == 0.55 + + def test_webots_config_runs(self): + from training.herding_env import HerdingEnv + env = HerdingEnv( + n_sheep=2, max_steps=10, difficulty=1.0, seed=42, + herding_cfg=HERDING_WEBOTS, + ) + obs, _ = env.reset() + for _ in range(5): + obs, _, terminated, truncated, _ = env.step(env.action_space.sample()) + assert obs.shape == (32,) + + def test_domain_random_fp_runs(self): + from training.herding_env import HerdingEnv + cfg = HERDING_WEBOTS.replace( + domain_random=DomainRandomConfig(fp_rate=3.0, fp_std_pos=0.2) + ) + env = HerdingEnv(n_sheep=2, max_steps=10, difficulty=1.0, seed=7, herding_cfg=cfg) + env.reset() + for _ in range(5): + env.step(env.action_space.sample()) + + def test_domain_random_slip_runs(self): + from training.herding_env import HerdingEnv + cfg = HERDING_WEBOTS.replace( + domain_random=DomainRandomConfig(wheel_slip_std=0.05, compass_noise_std=0.02) + ) + env = HerdingEnv(n_sheep=1, max_steps=10, difficulty=1.0, seed=3, + drive_mode="mecanum", herding_cfg=cfg) + env.reset() + for _ in range(5): + env.step(env.action_space.sample()) diff --git a/tests/test_control.py b/tests/test_control.py index 6052a76..429dfc1 100644 --- a/tests/test_control.py +++ b/tests/test_control.py @@ -106,11 +106,34 @@ def test_sequential_empty_input_idle(): def test_sequential_targets_closest_to_pen(): + # With 2 sheep (≤ STRAGGLER_THRESHOLD), sequential goes straight to + # "targeted" phase and pushes the sheep nearest to the pen. near = (10.0, -5.0) # closer to pen entry (11.5, -15) far = (-10.0, 10.0) sheep = {"near": near, "far": far} + vx, vy, mode = sequential_action((0.0, 0.0), sheep, PEN_ENTRY) + assert mode == "targeted" + # Dog should be directed toward near sheep (south-east), not far (north-west). + assert vx > 0 and vy < 0 + + +def test_sequential_collects_when_scattered(): + # With >STRAGGLER_THRESHOLD sheep and radius > F_FACTOR*sqrt(n): + # should use collect (Strombom) not targeted. + sheep = {f"s{i}": pos for i, pos in enumerate([ + (12.0, 10.0), (-12.0, 10.0), (0.0, 12.0), + (12.0, -12.0), (-10.0, -8.0), + ])} _vx, _vy, mode = sequential_action((0.0, 0.0), sheep, PEN_ENTRY) - assert mode.startswith("drive:near") + assert mode in ("collect", "drive") + + +def test_sequential_drives_when_compact(): + # Compact flock of 5 sheep near centre — should drive, not collect. + sheep = {f"s{i}": (float(i) * 0.3, float(i) * 0.3) + for i in range(5)} + _vx, _vy, mode = sequential_action((0.0, 5.0), sheep, PEN_ENTRY) + assert mode == "drive" # --------------------------------------------------------------------------- diff --git a/tools/calibrate_mecanum.sh b/tools/calibrate_mecanum.sh new file mode 100755 index 0000000..a4d5fb7 --- /dev/null +++ b/tools/calibrate_mecanum.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +# Measure the actual velocity response of the Webots mecanum robot and +# compare against the gym's first-order kinematics prediction. +# +# Uses HERDING_MODE=calibrate in the shepherd_dog controller, which applies +# a known fixed action for N steps, records GPS displacement, and computes +# the relative error vs gym prediction. +# +# Usage: +# bash tools/calibrate_mecanum.sh [N_STEPS] +# N_STEPS : steps to hold each action (default 150, ≈ 2.4 s real-time) +# +# Output: +# calibrate_mecanum.log — per-axis results printed and written here +# +# Target: < 10% relative error on each axis. +# If errors are high, tune coulombFriction / forceDependentSlip in +# tools/run_webots.sh (mecanum contactProperties block). + +set -euo pipefail +N_STEPS="${1:-150}" +ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )" +LOG="$ROOT/calibrate_mecanum.log" +export PATH="/home/jalf/miniconda3/envs/tir/bin:$PATH" + +echo "Running mecanum calibration (N_STEPS=$N_STEPS)..." +echo "Results will be written to: $LOG" +truncate -s 0 "$LOG" + +run_calib() { + local vx="$1" vy="$2" om="$3" + echo " Testing vx=$vx vy=$vy om=$om ..." + rm -f "$ROOT/training/.run_done" + timeout --kill-after=15s 60 \ + xvfb-run -a \ + env WEBOTS_HEADLESS=1 WEBOTS_EXTRA_ARGS="--stdout --stderr" \ + HERDING_MODE=calibrate HERDING_DRIVE=mecanum HERDING_WORLD=field \ + CALIB_VX="$vx" CALIB_VY="$vy" CALIB_OM="$om" \ + CALIB_N_STEPS="$N_STEPS" \ + bash "$ROOT/tools/run_webots.sh" 0 calibrate mecanum field \ + 2>&1 | grep -E "cmd=|gym|webots|error" || true + pkill -9 -f "webots-bin|Xvfb" 2>/dev/null || true + sleep 1 +} + +# Three test vectors: pure-x, pure-y, diagonal +run_calib 0.5 0.0 0.0 +run_calib 0.0 0.5 0.0 +run_calib 0.35 0.35 0.0 + +echo "" +echo "=== Calibration results ===" +cat "$LOG" 2>/dev/null || echo "(no results written — check controller output above)" +echo "" +echo "Target: <10% error on each axis." +echo "If errors are high, tune coulombFriction / forceDependentSlip in" +echo "tools/run_webots.sh (mecanum contactProperties block)." diff --git a/tools/dagger_round.sh b/tools/dagger_round.sh new file mode 100755 index 0000000..7ddd9ed --- /dev/null +++ b/tools/dagger_round.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# Run one DAgger round on a (drive, world) combo. +# +# Usage: tools/dagger_round.sh [seeds_per_n] [round_idx] +# +# Collects DAgger demos using the current BC policy as the actor and the +# universal teacher as the labeller, in the HERDING_WEBOTS preset env +# (140° FOV, tight tracker — matches deployment). Concatenates with the +# original BC demos, re-trains BC, and saves to runs/bc_dagger_/. + +set -euo pipefail +ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )" +cd "$ROOT" + +DRIVE="${1:-differential}" +WORLD="${2:-field}" +SEEDS="${3:-15}" +ROUND="${4:-1}" + +TAG="${DRIVE}_${WORLD}" +ORIG_DEMOS="training/bc/demos_${TAG}.npz" +DAGGER_DEMOS="training/bc/dagger${ROUND}_${TAG}.npz" +COMBINED_DEMOS="training/bc/combined${ROUND}_${TAG}.npz" +BC_DIR="training/runs/bc_${TAG}" +OUT_DIR="training/runs/bc_dagger${ROUND}_${TAG}" + +case "$WORLD" in + field_round) + EPOCHS=150 + ;; + *) + EPOCHS=60 + ;; +esac + +echo "=== DAgger round ${ROUND}: ${DRIVE}/${WORLD} ===" +echo " Actor policy: ${BC_DIR}/policy.zip" +echo " Output: ${OUT_DIR}/policy.zip" + +# 1. Collect DAgger demos: BC drives, teacher labels (privileged + HERDING_WEBOTS). +python -m training.bc.collect \ + --teacher universal --out "$DAGGER_DEMOS" \ + --seeds-per-n "$SEEDS" --subsample 3 \ + --frame-stack 4 --drive-mode "$DRIVE" --world "$WORLD" \ + --max-steps 30000 \ + --privileged --use-webots-preset \ + --fp-rate 0.0 --action-smooth 0.55 --wheel-slip-std 0.05 \ + --dagger-policy "$BC_DIR" + +# 2. Concatenate original demos + dagger demos. +python - < 10 )); then - echo "N must be 1..10, got $N" >&2; exit 1 +if (( N < 0 || N > 10 )); then + echo "N must be 0..10, got $N" >&2; exit 1 fi case "$MODE" in - bc|rl|strombom|sequential|universal) ;; - *) echo "MODE must be bc|rl|strombom|sequential|universal, got '$MODE'" >&2; exit 1 ;; + bc|rl|strombom|sequential|universal|calibrate) ;; + *) echo "MODE must be bc|rl|strombom|sequential|universal|calibrate, got '$MODE'" >&2; exit 1 ;; esac case "$DRIVE" in differential|mecanum) ;; @@ -83,29 +83,31 @@ cp "$SRC" "$DST" if [[ "$DRIVE" == "mecanum" ]]; then sed -i 's|"../protos/ShepherdDog.proto"|"../protos/ShepherdDogMecanum.proto"|' "$DST" sed -i 's|^ShepherdDog {|ShepherdDogMecanum {|' "$DST" - # Inject mecanum contact properties after the existing contactProperties block. + # Inject mecanum contact properties into the contactProperties array. + # Strategy: find the closing ' ]' that ends the contactProperties block + # (it sits at 2-space indent, immediately before the WorldInfo closing brace) + # and insert just before it. python3 -c " -import re, sys -with open(sys.argv[1], 'r') as f: +with open('$DST', 'r') as f: txt = f.read() -# Find the closing ']' of contactProperties and insert before it. -mec = ''' - ContactProperties { +mec = ''' ContactProperties { material1 \"MecanumWheel\" coulombFriction [ - 2 + 1.0 ] bounce 0 forceDependentSlip [ - 10 + 0.01 ] softCFM 0.0001 - }''' -# Insert before the first ']' that closes contactProperties [...] -txt = re.sub(r'(contactProperties\s*\[[^\]]*)(\])', r'\1' + mec + r'\2', txt, count=1) -with open(sys.argv[1], 'w') as f: + } +''' +# The contactProperties array closes with ' ]\n}' (2-space indent ] then WorldInfo }). +# Insert the new block just before that closing ]. +txt = txt.replace('\n ]\n}', '\n' + mec + ' ]\n}', 1) +with open('$DST', 'w') as f: f.write(txt) -" "$DST" +" fi # Comment out sheep N+1..10 by prefixing the matching Sheep { ... } line. @@ -129,6 +131,7 @@ HERDING_MODE=$MODE HERDING_POLICY_DIR=$RESOLVED_POLICY_DIR HERDING_DRIVE=$DRIVE HERDING_WORLD=$WORLD +HERDING_USE_GT=${HERDING_USE_GT:-0} EOF export HERDING_MODE="$MODE" diff --git a/tools/webots_sweep.sh b/tools/webots_sweep.sh new file mode 100755 index 0000000..e131dfe --- /dev/null +++ b/tools/webots_sweep.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash +# Headless Webots sweep across modes, drives, worlds, and flock sizes. +# Runs sequentially; each trial gets a hard 150s wall-clock timeout. +# Results are written to webots_sweep.log (tab-separated) and printed. +# +# Usage: bash tools/webots_sweep.sh [output_log] + +set -euo pipefail +ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )" +OUT="${1:-$ROOT/webots_sweep.log}" +TIMEOUT_S=120 # ~80k steps in fast headless mode — covers slow-converging physics + +# Webots uses its own python3; put the conda env first so all deps resolve. +export PATH="/home/jalf/miniconda3/envs/tir/bin:$PATH" + +# Columns: mode drive world n_sheep success steps +printf "%-12s %-14s %-12s %7s %7s %s\n" \ + "mode" "drive" "world" "n_sheep" "success" "steps" | tee "$OUT" +printf '%s\n' "$(printf '%-12s %-14s %-12s %7s %7s %s' \ + '----' '-----' '-----' '-------' '-------' '-----')" | tee -a "$OUT" + +run_trial() { + local mode="$1" drive="$2" world="$3" n="$4" policy_dir="${5:-}" + + local done_file="$ROOT/training/.run_done" + rm -f "$done_file" + + local extra_env=() + extra_env+=(WEBOTS_HEADLESS=1) + extra_env+=(WEBOTS_EXTRA_ARGS="--stdout --stderr") + extra_env+=(HERDING_USE_GT=0) + if [[ -n "$policy_dir" ]]; then + extra_env+=(HERDING_POLICY_DIR="$ROOT/$policy_dir") + fi + + local raw + raw=$( + timeout --kill-after=15s "$TIMEOUT_S" \ + xvfb-run -a \ + env "${extra_env[@]}" \ + bash "$ROOT/tools/run_webots.sh" "$n" "$mode" "$drive" "$world" \ + 2>&1 + ) || true + # Webots-bin and Xvfb can survive the timeout; kill any orphans now. + pkill -9 -f "webots-bin|Xvfb" 2>/dev/null || true + sleep 1 + + local success="FAIL" + local steps="timeout" + + if echo "$raw" | grep -q "\[dog\] all .* sheep penned at step"; then + success="OK" + steps=$(echo "$raw" | grep "\[dog\] all .* sheep penned at step" \ + | grep -oP 'step \K[0-9]+') + fi + + printf "%-12s %-14s %-12s %7s %7s %s\n" \ + "$mode" "$drive" "$world" "$n" "$success" "$steps" | tee -a "$OUT" +} + +# --------------------------------------------------------------------------- +# Analytic baselines (differential only — that's the story context) +# strombom / sequential: canonical baselines +# universal: the actual teacher used to collect BC demos +# --------------------------------------------------------------------------- +for mode in strombom sequential universal; do + for world in field field_round; do + for n in 5 10; do + run_trial "$mode" differential "$world" "$n" + done + done +done + +# --------------------------------------------------------------------------- +# BC — world-specific policies +# --------------------------------------------------------------------------- +for drive in differential mecanum; do + for world in field field_round; do + for n in 5 10; do + run_trial bc "$drive" "$world" "$n" \ + "training/runs/bc_${drive}_${world}" + done + done +done + +# --------------------------------------------------------------------------- +# RL_FAST — MODE=rl with explicit HERDING_POLICY_DIR pointing to rl_fast dirs +# (run_webots.sh rejects "rl_fast" as a mode; "rl" + policy override is correct) +# --------------------------------------------------------------------------- +for drive in differential mecanum; do + for world in field field_round; do + for n in 5 10; do + run_trial rl "$drive" "$world" "$n" \ + "training/runs/rl_fast_${drive}_${world}" + done + done +done + +echo "" +echo "Sweep complete. Results saved to: $OUT" diff --git a/tools/webots_sweep_gt.sh b/tools/webots_sweep_gt.sh new file mode 100755 index 0000000..9f7449d --- /dev/null +++ b/tools/webots_sweep_gt.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash +# Headless Webots sweep across modes, drives, worlds, and flock sizes. +# Runs sequentially; each trial gets a hard 150s wall-clock timeout. +# Results are written to webots_sweep.log (tab-separated) and printed. +# +# Usage: bash tools/webots_sweep.sh [output_log] + +set -euo pipefail +ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )" +OUT="${1:-$ROOT/webots_sweep.log}" +TIMEOUT_S=120 # ~80k steps in fast headless mode — covers slow-converging physics + +# Webots uses its own python3; put the conda env first so all deps resolve. +export PATH="/home/jalf/miniconda3/envs/tir/bin:$PATH" + +# Columns: mode drive world n_sheep success steps +printf "%-12s %-14s %-12s %7s %7s %s\n" \ + "mode" "drive" "world" "n_sheep" "success" "steps" | tee "$OUT" +printf '%s\n' "$(printf '%-12s %-14s %-12s %7s %7s %s' \ + '----' '-----' '-----' '-------' '-------' '-----')" | tee -a "$OUT" + +run_trial() { + local mode="$1" drive="$2" world="$3" n="$4" policy_dir="${5:-}" + + local done_file="$ROOT/training/.run_done" + rm -f "$done_file" + + local extra_env=() + extra_env+=(WEBOTS_HEADLESS=1) + extra_env+=(WEBOTS_EXTRA_ARGS="--stdout --stderr") + extra_env+=(HERDING_USE_GT=1) + if [[ -n "$policy_dir" ]]; then + extra_env+=(HERDING_POLICY_DIR="$ROOT/$policy_dir") + fi + + local raw + raw=$( + timeout --kill-after=15s "$TIMEOUT_S" \ + xvfb-run -a \ + env "${extra_env[@]}" \ + bash "$ROOT/tools/run_webots.sh" "$n" "$mode" "$drive" "$world" \ + 2>&1 + ) || true + # Webots-bin and Xvfb can survive the timeout; kill any orphans now. + pkill -9 -f "webots-bin|Xvfb" 2>/dev/null || true + sleep 1 + + local success="FAIL" + local steps="timeout" + + if echo "$raw" | grep -q "\[dog\] all .* sheep penned at step"; then + success="OK" + steps=$(echo "$raw" | grep "\[dog\] all .* sheep penned at step" \ + | grep -oP 'step \K[0-9]+') + fi + + printf "%-12s %-14s %-12s %7s %7s %s\n" \ + "$mode" "$drive" "$world" "$n" "$success" "$steps" | tee -a "$OUT" +} + +# --------------------------------------------------------------------------- +# Analytic baselines (differential only — that's the story context) +# strombom / sequential: canonical baselines +# universal: the actual teacher used to collect BC demos +# --------------------------------------------------------------------------- +for mode in strombom sequential universal; do + for world in field field_round; do + for n in 5 10; do + run_trial "$mode" differential "$world" "$n" + done + done +done + +# --------------------------------------------------------------------------- +# BC — world-specific policies +# --------------------------------------------------------------------------- +for drive in differential mecanum; do + for world in field field_round; do + for n in 5 10; do + run_trial bc "$drive" "$world" "$n" \ + "training/runs/bc_${drive}_${world}" + done + done +done + +# --------------------------------------------------------------------------- +# RL_FAST — MODE=rl with explicit HERDING_POLICY_DIR pointing to rl_fast dirs +# (run_webots.sh rejects "rl_fast" as a mode; "rl" + policy override is correct) +# --------------------------------------------------------------------------- +for drive in differential mecanum; do + for world in field field_round; do + for n in 5 10; do + run_trial rl "$drive" "$world" "$n" \ + "training/runs/rl_fast_${drive}_${world}" + done + done +done + +echo "" +echo "Sweep complete. Results saved to: $OUT" diff --git a/training/bc/collect.py b/training/bc/collect.py index cb0a03b..61eb127 100644 --- a/training/bc/collect.py +++ b/training/bc/collect.py @@ -21,22 +21,9 @@ from pathlib import Path import numpy as np -# Early CLI parse so we can configure geometry before heavy imports. -# (argparse is used again below for the full parse; this is a lightweight -# pre-pass that only reads --world.) -_pre_argv = [a for a in os.sys.argv[1:]] -_pre_world = None -for i, a in enumerate(_pre_argv): - if a == "--world" and i + 1 < len(_pre_argv): - _pre_world = _pre_argv[i + 1] - break - if a.startswith("--world="): - _pre_world = a.split("=", 1)[1] - break -if _pre_world is not None: - from herding.world.geometry import configure as _geo_configure - _geo_configure(_pre_world) - os.environ["HERDING_WORLD"] = _pre_world +# Configure field geometry before other herding imports read it at module level. +from herding.world.geometry import configure_from_args as _configure_from_args +_configure_from_args() from herding.control.active_scan import ActiveScanTeacher from herding.world.geometry import PEN_ENTRY, FIELD_SHAPE @@ -83,10 +70,17 @@ def _call_teacher(fn, dog_xy, dog_heading, sheep_positions, pen_target, def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int, teacher_fn, frame_stack: int = 1, privileged: bool = False, - drive_mode: str = "differential"): + drive_mode: str = "differential", herding_cfg=None, + actor_policy=None): + """Collect (obs, teacher_action) pairs from one episode. + + ``actor_policy`` (DAgger mode): a callable ``policy(obs) -> action`` that + drives the env. The teacher still labels each visited state. If ``None`` + (default), the teacher drives. + """ env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps, difficulty=1.0, seed=seed, frame_stack=frame_stack, - drive_mode=drive_mode) + drive_mode=drive_mode, herding_cfg=herding_cfg) obs, _ = env.reset(seed=seed) obs_list, action_list = [], [] scan_teacher = ActiveScanTeacher(teacher_fn) @@ -108,13 +102,16 @@ def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int, ) vx, vy, omega, _mode = result if drive_mode == "mecanum": - action = np.array([vx, vy, omega], dtype=np.float32) + teacher_action = np.array([vx, vy, omega], dtype=np.float32) else: - action = np.array([vx, vy], dtype=np.float32) + teacher_action = np.array([vx, vy], dtype=np.float32) if step % subsample == 0: obs_list.append(obs.copy()) - action_list.append(action.copy()) - obs, _r, term, trunc, _info = env.step(action) + action_list.append(teacher_action.copy()) + # In DAgger mode the policy drives; otherwise the teacher does. + step_action = (actor_policy(obs) if actor_policy is not None + else teacher_action) + obs, _r, term, trunc, _info = env.step(step_action) if term or trunc: break success = bool(env.sheep_penned.all()) @@ -153,6 +150,24 @@ def main(): help="World shape. If not set, uses HERDING_WORLD " "env var or defaults to 'field'. Must be set " "before geometry is imported.") + # Domain randomisation — applied to the gym env during collection so + # the teacher demonstrates under the same noise the policy will face. + parser.add_argument("--fp-rate", type=float, default=0.0, + help="Mean false-positive detections injected per " + "step (Poisson λ). 0 = clean sim (default).") + parser.add_argument("--action-smooth", type=float, default=0.0, + help="EMA coefficient on dog actions (0 = none). " + "Set to 0.55 to match the Webots controller.") + parser.add_argument("--wheel-slip-std", type=float, default=0.0, + help="Gaussian noise (rad/s) on wheel speeds for " + "mecanum dynamics domain randomisation.") + parser.add_argument("--dagger-policy", default=None, + help="Path to a BC/PPO policy directory. When set, " + "the policy drives the env (DAgger) while the " + "teacher labels every visited state.") + parser.add_argument("--use-webots-preset", action="store_true", + help="Use HERDING_WEBOTS preset (140° FOV + tight " + "tracker). Match this to deployment for DAgger.") args = parser.parse_args() # Validate --world matches geometry (already configured by the @@ -161,6 +176,53 @@ def main(): print(f"[demos] WARNING: --world={args.world} but geometry is " f"'{FIELD_SHAPE}'. This should not happen — file a bug.") + from herding.config import HerdingConfig, HERDING_WEBOTS, DomainRandomConfig, RobotConfig + if args.use_webots_preset: + herding_cfg = HERDING_WEBOTS.replace( + domain_random=DomainRandomConfig( + fp_rate=args.fp_rate, + wheel_slip_std=args.wheel_slip_std, + ), + robot=RobotConfig(action_smooth=args.action_smooth), + ) + print(f"[demos] HERDING_WEBOTS preset + DR: fp_rate={args.fp_rate} " + f"action_smooth={args.action_smooth} wheel_slip_std={args.wheel_slip_std}") + else: + herding_cfg = None + if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0: + herding_cfg = HerdingConfig( + domain_random=DomainRandomConfig( + fp_rate=args.fp_rate, + wheel_slip_std=args.wheel_slip_std, + ), + robot=RobotConfig(action_smooth=args.action_smooth), + ) + print(f"[demos] domain-random: fp_rate={args.fp_rate} " + f"action_smooth={args.action_smooth} " + f"wheel_slip_std={args.wheel_slip_std}") + + actor_policy = None + if args.dagger_policy is not None: + # DAgger: failures are the most valuable data (off-policy states + # where the student needs teacher correction). Always keep them. + args.keep_failures = True + from stable_baselines3 import PPO + from pathlib import Path as _P + run = _P(args.dagger_policy) + for name in ("policy.zip", "final.zip"): + if (run / name).exists(): + zip_path = run / name + break + else: + raise FileNotFoundError( + f"No policy found in {run} (tried policy.zip, final.zip)") + _model = PPO.load(str(zip_path), device="auto") + print(f"[demos] DAgger mode: actor = {zip_path}") + def actor_policy(obs): + obs_b = np.asarray(obs, dtype=np.float32).reshape(1, -1) + a, _ = _model.predict(obs_b, deterministic=True) + return a[0] + teacher_fn = TEACHERS[args.teacher] print(f"[demos] teacher: {args.teacher} world: {FIELD_SHAPE}") @@ -177,7 +239,8 @@ def main(): obs, actions, success, total_steps = collect_one( n, seed, args.max_steps, args.subsample, teacher_fn, frame_stack=args.frame_stack, privileged=args.privileged, - drive_mode=args.drive_mode, + drive_mode=args.drive_mode, herding_cfg=herding_cfg, + actor_policy=actor_policy, ) n_total += 1 if success: diff --git a/training/eval.py b/training/eval.py index e290449..790e51a 100644 --- a/training/eval.py +++ b/training/eval.py @@ -18,21 +18,9 @@ from statistics import mean import numpy as np -# Early CLI pre-parse for --world so geometry is configured before -# other herding.* modules are imported. -_pre_argv = [a for a in os.sys.argv[1:]] -_pre_world = None -for i, a in enumerate(_pre_argv): - if a == "--world" and i + 1 < len(_pre_argv): - _pre_world = _pre_argv[i + 1] - break - if a.startswith("--world="): - _pre_world = a.split("=", 1)[1] - break -if _pre_world is not None: - from herding.world.geometry import configure as _geo_configure - _geo_configure(_pre_world) - os.environ["HERDING_WORLD"] = _pre_world +# Configure field geometry before other herding imports read it at module level. +from herding.world.geometry import configure_from_args as _configure_from_args +_configure_from_args() from herding.world.geometry import MAX_SHEEP, PEN_ENTRY from herding.control.sequential import compute_action as sequential_action diff --git a/training/herding_env.py b/training/herding_env.py index 3948e2e..9dec733 100644 --- a/training/herding_env.py +++ b/training/herding_env.py @@ -47,6 +47,7 @@ from herding.perception.lidar_sim import simulate_scan from herding.perception.obs import OBS_DIM, build_obs from herding.perception.sheep_tracker import SheepTracker from herding.control.strombom import compute_action as strombom_action +from herding.config import HerdingConfig class HerdingEnv(gym.Env): @@ -87,13 +88,24 @@ class HerdingEnv(gym.Env): use_lidar: bool = True, frame_stack: int = 1, drive_mode: str = "differential", + herding_cfg: Optional[HerdingConfig] = None, ): super().__init__() + # Store the config; fall back to defaults when None. + self._herding_cfg = herding_cfg + + # Apply robot config overrides — these shadow the class attributes + # so that per-instance configuration is possible without touching + # the class-level defaults used by unconfigured instances. + if herding_cfg is not None: + self.ACTION_SMOOTH = herding_cfg.robot.action_smooth + # ``use_lidar=True`` (default): obs and imitation-reward teacher # see only LiDAR-perceived positions via a tracker, matching the # Webots controller. ``False`` exposes ground truth for ablation. self._use_lidar = bool(use_lidar) - self._tracker = SheepTracker() if self._use_lidar else None + tracker_cfg = herding_cfg.tracker if herding_cfg is not None else None + self._tracker = SheepTracker(tracker_cfg=tracker_cfg) if self._use_lidar else None self._np_rng_lidar: Optional[np.random.Generator] = None # Frame stacking: the policy receives the last K obs concatenated, @@ -261,6 +273,14 @@ class HerdingEnv(gym.Env): vx, vy = float(self.smoothed_action[0]), float(self.smoothed_action[1]) omega = float(self.smoothed_action[2]) if self._action_dim >= 3 else 0.0 + # Domain randomisation: compass (heading) noise. + dr = (self._herding_cfg.domain_random + if self._herding_cfg is not None else None) + slip_std = dr.wheel_slip_std if dr is not None else 0.0 + if dr is not None and dr.compass_noise_std > 0.0 and self._np_rng_lidar is not None: + self.dog_heading = float(self.dog_heading + self._np_rng_lidar.normal( + 0.0, dr.compass_noise_std)) + # Safety supervisor — dog stays north of the gate. if self.dog_y < DOG_SOUTH_LIMIT and vy < 0.0: vx, vy = 0.0, 1.0 @@ -282,6 +302,8 @@ class HerdingEnv(gym.Env): DOG_WHEEL_RADIUS, DOG_WHEEL_BASE_X / 2.0, DOG_WHEEL_BASE_Y / 2.0, WEBOTS_DT, + slip_std=slip_std, + rng=self._np_rng_lidar, ) else: wL, wR = velocity_to_wheels( @@ -294,6 +316,8 @@ class HerdingEnv(gym.Env): self.dog_x, self.dog_y, self.dog_heading = kinematics_step( self.dog_x, self.dog_y, self.dog_heading, wL, wR, DOG_WHEEL_RADIUS, DOG_WHEEL_BASE, WEBOTS_DT, + slip_std=slip_std, + rng=self._np_rng_lidar, ) self.dog_x, self.dog_y = clip_to_field(self.dog_x, self.dog_y, margin=0.3) # Extra constraint: dog stays north of the gate area. @@ -460,16 +484,68 @@ class HerdingEnv(gym.Env): for i in range(self.n_sheep)] def _update_tracker(self) -> None: + lidar_cfg = (self._herding_cfg.lidar + if self._herding_cfg is not None else None) + detection_cfg = (self._herding_cfg.detection + if self._herding_cfg is not None else None) ranges = simulate_scan( self.dog_x, self.dog_y, self.dog_heading, self._all_sheep_xy(), rng=self._np_rng_lidar, + lidar_cfg=lidar_cfg, ) detections = detections_from_scan( ranges, self.dog_x, self.dog_y, self.dog_heading, + detection_cfg=detection_cfg, + lidar_cfg=lidar_cfg, ) + # Domain randomisation: inject false-positive detections near static + # features to mimic the spurious clusters Webots' physical LiDAR + # produces from real 3D geometry (walls, posts, fence rails). + dr = (self._herding_cfg.domain_random + if self._herding_cfg is not None else None) + if dr is not None and dr.fp_rate > 0.0 and self._np_rng_lidar is not None: + detections = list(detections) + detections.extend( + self._sample_false_positives(dr.fp_rate, dr.fp_std_pos)) self._tracker.update(detections) + # Static feature anchor points used for FP injection. + # The rectangular list covers gate posts and field corners; the round + # list covers just the gate posts (the circular wall is handled separately). + _FP_ANCHORS_RECT = ( + (10.0, -15.0), (13.0, -15.0), # gate posts + (15.0, 15.0), (15.0, -15.0), + (-15.0, 15.0), (-15.0, -15.0), # field corners + (15.0, 0.0), (-15.0, 0.0), # mid-wall returns + (0.0, 15.0), (0.0, -15.0), + ) + _FP_ANCHORS_ROUND = ( + (0.0, -15.0), # gate centre + (-1.5, -15.0), (1.5, -15.0), # gate posts + (0.0, 15.0), # north wall + (10.6, -10.6), (-10.6, -10.6), # circular wall quadrants + ) + + def _sample_false_positives( + self, fp_rate: float, fp_std: float, + ) -> list[tuple[float, float]]: + """Return a list of spurious (x, y) detections for this step.""" + from herding.world.geometry import FIELD_SHAPE + anchors = (self._FP_ANCHORS_ROUND + if FIELD_SHAPE == "field_round" + else self._FP_ANCHORS_RECT) + n_fps = int(self._np_rng_lidar.poisson(fp_rate)) + if n_fps == 0: + return [] + fps = [] + chosen = self._np_rng_lidar.integers(0, len(anchors), size=n_fps) + noise = self._np_rng_lidar.normal(0.0, fp_std, size=(n_fps, 2)) + for k in range(n_fps): + ax, ay = anchors[chosen[k]] + fps.append((float(ax + noise[k, 0]), float(ay + noise[k, 1]))) + return fps + def perceived_positions(self) -> dict[str, tuple[float, float]]: """What the controller would "see" this step: tracker output in LiDAR mode, ground truth in privileged mode. Used by demo diff --git a/training/rl/train.py b/training/rl/train.py index a0a24e9..3f0e50a 100644 --- a/training/rl/train.py +++ b/training/rl/train.py @@ -23,22 +23,9 @@ import argparse import os from pathlib import Path -# Early CLI pre-parse for --world so geometry is configured before any -# herding.* / training.* import binds geometry constants. Matches the -# pattern used by training.bc.collect and training.eval. -_pre_argv = [a for a in os.sys.argv[1:]] -_pre_world = None -for i, a in enumerate(_pre_argv): - if a == "--world" and i + 1 < len(_pre_argv): - _pre_world = _pre_argv[i + 1] - break - if a.startswith("--world="): - _pre_world = a.split("=", 1)[1] - break -if _pre_world is not None: - from herding.world.geometry import configure as _geo_configure - _geo_configure(_pre_world) - os.environ["HERDING_WORLD"] = _pre_world +# Configure field geometry before other herding imports read it at module level. +from herding.world.geometry import configure_from_args as _configure_from_args +_configure_from_args() import numpy as np import torch as th @@ -59,11 +46,12 @@ from training.herding_env import HerdingEnv def _make_env(rank: int, seed: int, frame_stack: int, drive_mode: str = "differential", difficulty: float = 1.0, - max_n_sheep: int = 10): + max_n_sheep: int = 10, + herding_cfg=None): def _thunk(): env = HerdingEnv(seed=seed + rank, frame_stack=frame_stack, drive_mode=drive_mode, difficulty=difficulty, - max_n_sheep=max_n_sheep) + max_n_sheep=max_n_sheep, herding_cfg=herding_cfg) env = Monitor(env, info_keywords=("is_success", "n_sheep", "n_penned")) return env return _thunk @@ -241,6 +229,13 @@ def main() -> None: choices=["field", "field_round"], help="World shape. If not set, uses HERDING_WORLD " "env var or defaults to 'field'.") + # Domain randomisation + parser.add_argument("--fp-rate", type=float, default=0.0, + help="Mean false-positive detections per step (Poisson λ).") + parser.add_argument("--action-smooth", type=float, default=0.0, + help="EMA on dog actions (0=none, 0.55=Webots match).") + parser.add_argument("--wheel-slip-std", type=float, default=0.0, + help="Gaussian wheel-speed noise std (rad/s).") args = parser.parse_args() # --world was already honoured in the early pre-parse above; here we # just sanity-check that the final argparse view agrees. @@ -280,15 +275,31 @@ def main() -> None: drive_mode = "differential" print(f"[rl] drive_mode={drive_mode} (BC action_dim={bc_action_dim})") + from herding.config import HerdingConfig, DomainRandomConfig, RobotConfig + herding_cfg = None + if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0: + herding_cfg = HerdingConfig( + domain_random=DomainRandomConfig( + fp_rate=args.fp_rate, + wheel_slip_std=args.wheel_slip_std, + ), + robot=RobotConfig(action_smooth=args.action_smooth), + ) + print(f"[rl] domain-random: fp_rate={args.fp_rate} " + f"action_smooth={args.action_smooth} " + f"wheel_slip_std={args.wheel_slip_std}") + env_fns = [_make_env(i, args.seed, frame_stack, drive_mode, difficulty=args.difficulty, - max_n_sheep=args.max_n_sheep) + max_n_sheep=args.max_n_sheep, + herding_cfg=herding_cfg) for i in range(args.n_envs)] venv = SubprocVecEnv(env_fns) if args.n_envs > 1 else DummyVecEnv(env_fns) eval_venv = DummyVecEnv([_make_env(99, args.seed + 999, frame_stack, drive_mode, difficulty=args.difficulty, - max_n_sheep=args.max_n_sheep)]) + max_n_sheep=args.max_n_sheep, + herding_cfg=herding_cfg)]) print(f"[rl] difficulty={args.difficulty} max_n_sheep={args.max_n_sheep}") # Reward-shaping overrides (broadcast to every env instance). diff --git a/training/runs/bc_dagger1_differential_field/policy.zip b/training/runs/bc_dagger1_differential_field/policy.zip new file mode 100644 index 0000000..e04d8a3 Binary files /dev/null and b/training/runs/bc_dagger1_differential_field/policy.zip differ diff --git a/training/runs/bc_dagger2_differential_field/policy.zip b/training/runs/bc_dagger2_differential_field/policy.zip new file mode 100644 index 0000000..3ae78fa Binary files /dev/null and b/training/runs/bc_dagger2_differential_field/policy.zip differ diff --git a/training/runs/bc_differential_field/policy.zip b/training/runs/bc_differential_field/policy.zip index 22fb34d..e235512 100644 Binary files a/training/runs/bc_differential_field/policy.zip and b/training/runs/bc_differential_field/policy.zip differ diff --git a/training/runs/bc_differential_field_round/policy.zip b/training/runs/bc_differential_field_round/policy.zip index 07f9aec..a416dab 100644 Binary files a/training/runs/bc_differential_field_round/policy.zip and b/training/runs/bc_differential_field_round/policy.zip differ diff --git a/training/runs/bc_mecanum_field/policy.zip b/training/runs/bc_mecanum_field/policy.zip index 82ebd5d..5f65896 100644 Binary files a/training/runs/bc_mecanum_field/policy.zip and b/training/runs/bc_mecanum_field/policy.zip differ diff --git a/training/runs/bc_mecanum_field_round/policy.zip b/training/runs/bc_mecanum_field_round/policy.zip index 74516a2..b1da5e6 100644 Binary files a/training/runs/bc_mecanum_field_round/policy.zip and b/training/runs/bc_mecanum_field_round/policy.zip differ diff --git a/training/runs/rl_differential_field/policy.zip b/training/runs/rl_differential_field/policy.zip index e99f852..785d4e1 100644 Binary files a/training/runs/rl_differential_field/policy.zip and b/training/runs/rl_differential_field/policy.zip differ diff --git a/training/runs/rl_differential_field_round/policy.zip b/training/runs/rl_differential_field_round/policy.zip index 53349bb..3aa00d0 100644 Binary files a/training/runs/rl_differential_field_round/policy.zip and b/training/runs/rl_differential_field_round/policy.zip differ diff --git a/training/runs/rl_mecanum_field/policy.zip b/training/runs/rl_mecanum_field/policy.zip index 1a851fe..b6e2904 100644 Binary files a/training/runs/rl_mecanum_field/policy.zip and b/training/runs/rl_mecanum_field/policy.zip differ diff --git a/training/runs/rl_mecanum_field_round/policy.zip b/training/runs/rl_mecanum_field_round/policy.zip index a5718cd..4391226 100644 Binary files a/training/runs/rl_mecanum_field_round/policy.zip and b/training/runs/rl_mecanum_field_round/policy.zip differ