Webots sim-to-real fixes, DAgger pipeline, 360° proto variant

Today's session worked across the full Webots delivery stack — found and fixed a cluster of bugs blocking the BC/RL transfer, then explored training-side mitigations for the residual perception gap.

Bug fixes:
- Makefile FP_RATE default 2.0 → 0.0: BC demos used fp_rate=0 but RL fine-tune defaulted to fp_rate=2, poisoning the BC obs distribution and stalling PPO at 0% success across 1.46M+ steps.
- controllers/{shepherd_dog,sheep}/runtime.ini: Webots was launching controllers under system python3 (no numpy) and they were crashing silently. Pinned to the conda tir env.
- herding/config.py HERDING_WEBOTS preset: pen_latch_depth 0.5 → 2.0, max_new_tracks_per_step 3 → 1, static_reject 0.8 → 1.2. Stops phantom FPs near the gate from latching as permanently-penned tracks.
- herding/perception/sheep_tracker.py: penned tracks now decay at forget_steps × 8 instead of living forever. Adds get_positions min_freshness filter for deploy-time use.

Training/eval matches deployment:
- training/bc/collect.py: --dagger-policy flag for DAgger rollouts (policy drives, teacher labels) + --use-webots-preset for matched 140° tracker + DR config.
- controllers/shepherd_dog/shepherd_dog.py: scan-fallback (0, 0.6) when BC/RL sees empty sheep_positions — recovers from FOV gaps.

Tooling:
- tools/dagger_round.sh: one-shot DAgger round (collect + concat + bc).
- tools/webots_sweep_gt.sh: full sweep with HERDING_USE_GT=1 for the perception-gap diagnosis matrix.
- protos/ShepherdDog360.proto: 360° FOV variant for the FOV-ablation comparison. Canonical proto stays at 140° per project spec.

Artifacts: v1 BC/RL policies for all 4 (drive × world) combos trained in clean gym (success: diff/field 90-100%, diff/round 58%, mec/field 60-100%, mec/round 50-100%). DAgger r1/r2 BCs for diff/field show 12%→38% progression on gym HERDING_WEBOTS proxy but did not close the gap to actual Webots LiDAR (0/5 throughout).

Next: LSTM policy or learned tracker per the project-state memory.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-16 17:21:02 +00:00
parent c61df91950
commit dd5ac669e5
34 changed files with 2336 additions and 188 deletions
@@ -0,0 +1,240 @@
+"""Tests for herding/config.py — dataclass construction, defaults, overrides."""
+
+import math
+import pytest
+
+from herding.config import (
+    DetectionConfig,
+    DomainRandomConfig,
+    HerdingConfig,
+    HERDING_DEFAULT,
+    HERDING_WEBOTS,
+    LidarConfig,
+    LIDAR_FULL,
+    LIDAR_WEBOTS,
+    RobotConfig,
+    TrackerConfig,
+)
+
+
+# ---------------------------------------------------------------------------
+# LidarConfig
+# ---------------------------------------------------------------------------
+
+class TestLidarConfig:
+    """Checks on LidarConfig: presets, immutability, and argument validation."""
+
+    def test_defaults_match_full_circle_preset(self):
+        """A bare LidarConfig() must compare equal to the LIDAR_FULL preset."""
+        default_cfg = LidarConfig()
+        assert default_cfg == LIDAR_FULL
+
+    def test_webots_preset(self):
+        """LIDAR_WEBOTS must carry 180 rays and a 140-degree field of view."""
+        expected_fov = math.radians(140.0)
+        assert LIDAR_WEBOTS.n_rays == 180
+        assert abs(LIDAR_WEBOTS.fov_rad - expected_fov) < 1e-9
+
+    def test_frozen(self):
+        """Assigning to a field of an existing config must raise."""
+        lidar = LidarConfig()
+        # Either exception type is accepted by this test.
+        with pytest.raises((AttributeError, TypeError)):
+            lidar.n_rays = 42  # type: ignore[misc]
+
+    def test_invalid_n_rays(self):
+        """n_rays=0 is expected to be rejected with ValueError."""
+        with pytest.raises(ValueError):
+            LidarConfig(n_rays=0)
+
+    def test_invalid_fov(self):
+        """fov_rad values of 0.0 and 3*pi are each expected to raise ValueError."""
+        for bad_fov in (0.0, math.pi * 3):
+            with pytest.raises(ValueError):
+                LidarConfig(fov_rad=bad_fov)
+
+    def test_invalid_max_range(self):
+        """A negative max_range is expected to be rejected with ValueError."""
+        with pytest.raises(ValueError):
+            LidarConfig(max_range=-1.0)
+
+
+# ---------------------------------------------------------------------------
+# TrackerConfig
+# ---------------------------------------------------------------------------
+
+class TestTrackerConfig:
+    """Checks on TrackerConfig defaults, the Webots preset, and validation."""
+
+    def test_defaults(self):
+        """Pin the default forget_steps and max_new_tracks_per_step values."""
+        tracker = TrackerConfig()
+        pinned = {"forget_steps": 200, "max_new_tracks_per_step": 10}
+        for field_name, value in pinned.items():
+            assert getattr(tracker, field_name) == value
+
+    def test_webots_preset_tighter(self):
+        """Pin the tracker values carried by the HERDING_WEBOTS preset."""
+        tracker = HERDING_WEBOTS.tracker
+        pinned = {
+            "forget_steps": 120,
+            "max_new_tracks_per_step": 1,
+            "pen_latch_depth": 2.0,
+        }
+        for field_name, value in pinned.items():
+            assert getattr(tracker, field_name) == value
+
+    def test_invalid_forget_steps(self):
+        """forget_steps=0 is expected to be rejected with ValueError."""
+        with pytest.raises(ValueError):
+            TrackerConfig(forget_steps=0)
+
+    def test_invalid_max_new_tracks(self):
+        """max_new_tracks_per_step=0 is expected to be rejected with ValueError."""
+        with pytest.raises(ValueError):
+            TrackerConfig(max_new_tracks_per_step=0)
+
+
+# ---------------------------------------------------------------------------
+# DetectionConfig
+# ---------------------------------------------------------------------------
+
+class TestDetectionConfig:
+    """Checks on DetectionConfig.wall_reject: default, Webots preset, validation."""
+
+    def test_defaults(self):
+        """The default wall_reject is pinned at 0.5."""
+        detection = DetectionConfig()
+        assert detection.wall_reject == 0.5
+
+    def test_webots_preset_wall_reject(self):
+        """The Webots preset keeps wall_reject at 0.5 m.
+
+        1.0 m was too aggressive near the south gate.
+        """
+        webots_detection = HERDING_WEBOTS.detection
+        assert webots_detection.wall_reject == 0.5
+
+    def test_invalid_wall_reject(self):
+        """A negative wall_reject is expected to be rejected with ValueError."""
+        with pytest.raises(ValueError):
+            DetectionConfig(wall_reject=-0.1)
+
+
+# ---------------------------------------------------------------------------
+# RobotConfig
+# ---------------------------------------------------------------------------
+
+class TestRobotConfig:
+    """Checks on RobotConfig: derived max_linear and action_smooth handling."""
+
+    def test_max_linear_derived(self):
+        """max_linear must equal wheel_radius * max_wheel_omega within 1e-9."""
+        robot = RobotConfig()
+        expected_linear = robot.wheel_radius * robot.max_wheel_omega
+        assert abs(robot.max_linear - expected_linear) < 1e-9
+
+    def test_default_action_smooth_zero(self):
+        """The default action_smooth is pinned at 0.0."""
+        robot = RobotConfig()
+        assert robot.action_smooth == 0.0
+
+    def test_webots_action_smooth(self):
+        """The HERDING_WEBOTS preset pins action_smooth at 0.55."""
+        webots_robot = HERDING_WEBOTS.robot
+        assert webots_robot.action_smooth == 0.55
+
+    def test_invalid_action_smooth(self):
+        """action_smooth values of 1.0 and -0.1 are each expected to raise ValueError."""
+        for bad_smooth in (1.0, -0.1):
+            with pytest.raises(ValueError):
+                RobotConfig(action_smooth=bad_smooth)
+
+
+# ---------------------------------------------------------------------------
+# DomainRandomConfig
+# ---------------------------------------------------------------------------
+
+class TestDomainRandomConfig:
+    """Checks on DomainRandomConfig: zero defaults and validation."""
+
+    def test_all_zeros_by_default(self):
+        """fp_rate, wheel_slip_std and compass_noise_std all default to 0.0."""
+        randomisation = DomainRandomConfig()
+        for field_name in ("fp_rate", "wheel_slip_std", "compass_noise_std"):
+            assert getattr(randomisation, field_name) == 0.0
+
+    def test_invalid_fp_rate(self):
+        """A negative fp_rate is expected to be rejected with ValueError."""
+        with pytest.raises(ValueError):
+            DomainRandomConfig(fp_rate=-1.0)
+
+    def test_invalid_slip_std(self):
+        """A negative wheel_slip_std is expected to be rejected with ValueError."""
+        with pytest.raises(ValueError):
+            DomainRandomConfig(wheel_slip_std=-0.01)
+
+
+# ---------------------------------------------------------------------------
+# HerdingConfig
+# ---------------------------------------------------------------------------
+
+class TestHerdingConfig:
+    """Checks on the top-level HerdingConfig: defaults, replace(), legacy parity."""
+
+    def test_default_is_herding_default(self):
+        """HerdingConfig() with no arguments must equal HERDING_DEFAULT."""
+        built = HerdingConfig()
+        assert built == HERDING_DEFAULT
+
+    def test_replace_sub_config(self):
+        """replace() swaps the given sub-config and leaves the others equal."""
+        noisy = DomainRandomConfig(fp_rate=2.0)
+        derived = HERDING_WEBOTS.replace(domain_random=noisy)
+        assert derived.domain_random.fp_rate == 2.0
+        # Sub-configs that were not passed to replace() must compare equal.
+        for untouched in ("tracker", "lidar"):
+            assert getattr(derived, untouched) == getattr(HERDING_WEBOTS, untouched)
+
+    def test_herding_default_matches_original_module_constants(self):
+        """Verify the default config reproduces the original hardcoded values."""
+        from herding.perception.lidar_sim import (
+            LIDAR_N_RAYS, LIDAR_FOV, LIDAR_MAX_RANGE, LIDAR_NOISE,
+            SHEEP_RADIUS, POST_RADIUS,
+        )
+        from herding.perception.lidar_perception import (
+            GAP_THRESHOLD, MAX_CLUSTER_SPAN, RANGE_HIT_EPS,
+            SPLIT_RANGE_GAP, WALL_REJECT, STATIC_REJECT,
+        )
+        from herding.perception.sheep_tracker import (
+            GATE_M, REACQUIRE_GATE_M, REACQUIRE_MIN_AGE, PENNED_GATE_M,
+            FORGET_STEPS, PREDICT_STEPS, VELOCITY_CLAMP,
+        )
+        # Sub-config name -> {field name: module-level constant}. Insertion
+        # order fixes the order in which the fields are compared.
+        legacy_by_section = {
+            "lidar": {
+                "n_rays": LIDAR_N_RAYS,
+                "fov_rad": LIDAR_FOV,
+                "max_range": LIDAR_MAX_RANGE,
+                "noise_std": LIDAR_NOISE,
+                "sheep_radius": SHEEP_RADIUS,
+                "post_radius": POST_RADIUS,
+            },
+            "detection": {
+                "gap_threshold": GAP_THRESHOLD,
+                "max_cluster_span": MAX_CLUSTER_SPAN,
+                "range_hit_eps": RANGE_HIT_EPS,
+                "split_range_gap": SPLIT_RANGE_GAP,
+                "wall_reject": WALL_REJECT,
+                "static_reject": STATIC_REJECT,
+            },
+            "tracker": {
+                "gate_m": GATE_M,
+                "reacquire_gate_m": REACQUIRE_GATE_M,
+                "reacquire_min_age": REACQUIRE_MIN_AGE,
+                "penned_gate_m": PENNED_GATE_M,
+                "forget_steps": FORGET_STEPS,
+                "predict_steps": PREDICT_STEPS,
+                "velocity_clamp": VELOCITY_CLAMP,
+            },
+        }
+        for section, legacy_fields in legacy_by_section.items():
+            sub_cfg = getattr(HERDING_DEFAULT, section)
+            for field_name, legacy_value in legacy_fields.items():
+                assert getattr(sub_cfg, field_name) == legacy_value
+
+
+# ---------------------------------------------------------------------------
+# Integration: HerdingEnv honours the config
+# ---------------------------------------------------------------------------
+
+class TestHerdingEnvConfig:
+    """Integration checks that HerdingEnv accepts and honours a HerdingConfig.
+
+    The multi-step tests drive the environment with random actions and stop
+    as soon as a step reports ``terminated`` or ``truncated``, so a finished
+    episode is never stepped again without a reset.
+    """
+
+    @staticmethod
+    def _rollout(env, n_steps):
+        """Take up to ``n_steps`` random actions and return the last observation.
+
+        Stops early when ``env.step`` reports the episode as terminated or
+        truncated. The name has no ``test`` prefix, so pytest's default
+        collection rules do not treat it as a test.
+        """
+        last_obs = None
+        for _step in range(n_steps):
+            last_obs, _reward, terminated, truncated, _info = env.step(
+                env.action_space.sample()
+            )
+            if terminated or truncated:
+                break
+        return last_obs
+
+    def test_default_env_unchanged(self):
+        """HerdingEnv() still works with no config — zero behaviour change."""
+        from training.herding_env import HerdingEnv
+        env = HerdingEnv(n_sheep=1, max_steps=5, difficulty=1.0, seed=0)
+        obs, _info = env.reset()
+        assert obs.shape == (32,)
+        next_obs, *_rest = env.step(env.action_space.sample())
+        assert next_obs.shape == (32,)
+
+    def test_webots_config_propagates_action_smooth(self):
+        """The env built from HERDING_WEBOTS exposes ACTION_SMOOTH == 0.55."""
+        from training.herding_env import HerdingEnv
+        env = HerdingEnv(herding_cfg=HERDING_WEBOTS)
+        assert env.ACTION_SMOOTH == 0.55
+
+    def test_webots_config_runs(self):
+        """A short random rollout under HERDING_WEBOTS yields (32,) observations."""
+        from training.herding_env import HerdingEnv
+        env = HerdingEnv(
+            n_sheep=2, max_steps=10, difficulty=1.0, seed=42,
+            herding_cfg=HERDING_WEBOTS,
+        )
+        env.reset()
+        obs = self._rollout(env, 5)
+        assert obs.shape == (32,)
+
+    def test_domain_random_fp_runs(self):
+        """Smoke test: stepping with non-zero fp_rate / fp_std_pos does not raise."""
+        from training.herding_env import HerdingEnv
+        cfg = HERDING_WEBOTS.replace(
+            domain_random=DomainRandomConfig(fp_rate=3.0, fp_std_pos=0.2)
+        )
+        env = HerdingEnv(n_sheep=2, max_steps=10, difficulty=1.0, seed=7, herding_cfg=cfg)
+        env.reset()
+        self._rollout(env, 5)
+
+    def test_domain_random_slip_runs(self):
+        """Smoke test: mecanum drive with wheel-slip and compass noise does not raise."""
+        from training.herding_env import HerdingEnv
+        cfg = HERDING_WEBOTS.replace(
+            domain_random=DomainRandomConfig(wheel_slip_std=0.05, compass_noise_std=0.02)
+        )
+        env = HerdingEnv(n_sheep=1, max_steps=10, difficulty=1.0, seed=3,
+                         drive_mode="mecanum", herding_cfg=cfg)
+        env.reset()
+        self._rollout(env, 5)