Webots sim-to-real fixes, DAgger pipeline, 360° proto variant

Today's session worked across the full Webots delivery stack — found and
fixed a cluster of bugs blocking the BC/RL transfer, then explored
training-side mitigations for the residual perception gap.

Bug fixes:
- Makefile FP_RATE default 2.0 → 0.0: BC demos used fp_rate=0 but RL
  fine-tune defaulted to fp_rate=2, poisoning the BC obs distribution
  and stalling PPO at 0% success across 1.46M+ steps.
- controllers/{shepherd_dog,sheep}/runtime.ini: Webots was launching
  controllers under system python3 (no numpy) and they were crashing
  silently. Pinned to the conda tir env.
- herding/config.py HERDING_WEBOTS preset: pen_latch_depth 0.5 → 2.0,
  max_new_tracks_per_step 3 → 1, static_reject 0.8 → 1.2. Stops phantom
  FPs near the gate from latching as permanently-penned tracks.
- herding/perception/sheep_tracker.py: penned tracks now decay at
  forget_steps × 8 instead of living forever. Adds get_positions
  min_freshness filter for deploy-time use.

Training/eval matches deployment:
- training/bc/collect.py: --dagger-policy flag for DAgger rollouts
  (policy drives, teacher labels) + --use-webots-preset for matched
  140° tracker + DR config.
- controllers/shepherd_dog/shepherd_dog.py: scan-fallback (0, 0.6) when
  BC/RL sees empty sheep_positions — recovers from FOV gaps.

Tooling:
- tools/dagger_round.sh: one-shot DAgger round (collect + concat + bc).
- tools/webots_sweep_gt.sh: full sweep with HERDING_USE_GT=1 for the
  perception-gap diagnosis matrix.
- protos/ShepherdDog360.proto: 360° FOV variant for the FOV-ablation
  comparison. Canonical proto stays at 140° per project spec.

Artifacts: v1 BC/RL policies for all 4 (drive × world) combos trained
in clean gym (success: diff/field 90-100%, diff/round 58%, mec/field
60-100%, mec/round 50-100%). DAgger r1/r2 BCs for diff/field show
12%→38% progression on the gym HERDING_WEBOTS proxy but did not close
the gap to actual Webots LiDAR (0/5 throughout). Next: LSTM policy or
learned tracker per the project-state memory.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Johnny Fernandes
2026-05-16 17:21:02 +00:00
parent c61df91950
commit dd5ac669e5
34 changed files with 2336 additions and 188 deletions
+240
View File
@@ -0,0 +1,240 @@
"""Tests for herding/config.py — dataclass construction, defaults, overrides."""
import math
import pytest
from herding.config import (
DetectionConfig,
DomainRandomConfig,
HerdingConfig,
HERDING_DEFAULT,
HERDING_WEBOTS,
LidarConfig,
LIDAR_FULL,
LIDAR_WEBOTS,
RobotConfig,
TrackerConfig,
)
# ---------------------------------------------------------------------------
# LidarConfig
# ---------------------------------------------------------------------------
class TestLidarConfig:
    """Checks LidarConfig defaults, the Webots preset, immutability and validation."""

    def test_defaults_match_full_circle_preset(self):
        """A default-constructed LidarConfig compares equal to LIDAR_FULL."""
        default_cfg = LidarConfig()
        assert default_cfg == LIDAR_FULL

    def test_webots_preset(self):
        """LIDAR_WEBOTS has 180 rays and a field of view of 140 degrees (in radians)."""
        expected_fov = math.radians(140.0)
        assert LIDAR_WEBOTS.n_rays == 180
        assert abs(LIDAR_WEBOTS.fov_rad - expected_fov) < 1e-9

    def test_frozen(self):
        """Assigning to a field of an existing instance must raise."""
        frozen_cfg = LidarConfig()
        # Either exception type is accepted as proof that the write was refused.
        with pytest.raises((AttributeError, TypeError)):
            frozen_cfg.n_rays = 42  # type: ignore[misc]

    def test_invalid_n_rays(self):
        """A ray count of zero is rejected with ValueError."""
        with pytest.raises(ValueError):
            LidarConfig(n_rays=0)

    def test_invalid_fov(self):
        """A field of view of zero or of 3*pi radians is rejected with ValueError."""
        for bad_fov in (0.0, math.pi * 3):
            with pytest.raises(ValueError):
                LidarConfig(fov_rad=bad_fov)

    def test_invalid_max_range(self):
        """A negative maximum range is rejected with ValueError."""
        with pytest.raises(ValueError):
            LidarConfig(max_range=-1.0)
# ---------------------------------------------------------------------------
# TrackerConfig
# ---------------------------------------------------------------------------
class TestTrackerConfig:
    """Checks TrackerConfig defaults, the Webots tracker preset and validation."""

    def test_defaults(self):
        """Default tracker: forget_steps is 200 and up to 10 new tracks per step."""
        tracker = TrackerConfig()
        assert tracker.forget_steps == 200
        assert tracker.max_new_tracks_per_step == 10

    def test_webots_preset_tighter(self):
        """HERDING_WEBOTS pins forget_steps=120, one new track per step, latch depth 2.0."""
        webots_tracker = HERDING_WEBOTS.tracker
        assert webots_tracker.forget_steps == 120
        assert webots_tracker.max_new_tracks_per_step == 1
        assert webots_tracker.pen_latch_depth == 2.0

    def test_invalid_forget_steps(self):
        """forget_steps=0 is rejected with ValueError."""
        with pytest.raises(ValueError):
            TrackerConfig(forget_steps=0)

    def test_invalid_max_new_tracks(self):
        """max_new_tracks_per_step=0 is rejected with ValueError."""
        with pytest.raises(ValueError):
            TrackerConfig(max_new_tracks_per_step=0)
# ---------------------------------------------------------------------------
# DetectionConfig
# ---------------------------------------------------------------------------
class TestDetectionConfig:
    """Checks the DetectionConfig wall_reject default, its Webots value and validation."""

    def test_defaults(self):
        """The default wall_reject is 0.5."""
        detection = DetectionConfig()
        assert detection.wall_reject == 0.5

    def test_webots_preset_wall_reject(self):
        """The Webots preset keeps wall_reject at the same 0.5 value."""
        # Deliberately left at 0.5 m: 1.0 m proved too aggressive near the south gate.
        webots_detection = HERDING_WEBOTS.detection
        assert webots_detection.wall_reject == 0.5

    def test_invalid_wall_reject(self):
        """A negative wall_reject is rejected with ValueError."""
        with pytest.raises(ValueError):
            DetectionConfig(wall_reject=-0.1)
# ---------------------------------------------------------------------------
# RobotConfig
# ---------------------------------------------------------------------------
class TestRobotConfig:
    """Checks RobotConfig's derived max_linear and the action_smooth field."""

    def test_max_linear_derived(self):
        """max_linear equals wheel_radius * max_wheel_omega to within 1e-9."""
        robot = RobotConfig()
        derived_speed = robot.wheel_radius * robot.max_wheel_omega
        assert abs(robot.max_linear - derived_speed) < 1e-9

    def test_default_action_smooth_zero(self):
        """A default RobotConfig has action_smooth equal to 0.0."""
        robot = RobotConfig()
        assert robot.action_smooth == 0.0

    def test_webots_action_smooth(self):
        """The Webots preset sets action_smooth to 0.55."""
        webots_robot = HERDING_WEBOTS.robot
        assert webots_robot.action_smooth == 0.55

    def test_invalid_action_smooth(self):
        """action_smooth values of 1.0 and -0.1 are both rejected with ValueError."""
        for bad_smooth in (1.0, -0.1):
            with pytest.raises(ValueError):
                RobotConfig(action_smooth=bad_smooth)
# ---------------------------------------------------------------------------
# DomainRandomConfig
# ---------------------------------------------------------------------------
class TestDomainRandomConfig:
    """Checks DomainRandomConfig zero defaults and rejection of negative values."""

    def test_all_zeros_by_default(self):
        """fp_rate, wheel_slip_std and compass_noise_std all default to 0.0."""
        randomisation = DomainRandomConfig()
        for field_name in ("fp_rate", "wheel_slip_std", "compass_noise_std"):
            assert getattr(randomisation, field_name) == 0.0, field_name

    def test_invalid_fp_rate(self):
        """A negative fp_rate is rejected with ValueError."""
        with pytest.raises(ValueError):
            DomainRandomConfig(fp_rate=-1.0)

    def test_invalid_slip_std(self):
        """A negative wheel_slip_std is rejected with ValueError."""
        with pytest.raises(ValueError):
            DomainRandomConfig(wheel_slip_std=-0.01)
# ---------------------------------------------------------------------------
# HerdingConfig
# ---------------------------------------------------------------------------
class TestHerdingConfig:
    """Checks the top-level HerdingConfig: defaults, replace(), legacy constants."""

    def test_default_is_herding_default(self):
        """A default-constructed HerdingConfig compares equal to HERDING_DEFAULT."""
        default_cfg = HerdingConfig()
        assert default_cfg == HERDING_DEFAULT

    def test_replace_sub_config(self):
        """replace() swaps one sub-config and leaves tracker and lidar untouched."""
        noisy_dr = DomainRandomConfig(fp_rate=2.0)
        patched = HERDING_WEBOTS.replace(domain_random=noisy_dr)

        assert patched.domain_random.fp_rate == 2.0
        # The sub-configs that were not replaced must still compare equal.
        assert patched.tracker == HERDING_WEBOTS.tracker
        assert patched.lidar == HERDING_WEBOTS.lidar

    def test_herding_default_matches_original_module_constants(self):
        """HERDING_DEFAULT carries the same values as the perception module constants."""
        from herding.perception.lidar_sim import (
            LIDAR_FOV,
            LIDAR_MAX_RANGE,
            LIDAR_N_RAYS,
            LIDAR_NOISE,
            POST_RADIUS,
            SHEEP_RADIUS,
        )
        from herding.perception.lidar_perception import (
            GAP_THRESHOLD,
            MAX_CLUSTER_SPAN,
            RANGE_HIT_EPS,
            SPLIT_RANGE_GAP,
            STATIC_REJECT,
            WALL_REJECT,
        )
        from herding.perception.sheep_tracker import (
            FORGET_STEPS,
            GATE_M,
            PENNED_GATE_M,
            PREDICT_STEPS,
            REACQUIRE_GATE_M,
            REACQUIRE_MIN_AGE,
            VELOCITY_CLAMP,
        )

        baseline = HERDING_DEFAULT
        # One (sub-config, {field name: module constant}) entry per perception module;
        # dict insertion order keeps the comparisons in a fixed sequence.
        expectations = (
            (baseline.lidar, {
                "n_rays": LIDAR_N_RAYS,
                "fov_rad": LIDAR_FOV,
                "max_range": LIDAR_MAX_RANGE,
                "noise_std": LIDAR_NOISE,
                "sheep_radius": SHEEP_RADIUS,
                "post_radius": POST_RADIUS,
            }),
            (baseline.detection, {
                "gap_threshold": GAP_THRESHOLD,
                "max_cluster_span": MAX_CLUSTER_SPAN,
                "range_hit_eps": RANGE_HIT_EPS,
                "split_range_gap": SPLIT_RANGE_GAP,
                "wall_reject": WALL_REJECT,
                "static_reject": STATIC_REJECT,
            }),
            (baseline.tracker, {
                "gate_m": GATE_M,
                "reacquire_gate_m": REACQUIRE_GATE_M,
                "reacquire_min_age": REACQUIRE_MIN_AGE,
                "penned_gate_m": PENNED_GATE_M,
                "forget_steps": FORGET_STEPS,
                "predict_steps": PREDICT_STEPS,
                "velocity_clamp": VELOCITY_CLAMP,
            }),
        )
        for sub_cfg, expected_fields in expectations:
            for field_name, module_constant in expected_fields.items():
                assert getattr(sub_cfg, field_name) == module_constant, field_name
# ---------------------------------------------------------------------------
# Integration: HerdingEnv honours the config
# ---------------------------------------------------------------------------
class TestHerdingEnvConfig:
    """Smoke tests that HerdingEnv accepts and honours a HerdingConfig.

    HerdingEnv is imported inside each test rather than at module level,
    as in the original file.
    """

    @staticmethod
    def _step_randomly(env, n_steps):
        """Apply ``n_steps`` sampled actions, resetting whenever an episode ends.

        Stepping an environment after it has reported ``terminated`` or
        ``truncated`` is outside the Gymnasium step/reset contract, so the
        helper starts a fresh episode in that case instead of carrying on.
        NOTE(review): assumes HerdingEnv follows that contract — confirm.

        Args:
            env: an already-reset environment exposing ``step``, ``reset`` and
                ``action_space.sample``.
            n_steps: number of ``env.step`` calls to make.

        Returns:
            The observations returned by each ``env.step`` call, in order.
        """
        observations = []
        for _ in range(n_steps):
            obs, _, terminated, truncated, _ = env.step(env.action_space.sample())
            observations.append(obs)
            if terminated or truncated:
                env.reset()
        return observations

    def test_default_env_unchanged(self):
        """HerdingEnv() still works with no config — zero behaviour change."""
        from training.herding_env import HerdingEnv

        env = HerdingEnv(n_sheep=1, max_steps=5, difficulty=1.0, seed=0)
        obs, _ = env.reset()
        assert obs.shape == (32,)
        (next_obs,) = self._step_randomly(env, 1)
        assert next_obs.shape == (32,)

    def test_webots_config_propagates_action_smooth(self):
        """The Webots preset's action_smooth (0.55) shows up as env.ACTION_SMOOTH."""
        from training.herding_env import HerdingEnv

        env = HerdingEnv(herding_cfg=HERDING_WEBOTS)
        assert env.ACTION_SMOOTH == 0.55

    def test_webots_config_runs(self):
        """An env built from HERDING_WEBOTS steps and keeps the (32,) observation shape."""
        from training.herding_env import HerdingEnv

        env = HerdingEnv(
            n_sheep=2, max_steps=10, difficulty=1.0, seed=42,
            herding_cfg=HERDING_WEBOTS,
        )
        env.reset()
        for obs in self._step_randomly(env, 5):
            assert obs.shape == (32,)

    def test_domain_random_fp_runs(self):
        """Stepping with false-positive randomisation enabled does not raise."""
        from training.herding_env import HerdingEnv

        cfg = HERDING_WEBOTS.replace(
            domain_random=DomainRandomConfig(fp_rate=3.0, fp_std_pos=0.2)
        )
        env = HerdingEnv(n_sheep=2, max_steps=10, difficulty=1.0, seed=7, herding_cfg=cfg)
        env.reset()
        self._step_randomly(env, 5)

    def test_domain_random_slip_runs(self):
        """Stepping a mecanum env with wheel-slip and compass noise does not raise."""
        from training.herding_env import HerdingEnv

        cfg = HERDING_WEBOTS.replace(
            domain_random=DomainRandomConfig(wheel_slip_std=0.05, compass_noise_std=0.02)
        )
        env = HerdingEnv(n_sheep=1, max_steps=10, difficulty=1.0, seed=3,
                         drive_mode="mecanum", herding_cfg=cfg)
        env.reset()
        self._step_randomly(env, 5)