Webots sim-to-real fixes, DAgger pipeline, 360° proto variant
Today's session worked across the full Webots delivery stack — found and
fixed a cluster of bugs blocking the BC/RL transfer, then explored
training-side mitigations for the residual perception gap.
Bug fixes:
- Makefile FP_RATE default 2.0 → 0.0: BC demos used fp_rate=0 but RL
fine-tune defaulted to fp_rate=2, poisoning the BC obs distribution
and stalling PPO at 0% success across 1.46M+ steps.
- controllers/{shepherd_dog,sheep}/runtime.ini: Webots was launching
controllers under system python3 (no numpy) and they were crashing
silently. Pinned to the conda tir env.
- herding/config.py HERDING_WEBOTS preset: pen_latch_depth 0.5 → 2.0,
max_new_tracks_per_step 3 → 1, static_reject 0.8 → 1.2. Stops phantom
FPs near the gate from latching as permanently-penned tracks.
- herding/perception/sheep_tracker.py: penned tracks now decay at
forget_steps × 8 instead of living forever. Adds get_positions
min_freshness filter for deploy-time use.
Training/eval matches deployment:
- training/bc/collect.py: --dagger-policy flag for DAgger rollouts
(policy drives, teacher labels) + --use-webots-preset for matched
140° tracker + DR config.
- controllers/shepherd_dog/shepherd_dog.py: scan-fallback (0, 0.6) when
BC/RL sees empty sheep_positions — recovers from FOV gaps.
Tooling:
- tools/dagger_round.sh: one-shot DAgger round (collect + concat + bc).
- tools/webots_sweep_gt.sh: full sweep with HERDING_USE_GT=1 for the
perception-gap diagnosis matrix.
- protos/ShepherdDog360.proto: 360° FOV variant for the FOV-ablation
comparison. Canonical proto stays at 140° per project spec.
Artifacts: v1 BC/RL policies for all 4 (drive × world) combos trained
in clean gym (success: diff/field 90-100%, diff/round 58%, mec/field
60-100%, mec/round 50-100%). DAgger r1/r2 BCs for diff/field show
12%→38% progression on the gym HERDING_WEBOTS proxy but did not close
the gap to actual Webots LiDAR (0/5 throughout). Next: LSTM policy or
learned tracker per the project-state memory.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+77
-1
@@ -47,6 +47,7 @@ from herding.perception.lidar_sim import simulate_scan
|
||||
from herding.perception.obs import OBS_DIM, build_obs
|
||||
from herding.perception.sheep_tracker import SheepTracker
|
||||
from herding.control.strombom import compute_action as strombom_action
|
||||
from herding.config import HerdingConfig
|
||||
|
||||
|
||||
class HerdingEnv(gym.Env):
|
||||
@@ -87,13 +88,24 @@ class HerdingEnv(gym.Env):
|
||||
use_lidar: bool = True,
|
||||
frame_stack: int = 1,
|
||||
drive_mode: str = "differential",
|
||||
herding_cfg: Optional[HerdingConfig] = None,
|
||||
):
|
||||
super().__init__()
|
||||
# Store the config; fall back to defaults when None.
|
||||
self._herding_cfg = herding_cfg
|
||||
|
||||
# Apply robot config overrides — these shadow the class attributes
|
||||
# so that per-instance configuration is possible without touching
|
||||
# the class-level defaults used by unconfigured instances.
|
||||
if herding_cfg is not None:
|
||||
self.ACTION_SMOOTH = herding_cfg.robot.action_smooth
|
||||
|
||||
# ``use_lidar=True`` (default): obs and imitation-reward teacher
|
||||
# see only LiDAR-perceived positions via a tracker, matching the
|
||||
# Webots controller. ``False`` exposes ground truth for ablation.
|
||||
self._use_lidar = bool(use_lidar)
|
||||
self._tracker = SheepTracker() if self._use_lidar else None
|
||||
tracker_cfg = herding_cfg.tracker if herding_cfg is not None else None
|
||||
self._tracker = SheepTracker(tracker_cfg=tracker_cfg) if self._use_lidar else None
|
||||
self._np_rng_lidar: Optional[np.random.Generator] = None
|
||||
|
||||
# Frame stacking: the policy receives the last K obs concatenated,
|
||||
@@ -261,6 +273,14 @@ class HerdingEnv(gym.Env):
|
||||
vx, vy = float(self.smoothed_action[0]), float(self.smoothed_action[1])
|
||||
omega = float(self.smoothed_action[2]) if self._action_dim >= 3 else 0.0
|
||||
|
||||
# Domain randomisation: wheel-slip std (used by the kinematics step) and compass (heading) noise.
|
||||
dr = (self._herding_cfg.domain_random
|
||||
if self._herding_cfg is not None else None)
|
||||
slip_std = dr.wheel_slip_std if dr is not None else 0.0
|
||||
if dr is not None and dr.compass_noise_std > 0.0 and self._np_rng_lidar is not None:
|
||||
self.dog_heading = float(self.dog_heading + self._np_rng_lidar.normal(
|
||||
0.0, dr.compass_noise_std))
|
||||
|
||||
# Safety supervisor — dog stays north of the gate.
|
||||
if self.dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
|
||||
vx, vy = 0.0, 1.0
|
||||
@@ -282,6 +302,8 @@ class HerdingEnv(gym.Env):
|
||||
DOG_WHEEL_RADIUS,
|
||||
DOG_WHEEL_BASE_X / 2.0, DOG_WHEEL_BASE_Y / 2.0,
|
||||
WEBOTS_DT,
|
||||
slip_std=slip_std,
|
||||
rng=self._np_rng_lidar,
|
||||
)
|
||||
else:
|
||||
wL, wR = velocity_to_wheels(
|
||||
@@ -294,6 +316,8 @@ class HerdingEnv(gym.Env):
|
||||
self.dog_x, self.dog_y, self.dog_heading = kinematics_step(
|
||||
self.dog_x, self.dog_y, self.dog_heading,
|
||||
wL, wR, DOG_WHEEL_RADIUS, DOG_WHEEL_BASE, WEBOTS_DT,
|
||||
slip_std=slip_std,
|
||||
rng=self._np_rng_lidar,
|
||||
)
|
||||
self.dog_x, self.dog_y = clip_to_field(self.dog_x, self.dog_y, margin=0.3)
|
||||
# Extra constraint: dog stays north of the gate area.
|
||||
@@ -460,16 +484,68 @@ class HerdingEnv(gym.Env):
|
||||
for i in range(self.n_sheep)]
|
||||
|
||||
def _update_tracker(self) -> None:
    """Simulate one LiDAR scan from the dog's pose and update the tracker.

    The scan is converted into detections; when the domain-randomisation
    config asks for it (``fp_rate > 0`` and a LiDAR RNG is available),
    spurious detections are appended before the tracker is updated.
    Without a stored ``HerdingConfig``, ``None`` is passed for both the
    LiDAR and the detection config.
    """
    cfg = self._herding_cfg
    configured = cfg is not None
    lidar_cfg = cfg.lidar if configured else None
    detection_cfg = cfg.detection if configured else None

    scan = simulate_scan(
        self.dog_x,
        self.dog_y,
        self.dog_heading,
        self._all_sheep_xy(),
        rng=self._np_rng_lidar,
        lidar_cfg=lidar_cfg,
    )
    detections = detections_from_scan(
        scan,
        self.dog_x,
        self.dog_y,
        self.dog_heading,
        detection_cfg=detection_cfg,
        lidar_cfg=lidar_cfg,
    )

    # Domain randomisation: inject false-positive detections near static
    # features to mimic the spurious clusters Webots' physical LiDAR
    # produces from real 3D geometry (walls, posts, fence rails).
    dr = cfg.domain_random if configured else None
    inject_fps = (
        dr is not None
        and dr.fp_rate > 0.0
        and self._np_rng_lidar is not None
    )
    if inject_fps:
        # Copy first so the sequence returned by detections_from_scan is
        # never mutated in place.
        merged = list(detections)
        merged += self._sample_false_positives(dr.fp_rate, dr.fp_std_pos)
        detections = merged

    self._tracker.update(detections)
|
||||
|
||||
# Static feature anchor points used for FP injection: each spurious
# detection is placed at a randomly chosen anchor plus positional noise.
|
||||
# The rectangular list covers gate posts, field corners and mid-wall points;
|
||||
# the round list covers the gate centre and posts, a north-wall point and
# two points on the circular wall.
|
||||
_FP_ANCHORS_RECT = (
|
||||
(10.0, -15.0), (13.0, -15.0), # gate posts
|
||||
(15.0, 15.0), (15.0, -15.0), # field corners (x = +15)
|
||||
(-15.0, 15.0), (-15.0, -15.0), # field corners (x = -15)
|
||||
(15.0, 0.0), (-15.0, 0.0), # mid-wall returns (x = ±15 walls)
|
||||
(0.0, 15.0), (0.0, -15.0), # mid-wall returns (y = ±15 walls)
|
||||
)
|
||||
_FP_ANCHORS_ROUND = (
|
||||
(0.0, -15.0), # gate centre
|
||||
(-1.5, -15.0), (1.5, -15.0), # gate posts
|
||||
(0.0, 15.0), # north wall
|
||||
(10.6, -10.6), (-10.6, -10.6), # circular wall quadrants (both at distance ~15 from the origin)
|
||||
)
|
||||
|
||||
def _sample_false_positives(
    self, fp_rate: float, fp_std: float,
) -> list[tuple[float, float]]:
    """Draw this step's spurious (x, y) detections around static anchors.

    The number of detections is Poisson-distributed with mean ``fp_rate``.
    Each one is a randomly selected anchor point offset on both axes by
    zero-mean Gaussian noise with standard deviation ``fp_std``. The round
    anchor list is used when ``FIELD_SHAPE == "field_round"``, the
    rectangular list otherwise. All draws come from ``self._np_rng_lidar``.

    Returns:
        A list of ``(x, y)`` float tuples; empty when the Poisson draw is 0.
    """
    # Imported at call time, as in the original, so the value of
    # FIELD_SHAPE is read when the method runs.
    from herding.world.geometry import FIELD_SHAPE

    if FIELD_SHAPE == "field_round":
        anchor_pts = self._FP_ANCHORS_ROUND
    else:
        anchor_pts = self._FP_ANCHORS_RECT

    count = int(self._np_rng_lidar.poisson(fp_rate))
    if count == 0:
        # No further RNG draws are made on an empty step.
        return []

    picks = self._np_rng_lidar.integers(0, len(anchor_pts), size=count)
    jitter = self._np_rng_lidar.normal(0.0, fp_std, size=(count, 2))
    return [
        (float(anchor_pts[idx][0] + dx), float(anchor_pts[idx][1] + dy))
        for idx, (dx, dy) in zip(picks, jitter)
    ]
|
||||
|
||||
def perceived_positions(self) -> dict[str, tuple[float, float]]:
|
||||
"""What the controller would "see" this step: tracker output in
|
||||
LiDAR mode, ground truth in privileged mode. Used by demo
|
||||
|
||||
Reference in New Issue
Block a user