"""Shepherd Dog controller (Webots). Mode is selected by ``HERDING_MODE`` — read from the env var or from the ``herding_runtime.cfg`` file the launcher writes (Webots strips env vars from controller subprocesses on some setups): strombom → canonical Strömbom (2014) collect/drive heuristic wrapped in ActiveScanTeacher (opening rotation + walk-to-centre when the tracker briefly empties) sequential → single-target "pin-and-push", same wrapper universal → mecanum-aware teacher (Strömbom + omega + recovery) bc → behaviour-cloned MLP, trained on universal demos rl → KL-regularised PPO fine-tune of `bc` Policy directories are resolved by `policy_loader` from ``training/runs/{bc,rl}_{drive}_{world}`` with a fallback to ``training/runs/{bc,rl}`` (legacy single-policy paths). Sheep perception ---------------- The dog perceives sheep through its front-mounted 140° LiDAR (``protos/ShepherdDog.proto``: 180 rays, 12 m max range). Each step: 1. Read ``lidar.getRangeImage()``. 2. Cluster returns into world-frame ``(x, y)`` estimates (``herding.perception.lidar_perception.detections_from_scan``). 3. Fold detections into a ``SheepTracker``, which maintains last-seen positions for sheep currently out of FOV, requires consensus across multiple frames before promoting a candidate to a real track, and latches "penned" once a track crosses the gate plane south. Setting ``HERDING_USE_GT=1`` bypasses the tracker and feeds emitter ground-truth positions to the policy — useful as a perception ablation for the analytic baselines. Sheep emitter messages are otherwise read for diagnostic logging only (``GT_penned`` counter + auto-finish sentinel); the control loop never depends on them. Auto-finish ----------- When every emitter-reported sheep is penned, the controller writes ``training/.run_done``. The launcher (``tools/run_webots.sh``) detects the sentinel and closes Webots so headless sweep runs are bounded. """ import math import os import sys # --- Make the shared herding/ package importable from this controller dir --- _HERE = os.path.dirname(os.path.abspath(__file__)) _PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", "..")) if _PROJECT_ROOT not in sys.path: sys.path.insert(0, _PROJECT_ROOT) # --- Read runtime cfg early so env vars are set before geometry import --- def _load_runtime_config(): cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg") if not os.path.exists(cfg_path): return {} out = {} try: with open(cfg_path) as f: for line in f: line = line.strip() if not line or line.startswith("#") or "=" not in line: continue k, _, v = line.partition("=") out[k.strip().upper()] = v.strip() except OSError: return {} return out _runtime_cfg = _load_runtime_config() # Seed env vars from runtime cfg so downstream modules (geometry.py) see them. for _rk, _rv in _runtime_cfg.items(): if _rk.startswith("HERDING_") and _rk not in os.environ: os.environ[_rk] = _rv import numpy as np from controller import Robot from herding.control.active_scan import ActiveScanTeacher from herding.control.modulation import modulate_speed from herding.control.sequential import compute_action as sequential_action from herding.control.strombom import compute_action as strombom_action from herding.control.universal import compute_action as universal_action from herding.perception.obs import build_obs from herding.perception.lidar_perception import detections_from_scan from herding.perception.sheep_tracker import SheepTracker from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels from herding.world.geometry import ( DOG_SOUTH_LIMIT, PEN_ENTRY, is_penned, ) from herding.config import HERDING_WEBOTS, LIDAR_WEBOTS_360, RobotConfig # Robot physical constants come from RobotConfig so they stay in sync with # the training environment. The Webots preset uses action_smooth=0.55. _ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot DOG_WHEEL_RADIUS = _ROBOT_CFG.wheel_radius DOG_WHEEL_BASE = _ROBOT_CFG.wheel_base DOG_WHEEL_BASE_X = _ROBOT_CFG.wheel_base_x DOG_WHEEL_BASE_Y = _ROBOT_CFG.wheel_base_y DOG_MAX_WHEEL_OMEGA = _ROBOT_CFG.max_wheel_omega DOG_MAX_LINEAR = _ROBOT_CFG.max_linear # --------------------------------------------------------------------------- # Mode + policy resolution (cfg already loaded above) # --------------------------------------------------------------------------- MODE = (os.environ.get("HERDING_MODE") or _runtime_cfg.get("HERDING_MODE") or "bc").lower() _VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal", "calibrate") if MODE not in _VALID_MODES: print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.") MODE = "strombom" # Drive mode: "differential" (2-wheel) or "mecanum" (4-wheel omnidirectional). DRIVE_MODE = (os.environ.get("HERDING_DRIVE") or _runtime_cfg.get("HERDING_DRIVE") or "differential").lower() if DRIVE_MODE not in ("differential", "mecanum"): print(f"[dog] unknown HERDING_DRIVE={DRIVE_MODE!r}; defaulting to differential.") DRIVE_MODE = "differential" # World shape — used to disambiguate the trained policy directory. WORLD = (os.environ.get("HERDING_WORLD") or _runtime_cfg.get("HERDING_WORLD") or "field").lower() # LiDAR FOV variant — "140" (default, ShepherdDog.proto) or "360" # (ShepherdDog360.proto, FOV ablation). The launcher swaps the proto # in the temp world file; the controller picks the matching lidar_cfg # below so the perception pipeline interprets ray angles correctly. LIDAR_FOV_VARIANT = (os.environ.get("HERDING_LIDAR") or _runtime_cfg.get("HERDING_LIDAR") or "140").lower() if LIDAR_FOV_VARIANT == "360": _LIDAR_CFG = LIDAR_WEBOTS_360 else: _LIDAR_CFG = HERDING_WEBOTS.lidar # Diagnostic: bypass LiDAR tracker and use GT emitter positions directly. # Set HERDING_USE_GT=1 to isolate perception sim-to-real gap from policy quality. USE_GT_PERCEPTION = bool(int( os.environ.get("HERDING_USE_GT") or _runtime_cfg.get("HERDING_USE_GT", "0") )) def _resolve_policy_dir(mode: str, drive: str, world: str) -> str: """Where to look for the trained policy for the given mode/drive/world. Priority: 1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points to a real directory. 2. Canonical: training/runs/{bc,rl}__ 3. Drive-only: training/runs/{bc,rl}_ 4. Bare-mode: training/runs/{bc,rl} The first existing directory wins; if none exist, the canonical path is returned so the loader's error message is informative. """ env_dir = (os.environ.get("HERDING_POLICY_DIR") or _runtime_cfg.get("HERDING_POLICY_DIR")) if env_dir and os.path.isdir(env_dir): return env_dir base = "rl" if mode == "rl" else "bc" runs = os.path.join(_PROJECT_ROOT, "training", "runs") for cand in (f"{base}_{drive}_{world}", f"{base}_{drive}", base): path = os.path.join(runs, cand) if os.path.isdir(path): return path return os.path.join(runs, f"{base}_{drive}_{world}") print(f"[dog] mode={MODE} drive={DRIVE_MODE} world={WORLD}") POLICY_DIR = _resolve_policy_dir(MODE, DRIVE_MODE, WORLD) policy_handle = None if MODE in ("bc", "rl"): print(f"[dog] resolved POLICY_DIR={POLICY_DIR} exists={os.path.isdir(POLICY_DIR)}") try: from policy_loader import load as _load_policy policy_handle = _load_policy(POLICY_DIR) print(f"[dog] policy loaded from {POLICY_DIR}") except Exception as exc: print(f"[dog] policy load failed ({exc!r}); falling back to strombom.") MODE = "strombom" # --------------------------------------------------------------------------- # Control parameters # --------------------------------------------------------------------------- ACTION_SMOOTH = _ROBOT_CFG.action_smooth # EMA on (vx, vy) — kills frame-to-frame jitter RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done") def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple: """If the dog is near the south barrier and the action would push it further south, override with a northward action. Hard invariant: the dog never enters the pen.""" if dog_y < DOG_SOUTH_LIMIT and vy < 0.0: return (0.0, 1.0) if dog_y < DOG_SOUTH_LIMIT + 0.5 and vy < -0.2: return (vx * 0.5, max(0.0, vy + 0.5)) return (vx, vy) def drive_diff(vx: float, vy: float, left_motor, right_motor, compass, motor_max: float): if math.hypot(vx, vy) < 1e-3: left_motor.setVelocity(0.0) right_motor.setVelocity(0.0) return n = compass.getValues() h = math.atan2(n[0], n[1]) left, right = velocity_to_wheels( vx, vy, h, max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS, max_wheel_omega=motor_max, k_turn=4.0, ) left_motor.setVelocity(left) right_motor.setVelocity(right) def drive_mecanum(vx: float, vy: float, omega: float, fl_motor, fr_motor, rl_motor, rr_motor, compass, motor_max: float): if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3: fl_motor.setVelocity(0.0) fr_motor.setVelocity(0.0) rl_motor.setVelocity(0.0) rr_motor.setVelocity(0.0) return n = compass.getValues() h = math.atan2(n[0], n[1]) w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels( vx, vy, omega, h, max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS, lx=DOG_WHEEL_BASE_X / 2.0, ly=DOG_WHEEL_BASE_Y / 2.0, max_wheel_omega=motor_max, k_turn=4.0, wheel_base=DOG_WHEEL_BASE, ) fl_motor.setVelocity(w_fl) fr_motor.setVelocity(w_fr) rl_motor.setVelocity(w_rl) rr_motor.setVelocity(w_rr) # --------------------------------------------------------------------------- # Webots devices # --------------------------------------------------------------------------- robot = Robot() timestep = int(robot.getBasicTimeStep()) # Multi-shepherd axis split. When the launcher creates two dog instances # it sets each robot's customData to "axis=x" or "axis=y"; the controller # then masks the off-axis component of every action so the two dogs # share the herding workload along orthogonal axes (one closes flank # distance in x, the other in y). customData empty = single-dog mode. _AXIS_TAG = (robot.getCustomData() or "").strip().lower() if _AXIS_TAG.startswith("axis="): DOG_AXIS = _AXIS_TAG[5:] if DOG_AXIS not in ("x", "y"): print(f"[dog] unknown axis={DOG_AXIS!r} in customData; ignoring.") DOG_AXIS = None else: DOG_AXIS = None DOG_NAME = robot.getName() print(f"[dog] name={DOG_NAME} axis={DOG_AXIS}") if DRIVE_MODE == "mecanum": fl_motor = robot.getDevice("front left wheel motor") fr_motor = robot.getDevice("front right wheel motor") rl_motor = robot.getDevice("rear left wheel motor") rr_motor = robot.getDevice("rear right wheel motor") for m in (fl_motor, fr_motor, rl_motor, rr_motor): m.setPosition(float("inf")) m.setVelocity(0.0) MOTOR_MAX = min(fl_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA) else: left_motor = robot.getDevice("left wheel motor") right_motor = robot.getDevice("right wheel motor") left_motor.setPosition(float("inf")) right_motor.setPosition(float("inf")) left_motor.setVelocity(0.0) right_motor.setVelocity(0.0) MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA) gps = robot.getDevice("gps"); gps.enable(timestep) compass = robot.getDevice("compass"); compass.enable(timestep) receiver = robot.getDevice("receiver"); receiver.enable(timestep) emitter = robot.getDevice("emitter") lidar = robot.getDevice("lidar"); lidar.enable(timestep) tracker = SheepTracker(tracker_cfg=HERDING_WEBOTS.tracker) # Cosmetic ear motors — animated; not used by control. left_ear = robot.getDevice("left ear motor") right_ear = robot.getDevice("right ear motor") left_ear.setPosition(float("inf")) right_ear.setPosition(float("inf")) left_ear.setVelocity(0.0) right_ear.setVelocity(0.0) ear_phase = 0.0 EAR_AMPLITUDE = 0.35 EAR_RATE = 8.0 # --------------------------------------------------------------------------- # Main loop # --------------------------------------------------------------------------- # Analytic-teacher wrapper (instantiated lazily so RL/BC modes don't pay # the import-time cost). Each gets the same ActiveScanTeacher treatment: # rotate-on-empty, walk-to-centre, near-sheep speed modulation. analytic_teacher = None if MODE in ("strombom", "sequential"): base_fn = strombom_action if MODE == "strombom" else sequential_action analytic_teacher = ActiveScanTeacher(base_fn) elif MODE == "universal": analytic_teacher = ActiveScanTeacher(universal_action) # GT positions from sheep emitters — used **only** for the auto-finish # sentinel and the GT_penned diagnostic line. Never fed into control. _gt_sheep: dict = {} _run_done = False prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0) step_count = 0 # --------------------------------------------------------------------------- # Calibration mode — apply fixed action, measure GPS displacement, compare # against gym kinematics prediction, write results to calibrate_mecanum.log. # --------------------------------------------------------------------------- if MODE == "calibrate": import json as _json _calib_vx = float(os.environ.get("CALIB_VX", "0.5")) _calib_vy = float(os.environ.get("CALIB_VY", "0.0")) _calib_om = float(os.environ.get("CALIB_OM", "0.0")) _calib_n = int(os.environ.get("CALIB_N_STEPS", "150")) _log_path = os.path.join(_PROJECT_ROOT, "calibrate_mecanum.log") # Settle for 5 steps so GPS stabilises. for _ in range(5): robot.step(timestep) _pos0 = gps.getValues(); _x0, _y0 = _pos0[0], _pos0[1] _n_calib = compass.getValues(); _h0 = math.atan2(_n_calib[0], _n_calib[1]) # Gym-predicted displacement using shared kinematics. from herding.world.diffdrive import velocity_to_mecanum_wheels, mecanum_step from herding.world.geometry import WEBOTS_DT as _DT _xg, _yg, _hg = _x0, _y0, _h0 for _ in range(_calib_n): _wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels( _calib_vx, _calib_vy, _calib_om, _hg, max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS, lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2, max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0, wheel_base=DOG_WHEEL_BASE, ) _xg, _yg, _hg = mecanum_step( _xg, _yg, _hg, _wfl, _wfr, _wrl, _wrr, DOG_WHEEL_RADIUS, DOG_WHEEL_BASE_X / 2, DOG_WHEEL_BASE_Y / 2, _DT, ) # Run actual Webots steps. for _ in range(_calib_n): _nv = compass.getValues(); _h = math.atan2(_nv[0], _nv[1]) _wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels( _calib_vx, _calib_vy, _calib_om, _h, max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS, lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2, max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0, wheel_base=DOG_WHEEL_BASE, ) if DRIVE_MODE == "mecanum": drive_mecanum(_calib_vx, _calib_vy, _calib_om, fl_motor, fr_motor, rl_motor, rr_motor, compass, MOTOR_MAX) robot.step(timestep) _pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1] _T = _calib_n * _DT _vx_w = (_x1 - _x0) / _T; _vy_w = (_y1 - _y0) / _T _vx_g = (_xg - _x0) / _T; _vy_g = (_yg - _y0) / _T def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p) _result = ( f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f}) " f"steps={_calib_n}\n" f" gym displacement: dx={_xg-_x0:.4f} dy={_yg-_y0:.4f} " f"(vx={_vx_g:.3f} vy={_vy_g:.3f} m/s)\n" f" webots displacement: dx={_x1-_x0:.4f} dy={_y1-_y0:.4f} " f"(vx={_vx_w:.3f} vy={_vy_w:.3f} m/s)\n" f" vx error: {_pct(_vx_w, _vx_g):.1f}% " f"vy error: {_pct(_vy_w, _vy_g):.1f}%\n" ) print(_result) with open(_log_path, "a") as _f: _f.write(_result + "\n") # Write the run-done sentinel so run_webots.sh closes Webots cleanly. with open(RUN_DONE_FILE, "w") as _f: _f.write("calibrate\n") import sys as _sys; _sys.exit(0) while robot.step(timestep) != -1: step_count += 1 # Drain sheep emitter messages → GT (diagnostic only). while receiver.getQueueLength() > 0: msg = receiver.getString() receiver.nextPacket() parts = msg.split(":") if len(parts) == 4 and parts[0] == "sheep": try: _gt_sheep[parts[1]] = (float(parts[2]), float(parts[3])) except ValueError: pass pos = gps.getValues() dog_xy = (pos[0], pos[1]) n = compass.getValues() dog_heading = math.atan2(n[0], n[1]) # ---- LiDAR perception → tracker → active sheep positions ---- ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32) detections = detections_from_scan( ranges, dog_xy[0], dog_xy[1], dog_heading, detection_cfg=HERDING_WEBOTS.detection, lidar_cfg=_LIDAR_CFG, ) if USE_GT_PERCEPTION and _gt_sheep: # Bypass tracker: feed GT emitter positions directly to policy/teacher. sheep_positions = {k: v for k, v in _gt_sheep.items() if not is_penned(v[0], v[1])} tracker.update(detections) # still advance tracker for diagnostics else: sheep_positions = tracker.update(detections) sheep_xy_list = list(sheep_positions.values()) sheep_penned_list = [False] * len(sheep_xy_list) single_obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list) # ---- Action selection ---- omega = 0.0 if MODE in ("bc", "rl") and policy_handle is not None: if not sheep_positions: # BC/RL never saw "empty obs during operation" in training (empty # obs only happened at episode end), so the policy outputs ~zero # and the dog gets stuck. Fall back to an *active scan*: rotate # the desired heading slowly so the narrow 140° FOV sweeps the # field instead of charging in one fixed direction (which # otherwise drives the dog into the north wall and ends the run). scan_h = (step_count * 0.015) % (2.0 * math.pi) vx = 0.5 * math.cos(scan_h) vy = 0.5 * math.sin(scan_h) omega = 0.5 if DRIVE_MODE == "mecanum" else 0.0 else: action = policy_handle.predict(single_obs) vx, vy = float(action[0]), float(action[1]) if DRIVE_MODE == "mecanum" and len(action) >= 3: omega = float(action[2]) else: result = analytic_teacher( dog_xy, dog_heading, sheep_positions, PEN_ENTRY, DRIVE_MODE, ) if len(result) == 4: vx, vy, omega, _mode_str = result else: vx, vy, _mode_str = result # Near-sheep speed modulation (shared by every mode). vx, vy = modulate_speed(vx, vy, dog_xy, sheep_positions) # Axis-split mask for the dual-dog setup: this dog only commits to # its assigned axis (x or y) so its partner covers the other. if DOG_AXIS == "x": vy = 0.0 elif DOG_AXIS == "y": vx = 0.0 # EMA smoothing — kills frame-to-frame action jitter. if DRIVE_MODE == "mecanum": vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy omega = ACTION_SMOOTH * prev_action[2] + (1.0 - ACTION_SMOOTH) * omega else: vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy # Safety: dog must never enter the pen. vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1]) prev_action = (vx, vy, omega) if DRIVE_MODE == "mecanum" else (vx, vy) if DRIVE_MODE == "mecanum": drive_mecanum(vx, vy, omega, fl_motor, fr_motor, rl_motor, rr_motor, compass, MOTOR_MAX) else: drive_diff(vx, vy, left_motor, right_motor, compass, MOTOR_MAX) emitter.send(f"dog:{DOG_NAME}:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}") # Cosmetic ear wiggle. ear_phase += 0.12 ear_pos = EAR_AMPLITUDE * math.sin(ear_phase) left_ear.setVelocity(EAR_RATE) right_ear.setVelocity(EAR_RATE) left_ear.setPosition(ear_pos) right_ear.setPosition(-ear_pos) # Auto-finish: when all GT sheep are penned, write the sentinel. # The launcher polls for it and closes Webots so batch evals don't # hang after the task is done. Bounded by `_gt_sheep` so we don't # fire during the first few steps while the receiver fills. if _gt_sheep and not _run_done: gt_active = sum(1 for x, y in _gt_sheep.values() if not is_penned(x, y)) if gt_active == 0: os.makedirs(os.path.dirname(RUN_DONE_FILE), exist_ok=True) open(RUN_DONE_FILE, "w").close() _run_done = True print(f"[dog] all {len(_gt_sheep)} sheep penned at step " f"{step_count} — wrote sentinel, launcher will close Webots") if step_count % 200 == 0: gt_penned = sum(1 for x, y in _gt_sheep.values() if is_penned(x, y)) gt_total = len(_gt_sheep) common = (f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} " f"GT_penned={gt_penned}/{gt_total} " f"tracks_active={tracker.n_active()} " f"tracks_cand={tracker.n_candidate()} " f"tracks_penned={tracker.n_penned()} " f"detections={len(detections)}") if DRIVE_MODE == "mecanum": print(f"{common} action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})") else: print(f"{common} action=({vx:+.2f}, {vy:+.2f})")