"""Shepherd Dog controller (Webots). Mode is selected by ``HERDING_MODE`` (env var, or via the ``herding_runtime.cfg`` file the launcher writes since Webots strips env vars on some setups): rl → load a BC-trained SB3 policy from HERDING_POLICY_DIR and use its (vx, vy) action each step. strombom → canonical Strömbom collect/drive heuristic. sequential → single-target "pin and push" — drives the sheep closest to the pen. All modes share the same low-level differential-drive controller (``herding.diffdrive.velocity_to_wheels`` with cos(err)-clamped forward speed), so switching modes does not retune actuation. A safety supervisor enforces the "dog stays out of the pen" invariant: if the action would push the dog past ``DOG_SOUTH_LIMIT`` it is overridden with a north-driving correction. RL fallback: if the policy zip can't be loaded (SB3 missing, file missing), the controller drops to strombom mode automatically. """ import math import os import sys # --- Make the shared herding/ package importable from this controller dir --- _HERE = os.path.dirname(os.path.abspath(__file__)) _PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", "..")) if _PROJECT_ROOT not in sys.path: sys.path.insert(0, _PROJECT_ROOT) from controller import Robot from herding.diffdrive import velocity_to_wheels from herding.geometry import ( DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA, DOG_SOUTH_LIMIT, DOG_WHEEL_RADIUS, PEN_ENTRY, ) from herding.obs import build_obs from herding.sequential import compute_action_debug as sequential_action_debug from herding.strombom import compute_action_debug as strombom_action_debug # --------------------------------------------------------------------------- # Mode selection # --------------------------------------------------------------------------- def _load_runtime_config(): """Read mode + policy_dir overrides from a runtime config file. Webots strips HERDING_* env vars in some configurations, so the launcher writes a tiny ``herding_runtime.cfg`` (key=value lines) in the project root and the controller reads it here. Env vars win if both are present; the file is the fallback. """ cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg") if not os.path.exists(cfg_path): return {} out = {} try: with open(cfg_path) as f: for line in f: line = line.strip() if not line or line.startswith("#") or "=" not in line: continue k, _, v = line.partition("=") out[k.strip().upper()] = v.strip() except OSError: return {} return out _runtime_cfg = _load_runtime_config() MODE = (os.environ.get("HERDING_MODE") or _runtime_cfg.get("HERDING_MODE") or "rl").lower() def _resolve_policy_dir() -> str: """Where to look for the trained policy. Priority: 1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points to a real directory. 2. ``training/runs/bc_flock`` — flock-style BC (current default; requires the tight-cohesion sheep regime). 3. ``training/runs/bc_solo`` — single-target BC (1-by-1 style; only works if ``herding/flocking_sim.py`` is reverted to the loose-cohesion regime). """ env_dir = (os.environ.get("HERDING_POLICY_DIR") or _runtime_cfg.get("HERDING_POLICY_DIR")) if env_dir and os.path.isdir(env_dir): return env_dir candidates = [ os.path.join(_PROJECT_ROOT, "training", "runs", "bc_flock"), os.path.join(_PROJECT_ROOT, "training", "runs", "bc_solo"), ] for c in candidates: if os.path.isdir(c): return c # Last resort — return env var anyway so error message is informative. return env_dir or candidates[0] _VALID_MODES = ("rl", "strombom", "sequential") if MODE not in _VALID_MODES: print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.") MODE = "strombom" POLICY_DIR = _resolve_policy_dir() policy_handle = None if MODE == "rl": print(f"[dog] resolved POLICY_DIR={POLICY_DIR} exists={os.path.isdir(POLICY_DIR)}") try: from policy_loader import load as _load_policy policy_handle = _load_policy(POLICY_DIR) print(f"[dog] RL policy loaded from {POLICY_DIR}") except Exception as exc: print(f"[dog] RL policy load failed ({exc!r}); falling back to strombom.") MODE = "strombom" print(f"[dog] running in mode={MODE}") # --------------------------------------------------------------------------- # Action smoothing + safety supervisor # --------------------------------------------------------------------------- ACTION_SMOOTH = 0.35 prev_action = (0.0, 0.0) def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple: """If the dog is near the south barrier and the action would push it further south, override with a northward action. Hard invariant: the dog never enters the pen.""" if dog_y < DOG_SOUTH_LIMIT and vy < 0.0: return (0.0, 1.0) if dog_y < DOG_SOUTH_LIMIT + 0.5 and vy < -0.2: return (vx * 0.5, max(0.0, vy + 0.5)) return (vx, vy) # --------------------------------------------------------------------------- # Driving # --------------------------------------------------------------------------- def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: float): if math.hypot(vx, vy) < 1e-3: left_motor.setVelocity(0.0) right_motor.setVelocity(0.0) return n = compass.getValues() h = math.atan2(n[0], n[1]) left, right = velocity_to_wheels( vx, vy, h, max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS, max_wheel_omega=motor_max, k_turn=4.0, ) left_motor.setVelocity(left) right_motor.setVelocity(right) # --------------------------------------------------------------------------- # Webots devices # --------------------------------------------------------------------------- robot = Robot() timestep = int(robot.getBasicTimeStep()) left_motor = robot.getDevice("left wheel motor") right_motor = robot.getDevice("right wheel motor") left_motor.setPosition(float("inf")) right_motor.setPosition(float("inf")) left_motor.setVelocity(0.0) right_motor.setVelocity(0.0) MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA) gps = robot.getDevice("gps"); gps.enable(timestep) compass = robot.getDevice("compass"); compass.enable(timestep) receiver = robot.getDevice("receiver"); receiver.enable(timestep) emitter = robot.getDevice("emitter") # Cosmetic ear motors — ignored by control logic but keep them animated. left_ear = robot.getDevice("left ear motor") right_ear = robot.getDevice("right ear motor") left_ear.setPosition(float("inf")) right_ear.setPosition(float("inf")) left_ear.setVelocity(0.0) right_ear.setVelocity(0.0) ear_phase = 0.0 EAR_AMPLITUDE = 0.35 EAR_RATE = 8.0 # --------------------------------------------------------------------------- # Main loop # --------------------------------------------------------------------------- # {name: (x, y)} — kept across all sheep ever heard from. Sheep that drift # into the pen are tracked by ``penned`` so observations and Strömbom # agree on which ones still need herding. sheep_positions: dict = {} penned_set: set = set() step_count = 0 from herding.geometry import is_penned_position while robot.step(timestep) != -1: step_count += 1 while receiver.getQueueLength() > 0: msg = receiver.getString() receiver.nextPacket() parts = msg.split(":") if len(parts) == 4 and parts[0] == "sheep": try: x, y = float(parts[2]), float(parts[3]) except ValueError: continue sheep_positions[parts[1]] = (x, y) if parts[1] not in penned_set and is_penned_position(x, y): penned_set.add(parts[1]) pos = gps.getValues() dog_xy = (pos[0], pos[1]) n = compass.getValues() dog_heading = math.atan2(n[0], n[1]) # ---- Action selection ---- if MODE == "rl" and policy_handle is not None: sheep_xy_list = list(sheep_positions.values()) sheep_names = list(sheep_positions.keys()) sheep_penned_list = [s in penned_set for s in sheep_names] obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list) action = policy_handle.predict(obs) vx, vy = float(action[0]), float(action[1]) elif MODE == "sequential": vx, vy, _mode_str, _dbg = sequential_action_debug( dog_xy, sheep_positions, PEN_ENTRY, ) else: # Strömbom (canonical baseline). vx, vy, _mode_str, _dbg = strombom_action_debug( dog_xy, sheep_positions, PEN_ENTRY, ) # EMA smoothing — reduces oscillation from policy or Strömbom flips. vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy # Safety: dog must never enter the pen. vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1]) prev_action = (vx, vy) drive(vx, vy, left_motor, right_motor, compass, MOTOR_MAX) emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}") # Cosmetic ear wiggle — purely visual. ear_phase += 0.12 ear_pos = EAR_AMPLITUDE * math.sin(ear_phase) left_ear.setVelocity(EAR_RATE) right_ear.setVelocity(EAR_RATE) left_ear.setPosition(ear_pos) right_ear.setPosition(-ear_pos) if step_count % 200 == 0: n_active = sum(1 for s in sheep_positions if s not in penned_set) print(f"[dog mode={MODE}] step={step_count} known={len(sheep_positions)} " f"penned={len(penned_set)} active={n_active} action=({vx:+.2f}, {vy:+.2f})")