Files
TIR_PROJ/controllers/shepherd_dog/shepherd_dog.py
T
Johnny Fernandes 27c0f65722 Mecanum Webots via Supervisor kinematic injection
Replace the failing ODE-rolled mecanum chassis dynamics with a
Supervisor.setVelocity call that uses the gym mecanum forward
kinematics formula directly. Wheel motors still spin (visual);
chassis motion comes from the gym model so training and deployment
match by construction.

Results (seed=42, n=10 sheep): BC + RL mecanum pen 10/10 in both
field and field_round. n=5 mecanum cells still 0/5 due to tracker
phantoms anchored to wall corners under the 360° LiDAR — documented
in docs/status.md as the remaining gap.

Cleanup: drop deploy-time hacks (HERDING_HEADING_*, HERDING_OMEGA_CLAMP,
HERDING_TRACKER_*) that were workarounds for the old ODE chaos;
revert the proto inertiaMatrix, roller dampingConstant, and reduced
motor torque since they no longer carry load; refresh comments
around the mecanum config presets.
2026-05-18 22:46:37 +00:00

676 lines
28 KiB
Python

"""Shepherd Dog controller (Webots).
Mode is selected by ``HERDING_MODE`` — read from the env var or from
the ``herding_runtime.cfg`` file the launcher writes (Webots strips
env vars from controller subprocesses on some setups):
strombom → canonical Strömbom (2014) collect/drive heuristic
wrapped in ActiveScanTeacher (opening rotation +
walk-to-centre when the tracker briefly empties)
sequential → single-target "pin-and-push", same wrapper
universal → mecanum-aware teacher (Strömbom + omega + recovery)
bc → behaviour-cloned MLP, trained on universal demos
rl → KL-regularised PPO fine-tune of `bc`
Policy directories are resolved by `policy_loader` from
``training/runs/{bc,rl}_{drive}_{world}`` with a fallback to
``training/runs/{bc,rl}`` (legacy single-policy paths).
Sheep perception
----------------
The dog perceives sheep through its front-mounted 140° LiDAR
(``protos/ShepherdDog.proto``: 180 rays, 12 m max range). Each step:
1. Read ``lidar.getRangeImage()``.
2. Cluster returns into world-frame ``(x, y)`` estimates
(``herding.perception.lidar_perception.detections_from_scan``).
3. Fold detections into a ``SheepTracker``, which maintains
last-seen positions for sheep currently out of FOV, requires
consensus across multiple frames before promoting a candidate
to a real track, and latches "penned" once a track crosses
the gate plane south.
Setting ``HERDING_USE_GT=1`` bypasses the tracker and feeds emitter
ground-truth positions to the policy — useful as a perception
ablation for the analytic baselines.
Sheep emitter messages are otherwise read for diagnostic logging
only (``GT_penned`` counter + auto-finish sentinel); the control
loop never depends on them.
Auto-finish
-----------
When every emitter-reported sheep is penned, the controller writes
``training/.run_done``. The launcher (``tools/run_webots.sh``)
detects the sentinel and closes Webots so headless sweep runs are
bounded.
"""
import math
import os
import sys
# --- Make the shared herding/ package importable from this controller dir ---
_HERE = os.path.dirname(os.path.abspath(__file__))
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
# --- Read runtime cfg early so env vars are set before geometry import ---
def _load_runtime_config():
cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
if not os.path.exists(cfg_path):
return {}
out = {}
try:
with open(cfg_path) as f:
for line in f:
line = line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
k, _, v = line.partition("=")
out[k.strip().upper()] = v.strip()
except OSError:
return {}
return out
_runtime_cfg = _load_runtime_config()
# Seed env vars from runtime cfg so downstream modules (geometry.py) see them.
for _rk, _rv in _runtime_cfg.items():
if _rk.startswith("HERDING_") and _rk not in os.environ:
os.environ[_rk] = _rv
import numpy as np
from controller import Supervisor
from herding.control.active_scan import ActiveScanTeacher
from herding.control.modulation import modulate_speed
from herding.control.sequential import compute_action as sequential_action
from herding.control.strombom import compute_action as strombom_action
from herding.control.universal import compute_action as universal_action
from herding.perception.obs import build_obs
from herding.perception.lidar_perception import detections_from_scan
from herding.perception.sheep_tracker import SheepTracker
from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels
from herding.world.geometry import (
DOG_SOUTH_LIMIT,
PEN_ENTRY, is_penned,
)
from herding.config import (
HERDING_WEBOTS, HERDING_MEC_WEBOTS, HERDING_MEC_WEBOTS_360,
LIDAR_WEBOTS_360, RobotConfig,
)
# Robot physical constants come from RobotConfig so they stay in sync with
# the training environment. The Webots preset uses action_smooth=0.55.
# Mecanum picks the matched preset so kinematic injection uses the same
# strafe_efficiency/bleed values the policy was trained against.
_DRIVE_MODE_PEEK = (os.environ.get("HERDING_DRIVE")
or _runtime_cfg.get("HERDING_DRIVE")
or "differential").lower()
if _DRIVE_MODE_PEEK == "mecanum":
_ROBOT_CFG: RobotConfig = HERDING_MEC_WEBOTS_360.robot
else:
_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
DOG_WHEEL_RADIUS = _ROBOT_CFG.wheel_radius
DOG_WHEEL_BASE = _ROBOT_CFG.wheel_base
DOG_WHEEL_BASE_X = _ROBOT_CFG.wheel_base_x
DOG_WHEEL_BASE_Y = _ROBOT_CFG.wheel_base_y
DOG_MAX_WHEEL_OMEGA = _ROBOT_CFG.max_wheel_omega
DOG_MAX_LINEAR = _ROBOT_CFG.max_linear
# ---------------------------------------------------------------------------
# Mode + policy resolution (cfg already loaded above)
# ---------------------------------------------------------------------------
MODE = (os.environ.get("HERDING_MODE")
or _runtime_cfg.get("HERDING_MODE")
or "bc").lower()
_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal", "calibrate")
if MODE not in _VALID_MODES:
print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
MODE = "strombom"
# Drive mode: "differential" (2-wheel) or "mecanum" (4-wheel omnidirectional).
DRIVE_MODE = (os.environ.get("HERDING_DRIVE")
or _runtime_cfg.get("HERDING_DRIVE")
or "differential").lower()
if DRIVE_MODE not in ("differential", "mecanum"):
print(f"[dog] unknown HERDING_DRIVE={DRIVE_MODE!r}; defaulting to differential.")
DRIVE_MODE = "differential"
# World shape — used to disambiguate the trained policy directory.
WORLD = (os.environ.get("HERDING_WORLD")
or _runtime_cfg.get("HERDING_WORLD")
or "field").lower()
# LiDAR FOV variant — "140" (default, ShepherdDog.proto) or "360"
# (ShepherdDog360.proto, FOV ablation). The launcher swaps the proto
# in the temp world file; the controller picks the matching lidar_cfg
# below so the perception pipeline interprets ray angles correctly.
LIDAR_FOV_VARIANT = (os.environ.get("HERDING_LIDAR")
or _runtime_cfg.get("HERDING_LIDAR")
or "140").lower()
if DRIVE_MODE == "mecanum" and LIDAR_FOV_VARIANT == "360":
_LIDAR_CFG = HERDING_MEC_WEBOTS_360.lidar
elif LIDAR_FOV_VARIANT == "360":
_LIDAR_CFG = LIDAR_WEBOTS_360
else:
_LIDAR_CFG = HERDING_WEBOTS.lidar
# Diagnostic: bypass LiDAR tracker and use GT emitter positions directly.
# Set HERDING_USE_GT=1 to isolate perception sim-to-real gap from policy quality.
USE_GT_PERCEPTION = bool(int(
os.environ.get("HERDING_USE_GT")
or _runtime_cfg.get("HERDING_USE_GT", "0")
))
def _resolve_policy_dir(mode: str, drive: str, world: str) -> str:
"""Where to look for the trained policy for the given mode/drive/world.
Priority:
1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points
to a real directory.
2. Canonical: training/runs/{bc,rl}_<drive>_<world>
3. Drive-only: training/runs/{bc,rl}_<drive>
4. Bare-mode: training/runs/{bc,rl}
The first existing directory wins; if none exist, the canonical
path is returned so the loader's error message is informative.
"""
env_dir = (os.environ.get("HERDING_POLICY_DIR")
or _runtime_cfg.get("HERDING_POLICY_DIR"))
if env_dir and os.path.isdir(env_dir):
return env_dir
base = "rl" if mode == "rl" else "bc"
runs = os.path.join(_PROJECT_ROOT, "training", "runs")
for cand in (f"{base}_{drive}_{world}", f"{base}_{drive}", base):
path = os.path.join(runs, cand)
if os.path.isdir(path):
return path
return os.path.join(runs, f"{base}_{drive}_{world}")
print(f"[dog] mode={MODE} drive={DRIVE_MODE} world={WORLD}")
POLICY_DIR = _resolve_policy_dir(MODE, DRIVE_MODE, WORLD)
policy_handle = None
if MODE in ("bc", "rl"):
print(f"[dog] resolved POLICY_DIR={POLICY_DIR} exists={os.path.isdir(POLICY_DIR)}")
try:
from policy_loader import load as _load_policy
policy_handle = _load_policy(POLICY_DIR)
print(f"[dog] policy loaded from {POLICY_DIR}")
except Exception as exc:
print(f"[dog] policy load failed ({exc!r}); falling back to strombom.")
MODE = "strombom"
# ---------------------------------------------------------------------------
# Control parameters
# ---------------------------------------------------------------------------
ACTION_SMOOTH = _ROBOT_CFG.action_smooth # EMA on (vx, vy) — kills frame-to-frame jitter
RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done")
def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
"""If the dog is near the south barrier and the action would push it
further south, override with a northward action. Hard invariant: the
dog never enters the pen."""
if dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
return (0.0, 1.0)
if dog_y < DOG_SOUTH_LIMIT + 0.5 and vy < -0.2:
return (vx * 0.5, max(0.0, vy + 0.5))
return (vx, vy)
def drive_diff(vx: float, vy: float, left_motor, right_motor,
compass, motor_max: float):
if math.hypot(vx, vy) < 1e-3:
left_motor.setVelocity(0.0)
right_motor.setVelocity(0.0)
return
n = compass.getValues()
h = math.atan2(n[0], n[1])
left, right = velocity_to_wheels(
vx, vy, h,
max_linear=DOG_MAX_LINEAR,
wheel_radius=DOG_WHEEL_RADIUS,
max_wheel_omega=motor_max,
k_turn=4.0,
)
left_motor.setVelocity(left)
right_motor.setVelocity(right)
def drive_mecanum(vx: float, vy: float, omega: float,
fl_motor, fr_motor, rl_motor, rr_motor,
compass, motor_max: float):
"""Drive the mecanum chassis kinematically.
The wheel motors are spun for visual fidelity, but the chassis
motion comes from Supervisor.setVelocity using the gym mecanum
forward-kinematics formula. Gym training and Webots deployment
therefore produce identical body motion.
"""
if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
fl_motor.setVelocity(0.0)
fr_motor.setVelocity(0.0)
rl_motor.setVelocity(0.0)
rr_motor.setVelocity(0.0)
if _self_node is not None:
_self_node.setVelocity([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
return
n = compass.getValues()
h = math.atan2(n[0], n[1])
w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
vx, vy, omega, h,
max_linear=DOG_MAX_LINEAR,
wheel_radius=DOG_WHEEL_RADIUS,
lx=DOG_WHEEL_BASE_X / 2.0, ly=DOG_WHEEL_BASE_Y / 2.0,
max_wheel_omega=motor_max,
k_turn=4.0,
wheel_base=DOG_WHEEL_BASE,
)
fl_motor.setVelocity(w_fl)
fr_motor.setVelocity(w_fr)
rl_motor.setVelocity(w_rl)
rr_motor.setVelocity(w_rr)
# Kinematic body injection — derive body velocity from the same
# wheel speeds using the gym forward-kinematics formula and the
# active preset's strafe/bleed parameters.
if _self_node is not None:
r = DOG_WHEEL_RADIUS
vx_body = (w_fl + w_fr + w_rl + w_rr) * r / 4.0
vy_body_ideal = (-w_fl + w_fr + w_rl - w_rr) * r / 4.0
vy_body = vy_body_ideal * _ROBOT_CFG.strafe_efficiency
if _ROBOT_CFG.strafe_to_forward_bleed != 0.0:
vx_body += _ROBOT_CFG.strafe_to_forward_bleed * abs(vy_body_ideal)
omega_body = (-w_fl + w_fr - w_rl + w_rr) * r / (
4.0 * (DOG_WHEEL_BASE_X / 2.0 + DOG_WHEEL_BASE_Y / 2.0))
cos_h, sin_h = math.cos(h), math.sin(h)
vx_w = vx_body * cos_h - vy_body * sin_h
vy_w = vx_body * sin_h + vy_body * cos_h
_self_node.setVelocity([vx_w, vy_w, 0.0, 0.0, 0.0, omega_body])
# ---------------------------------------------------------------------------
# Webots devices
# ---------------------------------------------------------------------------
robot = Supervisor()
timestep = int(robot.getBasicTimeStep())
# Mecanum uses Supervisor.setVelocity for chassis motion (see
# drive_mecanum); diff-drive keeps full ODE simulation.
_self_node = robot.getSelf() if DRIVE_MODE == "mecanum" else None
# Multi-shepherd axis split. When the launcher creates two dog instances
# it sets each robot's customData to "axis=x" or "axis=y"; the controller
# then attenuates the off-axis component of every action so the two
# dogs share the herding workload along orthogonal axes. customData
# empty = single-dog mode (no masking).
#
# HERDING_AXIS_LEAK controls how strict the mask is:
# 0.0 → hard mask (off-axis component zeroed; pure axis-split)
# 1.0 → no mask (both dogs run full action; equivalent to NDOGS=2
# without axis assignment)
# Defaults to 0.3 — empirically the 100/0 strict mask deadlocks once
# both dogs reach their drive standoff (the Strömbom target shrinks
# and each dog has only one degree of freedom). A small leak keeps
# pressure on the flock while preserving the "one dog leads each
# axis" coordination story.
_AXIS_TAG = (robot.getCustomData() or "").strip().lower()
if _AXIS_TAG.startswith("axis="):
DOG_AXIS = _AXIS_TAG[5:]
if DOG_AXIS not in ("x", "y"):
print(f"[dog] unknown axis={DOG_AXIS!r} in customData; ignoring.")
DOG_AXIS = None
else:
DOG_AXIS = None
try:
AXIS_LEAK = float(os.environ.get("HERDING_AXIS_LEAK")
or _runtime_cfg.get("HERDING_AXIS_LEAK", "0.3"))
except ValueError:
AXIS_LEAK = 0.3
AXIS_LEAK = max(0.0, min(1.0, AXIS_LEAK))
DOG_NAME = robot.getName()
print(f"[dog] name={DOG_NAME} axis={DOG_AXIS} leak={AXIS_LEAK:.2f}")
if DRIVE_MODE == "mecanum":
fl_motor = robot.getDevice("front left wheel motor")
fr_motor = robot.getDevice("front right wheel motor")
rl_motor = robot.getDevice("rear left wheel motor")
rr_motor = robot.getDevice("rear right wheel motor")
for m in (fl_motor, fr_motor, rl_motor, rr_motor):
m.setPosition(float("inf"))
m.setVelocity(0.0)
MOTOR_MAX = min(fl_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
else:
left_motor = robot.getDevice("left wheel motor")
right_motor = robot.getDevice("right wheel motor")
left_motor.setPosition(float("inf"))
right_motor.setPosition(float("inf"))
left_motor.setVelocity(0.0)
right_motor.setVelocity(0.0)
MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
gps = robot.getDevice("gps"); gps.enable(timestep)
compass = robot.getDevice("compass"); compass.enable(timestep)
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
emitter = robot.getDevice("emitter")
lidar = robot.getDevice("lidar"); lidar.enable(timestep)
# Tracker config: pick the preset that matches the (drive, lidar) combo
# so the tracker's consensus parameters match what the policy was
# trained against.
if DRIVE_MODE == "mecanum" and LIDAR_FOV_VARIANT == "360":
_TRACKER_CFG = HERDING_MEC_WEBOTS_360.tracker
elif DRIVE_MODE == "mecanum":
_TRACKER_CFG = HERDING_MEC_WEBOTS.tracker
else:
_TRACKER_CFG = HERDING_WEBOTS.tracker
tracker = SheepTracker(tracker_cfg=_TRACKER_CFG)
# Cosmetic ear motors — animated; not used by control.
left_ear = robot.getDevice("left ear motor")
right_ear = robot.getDevice("right ear motor")
left_ear.setPosition(float("inf"))
right_ear.setPosition(float("inf"))
left_ear.setVelocity(0.0)
right_ear.setVelocity(0.0)
ear_phase = 0.0
EAR_AMPLITUDE = 0.35
EAR_RATE = 8.0
# ---------------------------------------------------------------------------
# Main loop
# ---------------------------------------------------------------------------
# Analytic-teacher wrapper (instantiated lazily so RL/BC modes don't pay
# the import-time cost). Each gets the same ActiveScanTeacher treatment:
# rotate-on-empty, walk-to-centre, near-sheep speed modulation.
analytic_teacher = None
if MODE in ("strombom", "sequential"):
base_fn = strombom_action if MODE == "strombom" else sequential_action
analytic_teacher = ActiveScanTeacher(base_fn)
elif MODE == "universal":
analytic_teacher = ActiveScanTeacher(universal_action)
# Optional deterministic seed for the controller's RNG. The sheep
# controller seeds itself the same way, so identical HERDING_SEED
# values give reproducible trials. If unset (empty), Python uses its
# time-based default and runs are non-deterministic.
import random as _random
_seed_raw = (os.environ.get("HERDING_SEED")
or _runtime_cfg.get("HERDING_SEED")
or "").strip()
if _seed_raw:
try:
HERDING_SEED = int(_seed_raw)
except ValueError:
HERDING_SEED = None
print(f"[dog] could not parse HERDING_SEED={_seed_raw!r}; using random")
else:
_random.seed(HERDING_SEED)
else:
HERDING_SEED = None
# GT positions from sheep emitters — used **only** for the auto-finish
# sentinel, the GT_penned diagnostic line, and the per-sheep pen-time
# metrics printed at end of run. Never fed into control.
_gt_sheep: dict = {}
_pen_step: dict = {} # sheep name -> step at which it first became penned
_run_done = False
_t_start = None # step at which we first see GT positions (sim start)
prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0)
step_count = 0
# ---------------------------------------------------------------------------
# Calibration mode — apply fixed action, measure GPS displacement, compare
# against gym kinematics prediction, write results to calibrate_mecanum.log.
# ---------------------------------------------------------------------------
if MODE == "calibrate":
import json as _json
_calib_vx = float(os.environ.get("CALIB_VX", "0.5"))
_calib_vy = float(os.environ.get("CALIB_VY", "0.0"))
_calib_om = float(os.environ.get("CALIB_OM", "0.0"))
_calib_n = int(os.environ.get("CALIB_N_STEPS", "150"))
_log_path = os.path.join(_PROJECT_ROOT, "calibrate_mecanum.log")
# Settle for 5 steps so GPS stabilises.
for _ in range(5):
robot.step(timestep)
_pos0 = gps.getValues(); _x0, _y0 = _pos0[0], _pos0[1]
_n_calib = compass.getValues(); _h0 = math.atan2(_n_calib[0], _n_calib[1])
# Gym-predicted displacement using shared kinematics.
from herding.world.diffdrive import velocity_to_mecanum_wheels, mecanum_step
from herding.world.geometry import WEBOTS_DT as _DT
_xg, _yg, _hg = _x0, _y0, _h0
for _ in range(_calib_n):
_wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
_calib_vx, _calib_vy, _calib_om, _hg,
max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
wheel_base=DOG_WHEEL_BASE,
)
_xg, _yg, _hg = mecanum_step(
_xg, _yg, _hg, _wfl, _wfr, _wrl, _wrr,
DOG_WHEEL_RADIUS, DOG_WHEEL_BASE_X / 2, DOG_WHEEL_BASE_Y / 2, _DT,
)
# Run actual Webots steps.
for _ in range(_calib_n):
_nv = compass.getValues(); _h = math.atan2(_nv[0], _nv[1])
_wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
_calib_vx, _calib_vy, _calib_om, _h,
max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
wheel_base=DOG_WHEEL_BASE,
)
if DRIVE_MODE == "mecanum":
drive_mecanum(_calib_vx, _calib_vy, _calib_om,
fl_motor, fr_motor, rl_motor, rr_motor,
compass, MOTOR_MAX)
robot.step(timestep)
_pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1]
_nv1 = compass.getValues(); _h1 = math.atan2(_nv1[0], _nv1[1])
_T = _calib_n * _DT
_vx_w = (_x1 - _x0) / _T; _vy_w = (_y1 - _y0) / _T
_vx_g = (_xg - _x0) / _T; _vy_g = (_yg - _y0) / _T
_dh_deg = math.degrees(math.atan2(math.sin(_h1 - _h0),
math.cos(_h1 - _h0)))
def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p)
_result = (
f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f}) "
f"steps={_calib_n}\n"
f" gym displacement: dx={_xg-_x0:.4f} dy={_yg-_y0:.4f} "
f"(vx={_vx_g:.3f} vy={_vy_g:.3f} m/s)\n"
f" webots displacement: dx={_x1-_x0:.4f} dy={_y1-_y0:.4f} "
f"(vx={_vx_w:.3f} vy={_vy_w:.3f} m/s)\n"
f" vx error: {_pct(_vx_w, _vx_g):.1f}% "
f"vy error: {_pct(_vy_w, _vy_g):.1f}% "
f"heading drift: {_dh_deg:+.1f}°\n"
)
print(_result)
with open(_log_path, "a") as _f:
_f.write(_result + "\n")
# Write the run-done sentinel so run_webots.sh closes Webots cleanly.
with open(RUN_DONE_FILE, "w") as _f:
_f.write("calibrate\n")
import sys as _sys; _sys.exit(0)
while robot.step(timestep) != -1:
step_count += 1
# Drain sheep emitter messages → GT (diagnostic only).
while receiver.getQueueLength() > 0:
msg = receiver.getString()
receiver.nextPacket()
parts = msg.split(":")
if len(parts) == 4 and parts[0] == "sheep":
try:
_gt_sheep[parts[1]] = (float(parts[2]), float(parts[3]))
except ValueError:
pass
pos = gps.getValues()
dog_xy = (pos[0], pos[1])
n = compass.getValues()
dog_heading = math.atan2(n[0], n[1])
# ---- LiDAR perception → tracker → active sheep positions ----
ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32)
detections = detections_from_scan(
ranges, dog_xy[0], dog_xy[1], dog_heading,
detection_cfg=HERDING_WEBOTS.detection,
lidar_cfg=_LIDAR_CFG,
)
if USE_GT_PERCEPTION and _gt_sheep:
# Bypass tracker: feed GT emitter positions directly to policy/teacher.
sheep_positions = {k: v for k, v in _gt_sheep.items()
if not is_penned(v[0], v[1])}
tracker.update(detections) # still advance tracker for diagnostics
else:
sheep_positions = tracker.update(detections)
sheep_xy_list = list(sheep_positions.values())
sheep_penned_list = [False] * len(sheep_xy_list)
single_obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)
# ---- Action selection ----
omega = 0.0
if MODE in ("bc", "rl") and policy_handle is not None:
if not sheep_positions:
# BC/RL never saw "empty obs during operation" in training (empty
# obs only happened at episode end), so the policy outputs ~zero
# and the dog gets stuck. Fall back to an *active scan*: rotate
# the desired heading slowly so the narrow 140° FOV sweeps the
# field instead of charging in one fixed direction (which
# otherwise drives the dog into the north wall and ends the run).
scan_h = (step_count * 0.015) % (2.0 * math.pi)
vx = 0.5 * math.cos(scan_h)
vy = 0.5 * math.sin(scan_h)
omega = 0.5 if DRIVE_MODE == "mecanum" else 0.0
else:
action = policy_handle.predict(single_obs)
vx, vy = float(action[0]), float(action[1])
if DRIVE_MODE == "mecanum" and len(action) >= 3:
omega = float(action[2])
else:
result = analytic_teacher(
dog_xy, dog_heading, sheep_positions, PEN_ENTRY,
DRIVE_MODE,
)
if len(result) == 4:
vx, vy, omega, _mode_str = result
else:
vx, vy, _mode_str = result
# Near-sheep speed modulation (shared by every mode).
vx, vy = modulate_speed(vx, vy, dog_xy, sheep_positions)
# Axis-split mask for the dual-dog setup: this dog leads its
# assigned axis (full gain) and contributes AXIS_LEAK on the
# off-axis. With LEAK=0 the mask is strict; with LEAK=1 the dogs
# run identical full-power policies.
if DOG_AXIS == "x":
vy *= AXIS_LEAK
elif DOG_AXIS == "y":
vx *= AXIS_LEAK
# EMA smoothing — kills frame-to-frame action jitter.
if DRIVE_MODE == "mecanum":
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
omega = ACTION_SMOOTH * prev_action[2] + (1.0 - ACTION_SMOOTH) * omega
else:
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
# Safety: dog must never enter the pen.
vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
prev_action = (vx, vy, omega) if DRIVE_MODE == "mecanum" else (vx, vy)
if DRIVE_MODE == "mecanum":
drive_mecanum(vx, vy, omega, fl_motor, fr_motor, rl_motor, rr_motor,
compass, MOTOR_MAX)
else:
drive_diff(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
emitter.send(f"dog:{DOG_NAME}:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")
# Cosmetic ear wiggle.
ear_phase += 0.12
ear_pos = EAR_AMPLITUDE * math.sin(ear_phase)
left_ear.setVelocity(EAR_RATE)
right_ear.setVelocity(EAR_RATE)
left_ear.setPosition(ear_pos)
right_ear.setPosition(-ear_pos)
# First step we have GT visibility — record the simulation start
# so per-sheep pen times can be reported relative to it.
if _gt_sheep and _t_start is None:
_t_start = step_count
# Record the first step at which each sheep is observed penned.
for _sname, (_sx, _sy) in _gt_sheep.items():
if _sname not in _pen_step and is_penned(_sx, _sy):
_pen_step[_sname] = step_count
# Auto-finish: when all GT sheep are penned, write the sentinel
# and print the per-sheep penning summary so the operator sees
# the metrics in the terminal. The launcher polls for the
# sentinel and closes Webots cleanly.
if _gt_sheep and not _run_done:
gt_active = sum(1 for x, y in _gt_sheep.values()
if not is_penned(x, y))
if gt_active == 0:
os.makedirs(os.path.dirname(RUN_DONE_FILE), exist_ok=True)
open(RUN_DONE_FILE, "w").close()
_run_done = True
print(f"[dog] all {len(_gt_sheep)} sheep penned at step "
f"{step_count} — wrote sentinel, launcher will close Webots")
# Only the first dog to detect the finish prints the
# summary (in dual-dog mode both run in lock-step but the
# sentinel acts as a one-shot lock).
_dt = 0.016 # Webots basicTimeStep, seconds
print("")
print(f"[results] mode={MODE} drive={DRIVE_MODE} world={WORLD} "
f"lidar={LIDAR_FOV_VARIANT} dogs={DOG_NAME}"
+ (f" seed={HERDING_SEED}" if HERDING_SEED is not None else ""))
print(f"[results] total steps: {step_count} "
f"({step_count * _dt:.1f} s simulated)")
ordered = sorted(_pen_step.items(), key=lambda kv: kv[1])
for i, (sn, st) in enumerate(ordered, 1):
rel = st - (_t_start or 0)
print(f"[results] #{i} {sn:8s} penned at step {st:>6d} "
f"({rel * _dt:6.2f} s)")
if len(ordered) >= 2:
first = ordered[0][1]
last = ordered[-1][1]
print(f"[results] gate spread: {last - first} steps "
f"({(last - first) * _dt:.2f} s) between first and last pen")
if step_count % 200 == 0:
gt_penned = sum(1 for x, y in _gt_sheep.values()
if is_penned(x, y))
gt_total = len(_gt_sheep)
common = (f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
f"GT_penned={gt_penned}/{gt_total} "
f"tracks_active={tracker.n_active()} "
f"tracks_cand={tracker.n_candidate()} "
f"tracks_penned={tracker.n_penned()} "
f"detections={len(detections)} "
f"h={math.degrees(dog_heading):+.1f}°"
+ (f"→{math.degrees(dog_heading):+.1f}°" if _h_ema > 0 else ""))
if DRIVE_MODE == "mecanum":
print(f"{common} action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})")
else:
print(f"{common} action=({vx:+.2f}, {vy:+.2f})")