7ab69ab0f3
Naming pass: rename functions whose third+ segment is redundant or implementation-detail, sticking to the codebase's preferred ``noun_verb`` / ``verb_noun`` two-concept idiom. Renames are atomic across definitions, callers, and tests. is_penned_position → is_penned modulate_speed_near_sheep → modulate_speed mecanum_kinematics_step → mecanum_step policy_forward_mean → forward_mean Two-concept patterns like ``velocity_to_wheels`` / ``detections_from_scan`` / ``make_strombom_predictor`` are left alone — they're idiomatic converters / factories that read as a single concept, and the longer form aids grep-ability. Docstring polish: * ``herding/config.py`` header drops the "previously lived as a module-level literal" historical framing — we ship as a single thing, so the refactor anecdote no longer earns its keep. The usage examples now mention both ``HERDING_WEBOTS`` and ``HERDING_MEC_WEBOTS`` presets. 126 pytest cases still pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
514 lines
21 KiB
Python
514 lines
21 KiB
Python
"""Shepherd Dog controller (Webots).
|
|
|
|
Mode is selected by ``HERDING_MODE`` — read from the env var or from
|
|
the ``herding_runtime.cfg`` file the launcher writes (Webots strips
|
|
env vars from controller subprocesses on some setups):
|
|
|
|
strombom → canonical Strömbom (2014) collect/drive heuristic
|
|
wrapped in ActiveScanTeacher (opening rotation +
|
|
walk-to-centre when the tracker briefly empties)
|
|
sequential → single-target "pin-and-push", same wrapper
|
|
universal → mecanum-aware teacher (Strömbom + omega + recovery)
|
|
bc → behaviour-cloned MLP, trained on universal demos
|
|
rl → KL-regularised PPO fine-tune of `bc`
|
|
|
|
Policy directories are resolved by `policy_loader` from
|
|
``training/runs/{bc,rl}_{drive}_{world}`` with a fallback to
|
|
``training/runs/{bc,rl}`` (legacy single-policy paths).
|
|
|
|
Sheep perception
|
|
----------------
|
|
The dog perceives sheep through its front-mounted 140° LiDAR
|
|
(``protos/ShepherdDog.proto``: 180 rays, 12 m max range). Each step:
|
|
|
|
1. Read ``lidar.getRangeImage()``.
|
|
2. Cluster returns into world-frame ``(x, y)`` estimates
|
|
(``herding.perception.lidar_perception.detections_from_scan``).
|
|
3. Fold detections into a ``SheepTracker``, which maintains
|
|
last-seen positions for sheep currently out of FOV, requires
|
|
consensus across multiple frames before promoting a candidate
|
|
to a real track, and latches "penned" once a track crosses
|
|
the gate plane south.
|
|
|
|
Setting ``HERDING_USE_GT=1`` bypasses the tracker and feeds emitter
|
|
ground-truth positions to the policy — useful as a perception
|
|
ablation for the analytic baselines.
|
|
|
|
Sheep emitter messages are otherwise read for diagnostic logging
|
|
only (``GT_penned`` counter + auto-finish sentinel); the control
|
|
loop never depends on them.
|
|
|
|
Auto-finish
|
|
-----------
|
|
When every emitter-reported sheep is penned, the controller writes
|
|
``training/.run_done``. The launcher (``tools/run_webots.sh``)
|
|
detects the sentinel and closes Webots so headless sweep runs are
|
|
bounded.
|
|
"""
|
|
|
|
import math
|
|
import os
|
|
import sys
|
|
|
|
# --- Make the shared herding/ package importable from this controller dir ---
|
|
_HERE = os.path.dirname(os.path.abspath(__file__))
|
|
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
|
|
if _PROJECT_ROOT not in sys.path:
|
|
sys.path.insert(0, _PROJECT_ROOT)
|
|
|
|
# --- Read runtime cfg early so env vars are set before geometry import ---
|
|
def _load_runtime_config():
|
|
cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
|
|
if not os.path.exists(cfg_path):
|
|
return {}
|
|
out = {}
|
|
try:
|
|
with open(cfg_path) as f:
|
|
for line in f:
|
|
line = line.strip()
|
|
if not line or line.startswith("#") or "=" not in line:
|
|
continue
|
|
k, _, v = line.partition("=")
|
|
out[k.strip().upper()] = v.strip()
|
|
except OSError:
|
|
return {}
|
|
return out
|
|
|
|
_runtime_cfg = _load_runtime_config()
|
|
# Seed env vars from runtime cfg so downstream modules (geometry.py) see them.
|
|
for _rk, _rv in _runtime_cfg.items():
|
|
if _rk.startswith("HERDING_") and _rk not in os.environ:
|
|
os.environ[_rk] = _rv
|
|
|
|
import numpy as np
|
|
|
|
from controller import Robot
|
|
|
|
from herding.control.active_scan import ActiveScanTeacher
|
|
from herding.control.modulation import modulate_speed
|
|
from herding.control.sequential import compute_action as sequential_action
|
|
from herding.control.strombom import compute_action as strombom_action
|
|
from herding.control.universal import compute_action as universal_action
|
|
from herding.perception.obs import build_obs
|
|
from herding.perception.lidar_perception import detections_from_scan
|
|
from herding.perception.sheep_tracker import SheepTracker
|
|
from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels
|
|
from herding.world.geometry import (
|
|
DOG_SOUTH_LIMIT,
|
|
PEN_ENTRY, is_penned,
|
|
)
|
|
from herding.config import HERDING_WEBOTS, RobotConfig
|
|
|
|
# Robot physical constants come from RobotConfig so they stay in sync with
|
|
# the training environment. The Webots preset uses action_smooth=0.55.
|
|
_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
|
|
DOG_WHEEL_RADIUS = _ROBOT_CFG.wheel_radius
|
|
DOG_WHEEL_BASE = _ROBOT_CFG.wheel_base
|
|
DOG_WHEEL_BASE_X = _ROBOT_CFG.wheel_base_x
|
|
DOG_WHEEL_BASE_Y = _ROBOT_CFG.wheel_base_y
|
|
DOG_MAX_WHEEL_OMEGA = _ROBOT_CFG.max_wheel_omega
|
|
DOG_MAX_LINEAR = _ROBOT_CFG.max_linear
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Mode + policy resolution (cfg already loaded above)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
MODE = (os.environ.get("HERDING_MODE")
|
|
or _runtime_cfg.get("HERDING_MODE")
|
|
or "bc").lower()
|
|
|
|
_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal", "calibrate")
|
|
if MODE not in _VALID_MODES:
|
|
print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
|
|
MODE = "strombom"
|
|
|
|
# Drive mode: "differential" (2-wheel) or "mecanum" (4-wheel omnidirectional).
|
|
DRIVE_MODE = (os.environ.get("HERDING_DRIVE")
|
|
or _runtime_cfg.get("HERDING_DRIVE")
|
|
or "differential").lower()
|
|
if DRIVE_MODE not in ("differential", "mecanum"):
|
|
print(f"[dog] unknown HERDING_DRIVE={DRIVE_MODE!r}; defaulting to differential.")
|
|
DRIVE_MODE = "differential"
|
|
|
|
# World shape — used to disambiguate the trained policy directory.
|
|
WORLD = (os.environ.get("HERDING_WORLD")
|
|
or _runtime_cfg.get("HERDING_WORLD")
|
|
or "field").lower()
|
|
|
|
# Diagnostic: bypass LiDAR tracker and use GT emitter positions directly.
|
|
# Set HERDING_USE_GT=1 to isolate perception sim-to-real gap from policy quality.
|
|
USE_GT_PERCEPTION = bool(int(
|
|
os.environ.get("HERDING_USE_GT")
|
|
or _runtime_cfg.get("HERDING_USE_GT", "0")
|
|
))
|
|
|
|
|
|
def _resolve_policy_dir(mode: str, drive: str, world: str) -> str:
|
|
"""Where to look for the trained policy for the given mode/drive/world.
|
|
|
|
Priority:
|
|
1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points
|
|
to a real directory.
|
|
2. Canonical: training/runs/{bc,rl}_<drive>_<world>
|
|
3. Drive-only: training/runs/{bc,rl}_<drive>
|
|
4. Bare-mode: training/runs/{bc,rl}
|
|
The first existing directory wins; if none exist, the canonical
|
|
path is returned so the loader's error message is informative.
|
|
"""
|
|
env_dir = (os.environ.get("HERDING_POLICY_DIR")
|
|
or _runtime_cfg.get("HERDING_POLICY_DIR"))
|
|
if env_dir and os.path.isdir(env_dir):
|
|
return env_dir
|
|
base = "rl" if mode == "rl" else "bc"
|
|
runs = os.path.join(_PROJECT_ROOT, "training", "runs")
|
|
for cand in (f"{base}_{drive}_{world}", f"{base}_{drive}", base):
|
|
path = os.path.join(runs, cand)
|
|
if os.path.isdir(path):
|
|
return path
|
|
return os.path.join(runs, f"{base}_{drive}_{world}")
|
|
|
|
|
|
print(f"[dog] mode={MODE} drive={DRIVE_MODE} world={WORLD}")
|
|
|
|
POLICY_DIR = _resolve_policy_dir(MODE, DRIVE_MODE, WORLD)
|
|
policy_handle = None
|
|
if MODE in ("bc", "rl"):
|
|
print(f"[dog] resolved POLICY_DIR={POLICY_DIR} exists={os.path.isdir(POLICY_DIR)}")
|
|
try:
|
|
from policy_loader import load as _load_policy
|
|
policy_handle = _load_policy(POLICY_DIR)
|
|
print(f"[dog] policy loaded from {POLICY_DIR}")
|
|
except Exception as exc:
|
|
print(f"[dog] policy load failed ({exc!r}); falling back to strombom.")
|
|
MODE = "strombom"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Control parameters
|
|
# ---------------------------------------------------------------------------
|
|
|
|
ACTION_SMOOTH = _ROBOT_CFG.action_smooth # EMA on (vx, vy) — kills frame-to-frame jitter
|
|
RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done")
|
|
|
|
|
|
def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
|
|
"""If the dog is near the south barrier and the action would push it
|
|
further south, override with a northward action. Hard invariant: the
|
|
dog never enters the pen."""
|
|
if dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
|
|
return (0.0, 1.0)
|
|
if dog_y < DOG_SOUTH_LIMIT + 0.5 and vy < -0.2:
|
|
return (vx * 0.5, max(0.0, vy + 0.5))
|
|
return (vx, vy)
|
|
|
|
|
|
def drive_diff(vx: float, vy: float, left_motor, right_motor,
|
|
compass, motor_max: float):
|
|
if math.hypot(vx, vy) < 1e-3:
|
|
left_motor.setVelocity(0.0)
|
|
right_motor.setVelocity(0.0)
|
|
return
|
|
n = compass.getValues()
|
|
h = math.atan2(n[0], n[1])
|
|
left, right = velocity_to_wheels(
|
|
vx, vy, h,
|
|
max_linear=DOG_MAX_LINEAR,
|
|
wheel_radius=DOG_WHEEL_RADIUS,
|
|
max_wheel_omega=motor_max,
|
|
k_turn=4.0,
|
|
)
|
|
left_motor.setVelocity(left)
|
|
right_motor.setVelocity(right)
|
|
|
|
|
|
def drive_mecanum(vx: float, vy: float, omega: float,
|
|
fl_motor, fr_motor, rl_motor, rr_motor,
|
|
compass, motor_max: float):
|
|
if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
|
|
fl_motor.setVelocity(0.0)
|
|
fr_motor.setVelocity(0.0)
|
|
rl_motor.setVelocity(0.0)
|
|
rr_motor.setVelocity(0.0)
|
|
return
|
|
n = compass.getValues()
|
|
h = math.atan2(n[0], n[1])
|
|
w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
|
|
vx, vy, omega, h,
|
|
max_linear=DOG_MAX_LINEAR,
|
|
wheel_radius=DOG_WHEEL_RADIUS,
|
|
lx=DOG_WHEEL_BASE_X / 2.0, ly=DOG_WHEEL_BASE_Y / 2.0,
|
|
max_wheel_omega=motor_max,
|
|
k_turn=4.0,
|
|
wheel_base=DOG_WHEEL_BASE,
|
|
)
|
|
fl_motor.setVelocity(w_fl)
|
|
fr_motor.setVelocity(w_fr)
|
|
rl_motor.setVelocity(w_rl)
|
|
rr_motor.setVelocity(w_rr)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Webots devices
|
|
# ---------------------------------------------------------------------------
|
|
|
|
robot = Robot()
|
|
timestep = int(robot.getBasicTimeStep())
|
|
|
|
if DRIVE_MODE == "mecanum":
|
|
fl_motor = robot.getDevice("front left wheel motor")
|
|
fr_motor = robot.getDevice("front right wheel motor")
|
|
rl_motor = robot.getDevice("rear left wheel motor")
|
|
rr_motor = robot.getDevice("rear right wheel motor")
|
|
for m in (fl_motor, fr_motor, rl_motor, rr_motor):
|
|
m.setPosition(float("inf"))
|
|
m.setVelocity(0.0)
|
|
MOTOR_MAX = min(fl_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
|
|
else:
|
|
left_motor = robot.getDevice("left wheel motor")
|
|
right_motor = robot.getDevice("right wheel motor")
|
|
left_motor.setPosition(float("inf"))
|
|
right_motor.setPosition(float("inf"))
|
|
left_motor.setVelocity(0.0)
|
|
right_motor.setVelocity(0.0)
|
|
MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
|
|
|
|
gps = robot.getDevice("gps"); gps.enable(timestep)
|
|
compass = robot.getDevice("compass"); compass.enable(timestep)
|
|
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
|
|
emitter = robot.getDevice("emitter")
|
|
lidar = robot.getDevice("lidar"); lidar.enable(timestep)
|
|
|
|
tracker = SheepTracker(tracker_cfg=HERDING_WEBOTS.tracker)
|
|
|
|
# Cosmetic ear motors — animated; not used by control.
|
|
left_ear = robot.getDevice("left ear motor")
|
|
right_ear = robot.getDevice("right ear motor")
|
|
left_ear.setPosition(float("inf"))
|
|
right_ear.setPosition(float("inf"))
|
|
left_ear.setVelocity(0.0)
|
|
right_ear.setVelocity(0.0)
|
|
ear_phase = 0.0
|
|
EAR_AMPLITUDE = 0.35
|
|
EAR_RATE = 8.0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main loop
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Analytic-teacher wrapper (instantiated lazily so RL/BC modes don't pay
|
|
# the import-time cost). Each gets the same ActiveScanTeacher treatment:
|
|
# rotate-on-empty, walk-to-centre, near-sheep speed modulation.
|
|
analytic_teacher = None
|
|
if MODE in ("strombom", "sequential"):
|
|
base_fn = strombom_action if MODE == "strombom" else sequential_action
|
|
analytic_teacher = ActiveScanTeacher(base_fn)
|
|
elif MODE == "universal":
|
|
analytic_teacher = ActiveScanTeacher(universal_action)
|
|
|
|
# GT positions from sheep emitters — used **only** for the auto-finish
|
|
# sentinel and the GT_penned diagnostic line. Never fed into control.
|
|
_gt_sheep: dict = {}
|
|
_run_done = False
|
|
|
|
prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0)
|
|
step_count = 0
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Calibration mode — apply fixed action, measure GPS displacement, compare
|
|
# against gym kinematics prediction, write results to calibrate_mecanum.log.
|
|
# ---------------------------------------------------------------------------
|
|
if MODE == "calibrate":
|
|
import json as _json
|
|
_calib_vx = float(os.environ.get("CALIB_VX", "0.5"))
|
|
_calib_vy = float(os.environ.get("CALIB_VY", "0.0"))
|
|
_calib_om = float(os.environ.get("CALIB_OM", "0.0"))
|
|
_calib_n = int(os.environ.get("CALIB_N_STEPS", "150"))
|
|
_log_path = os.path.join(_PROJECT_ROOT, "calibrate_mecanum.log")
|
|
# Settle for 5 steps so GPS stabilises.
|
|
for _ in range(5):
|
|
robot.step(timestep)
|
|
_pos0 = gps.getValues(); _x0, _y0 = _pos0[0], _pos0[1]
|
|
_n_calib = compass.getValues(); _h0 = math.atan2(_n_calib[0], _n_calib[1])
|
|
# Gym-predicted displacement using shared kinematics.
|
|
from herding.world.diffdrive import velocity_to_mecanum_wheels, mecanum_step
|
|
from herding.world.geometry import WEBOTS_DT as _DT
|
|
_xg, _yg, _hg = _x0, _y0, _h0
|
|
for _ in range(_calib_n):
|
|
_wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
|
|
_calib_vx, _calib_vy, _calib_om, _hg,
|
|
max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
|
|
lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
|
|
max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
|
|
wheel_base=DOG_WHEEL_BASE,
|
|
)
|
|
_xg, _yg, _hg = mecanum_step(
|
|
_xg, _yg, _hg, _wfl, _wfr, _wrl, _wrr,
|
|
DOG_WHEEL_RADIUS, DOG_WHEEL_BASE_X / 2, DOG_WHEEL_BASE_Y / 2, _DT,
|
|
)
|
|
# Run actual Webots steps.
|
|
for _ in range(_calib_n):
|
|
_nv = compass.getValues(); _h = math.atan2(_nv[0], _nv[1])
|
|
_wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
|
|
_calib_vx, _calib_vy, _calib_om, _h,
|
|
max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
|
|
lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
|
|
max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
|
|
wheel_base=DOG_WHEEL_BASE,
|
|
)
|
|
if DRIVE_MODE == "mecanum":
|
|
drive_mecanum(_calib_vx, _calib_vy, _calib_om,
|
|
fl_motor, fr_motor, rl_motor, rr_motor,
|
|
compass, MOTOR_MAX)
|
|
robot.step(timestep)
|
|
_pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1]
|
|
_T = _calib_n * _DT
|
|
_vx_w = (_x1 - _x0) / _T; _vy_w = (_y1 - _y0) / _T
|
|
_vx_g = (_xg - _x0) / _T; _vy_g = (_yg - _y0) / _T
|
|
def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p)
|
|
_result = (
|
|
f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f}) "
|
|
f"steps={_calib_n}\n"
|
|
f" gym displacement: dx={_xg-_x0:.4f} dy={_yg-_y0:.4f} "
|
|
f"(vx={_vx_g:.3f} vy={_vy_g:.3f} m/s)\n"
|
|
f" webots displacement: dx={_x1-_x0:.4f} dy={_y1-_y0:.4f} "
|
|
f"(vx={_vx_w:.3f} vy={_vy_w:.3f} m/s)\n"
|
|
f" vx error: {_pct(_vx_w, _vx_g):.1f}% "
|
|
f"vy error: {_pct(_vy_w, _vy_g):.1f}%\n"
|
|
)
|
|
print(_result)
|
|
with open(_log_path, "a") as _f:
|
|
_f.write(_result + "\n")
|
|
# Write the run-done sentinel so run_webots.sh closes Webots cleanly.
|
|
with open(RUN_DONE_FILE, "w") as _f:
|
|
_f.write("calibrate\n")
|
|
import sys as _sys; _sys.exit(0)
|
|
|
|
while robot.step(timestep) != -1:
|
|
step_count += 1
|
|
|
|
# Drain sheep emitter messages → GT (diagnostic only).
|
|
while receiver.getQueueLength() > 0:
|
|
msg = receiver.getString()
|
|
receiver.nextPacket()
|
|
parts = msg.split(":")
|
|
if len(parts) == 4 and parts[0] == "sheep":
|
|
try:
|
|
_gt_sheep[parts[1]] = (float(parts[2]), float(parts[3]))
|
|
except ValueError:
|
|
pass
|
|
|
|
pos = gps.getValues()
|
|
dog_xy = (pos[0], pos[1])
|
|
n = compass.getValues()
|
|
dog_heading = math.atan2(n[0], n[1])
|
|
|
|
# ---- LiDAR perception → tracker → active sheep positions ----
|
|
ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32)
|
|
detections = detections_from_scan(
|
|
ranges, dog_xy[0], dog_xy[1], dog_heading,
|
|
detection_cfg=HERDING_WEBOTS.detection,
|
|
lidar_cfg=HERDING_WEBOTS.lidar,
|
|
)
|
|
if USE_GT_PERCEPTION and _gt_sheep:
|
|
# Bypass tracker: feed GT emitter positions directly to policy/teacher.
|
|
sheep_positions = {k: v for k, v in _gt_sheep.items()
|
|
if not is_penned(v[0], v[1])}
|
|
tracker.update(detections) # still advance tracker for diagnostics
|
|
else:
|
|
sheep_positions = tracker.update(detections)
|
|
|
|
sheep_xy_list = list(sheep_positions.values())
|
|
sheep_penned_list = [False] * len(sheep_xy_list)
|
|
single_obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)
|
|
|
|
# ---- Action selection ----
|
|
omega = 0.0
|
|
if MODE in ("bc", "rl") and policy_handle is not None:
|
|
if not sheep_positions:
|
|
# BC/RL never saw "empty obs during operation" in training (empty
|
|
# obs only happened at episode end), so the policy outputs ~zero
|
|
# and the dog gets stuck. Fall back to an *active scan*: rotate
|
|
# the desired heading slowly so the narrow 140° FOV sweeps the
|
|
# field instead of charging in one fixed direction (which
|
|
# otherwise drives the dog into the north wall and ends the run).
|
|
scan_h = (step_count * 0.015) % (2.0 * math.pi)
|
|
vx = 0.5 * math.cos(scan_h)
|
|
vy = 0.5 * math.sin(scan_h)
|
|
omega = 0.5 if DRIVE_MODE == "mecanum" else 0.0
|
|
else:
|
|
action = policy_handle.predict(single_obs)
|
|
vx, vy = float(action[0]), float(action[1])
|
|
if DRIVE_MODE == "mecanum" and len(action) >= 3:
|
|
omega = float(action[2])
|
|
else:
|
|
result = analytic_teacher(
|
|
dog_xy, dog_heading, sheep_positions, PEN_ENTRY,
|
|
DRIVE_MODE,
|
|
)
|
|
if len(result) == 4:
|
|
vx, vy, omega, _mode_str = result
|
|
else:
|
|
vx, vy, _mode_str = result
|
|
|
|
# Near-sheep speed modulation (shared by every mode).
|
|
vx, vy = modulate_speed(vx, vy, dog_xy, sheep_positions)
|
|
|
|
# EMA smoothing — kills frame-to-frame action jitter.
|
|
if DRIVE_MODE == "mecanum":
|
|
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
|
|
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
|
|
omega = ACTION_SMOOTH * prev_action[2] + (1.0 - ACTION_SMOOTH) * omega
|
|
else:
|
|
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
|
|
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
|
|
|
|
# Safety: dog must never enter the pen.
|
|
vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
|
|
prev_action = (vx, vy, omega) if DRIVE_MODE == "mecanum" else (vx, vy)
|
|
|
|
if DRIVE_MODE == "mecanum":
|
|
drive_mecanum(vx, vy, omega, fl_motor, fr_motor, rl_motor, rr_motor,
|
|
compass, MOTOR_MAX)
|
|
else:
|
|
drive_diff(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
|
|
emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")
|
|
|
|
# Cosmetic ear wiggle.
|
|
ear_phase += 0.12
|
|
ear_pos = EAR_AMPLITUDE * math.sin(ear_phase)
|
|
left_ear.setVelocity(EAR_RATE)
|
|
right_ear.setVelocity(EAR_RATE)
|
|
left_ear.setPosition(ear_pos)
|
|
right_ear.setPosition(-ear_pos)
|
|
|
|
# Auto-finish: when all GT sheep are penned, write the sentinel.
|
|
# The launcher polls for it and closes Webots so batch evals don't
|
|
# hang after the task is done. Bounded by `_gt_sheep` so we don't
|
|
# fire during the first few steps while the receiver fills.
|
|
if _gt_sheep and not _run_done:
|
|
gt_active = sum(1 for x, y in _gt_sheep.values()
|
|
if not is_penned(x, y))
|
|
if gt_active == 0:
|
|
os.makedirs(os.path.dirname(RUN_DONE_FILE), exist_ok=True)
|
|
open(RUN_DONE_FILE, "w").close()
|
|
_run_done = True
|
|
print(f"[dog] all {len(_gt_sheep)} sheep penned at step "
|
|
f"{step_count} — wrote sentinel, launcher will close Webots")
|
|
|
|
if step_count % 200 == 0:
|
|
gt_penned = sum(1 for x, y in _gt_sheep.values()
|
|
if is_penned(x, y))
|
|
gt_total = len(_gt_sheep)
|
|
common = (f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
|
|
f"GT_penned={gt_penned}/{gt_total} "
|
|
f"tracks_active={tracker.n_active()} "
|
|
f"tracks_cand={tracker.n_candidate()} "
|
|
f"tracks_penned={tracker.n_penned()} "
|
|
f"detections={len(detections)}")
|
|
if DRIVE_MODE == "mecanum":
|
|
print(f"{common} action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})")
|
|
else:
|
|
print(f"{common} action=({vx:+.2f}, {vy:+.2f})")
|