TIR_PROJ/controllers/shepherd_dog/shepherd_dog.py

"""Shepherd Dog controller (Webots).

Mode is selected by ``HERDING_MODE`` — read from the env var or from
the ``herding_runtime.cfg`` file the launcher writes (Webots strips
env vars from controller subprocesses on some setups):

    strombom    → canonical Strömbom (2014) collect/drive heuristic
                  wrapped in ActiveScanTeacher (opening rotation +
                  walk-to-centre when the tracker briefly empties)
    sequential  → single-target "pin-and-push", same wrapper
    universal   → mecanum-aware teacher (Strömbom + omega + recovery)
    bc          → behaviour-cloned MLP, trained on universal demos
    rl          → KL-regularised PPO fine-tune of `bc`

Policy directories are resolved by `policy_loader` from
``training/runs/{bc,rl}_{drive}_{world}`` with a fallback to
``training/runs/{bc,rl}`` (legacy single-policy paths).

Sheep perception
----------------
The dog perceives sheep through its front-mounted 140° LiDAR
(``protos/ShepherdDog.proto``: 180 rays, 12 m max range). Each step:

    1. Read ``lidar.getRangeImage()``.
    2. Cluster returns into world-frame ``(x, y)`` estimates
       (``herding.perception.lidar_perception.detections_from_scan``).
    3. Fold detections into a ``SheepTracker``, which maintains
       last-seen positions for sheep currently out of FOV, requires
       consensus across multiple frames before promoting a candidate
       to a real track, and latches "penned" once a track crosses
       the gate plane south.

Setting ``HERDING_USE_GT=1`` bypasses the tracker and feeds emitter
ground-truth positions to the policy — useful as a perception
ablation for the analytic baselines.

Sheep emitter messages are otherwise read for diagnostic logging
only (``GT_penned`` counter + auto-finish sentinel); the control
loop never depends on them.

Auto-finish
-----------
When every emitter-reported sheep is penned, the controller writes
``training/.run_done``. The launcher (``tools/run_webots.sh``)
detects the sentinel and closes Webots so headless sweep runs are
bounded.
"""

import math
import os
import sys

# --- Make the shared herding/ package importable from this controller dir ---
_HERE = os.path.dirname(os.path.abspath(__file__))
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)

# --- Read runtime cfg early so env vars are set before geometry import ---
def _load_runtime_config():
    cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
    if not os.path.exists(cfg_path):
        return {}
    out = {}
    try:
        with open(cfg_path) as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                k, _, v = line.partition("=")
                out[k.strip().upper()] = v.strip()
    except OSError:
        return {}
    return out

_runtime_cfg = _load_runtime_config()
# Seed env vars from runtime cfg so downstream modules (geometry.py) see them.
for _rk, _rv in _runtime_cfg.items():
    if _rk.startswith("HERDING_") and _rk not in os.environ:
        os.environ[_rk] = _rv

import numpy as np

from controller import Robot

from herding.control.active_scan import ActiveScanTeacher
from herding.control.modulation import modulate_speed
from herding.control.sequential import compute_action as sequential_action
from herding.control.strombom import compute_action as strombom_action
from herding.control.universal import compute_action as universal_action
from herding.perception.obs import build_obs
from herding.perception.lidar_perception import detections_from_scan
from herding.perception.sheep_tracker import SheepTracker
from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels
from herding.world.geometry import (
    DOG_SOUTH_LIMIT,
    PEN_ENTRY, is_penned,
)
from herding.config import HERDING_WEBOTS, RobotConfig

# Robot physical constants come from RobotConfig so they stay in sync with
# the training environment.  The Webots preset uses action_smooth=0.55.
_ROBOT_CFG: RobotConfig = HERDING_WEBOTS.robot
DOG_WHEEL_RADIUS  = _ROBOT_CFG.wheel_radius
DOG_WHEEL_BASE    = _ROBOT_CFG.wheel_base
DOG_WHEEL_BASE_X  = _ROBOT_CFG.wheel_base_x
DOG_WHEEL_BASE_Y  = _ROBOT_CFG.wheel_base_y
DOG_MAX_WHEEL_OMEGA = _ROBOT_CFG.max_wheel_omega
DOG_MAX_LINEAR    = _ROBOT_CFG.max_linear


# ---------------------------------------------------------------------------
# Mode + policy resolution (cfg already loaded above)
# ---------------------------------------------------------------------------

MODE = (os.environ.get("HERDING_MODE")
        or _runtime_cfg.get("HERDING_MODE")
        or "bc").lower()

_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal", "calibrate")
if MODE not in _VALID_MODES:
    print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
    MODE = "strombom"

# Drive mode: "differential" (2-wheel) or "mecanum" (4-wheel omnidirectional).
DRIVE_MODE = (os.environ.get("HERDING_DRIVE")
              or _runtime_cfg.get("HERDING_DRIVE")
              or "differential").lower()
if DRIVE_MODE not in ("differential", "mecanum"):
    print(f"[dog] unknown HERDING_DRIVE={DRIVE_MODE!r}; defaulting to differential.")
    DRIVE_MODE = "differential"

# World shape — used to disambiguate the trained policy directory.
WORLD = (os.environ.get("HERDING_WORLD")
         or _runtime_cfg.get("HERDING_WORLD")
         or "field").lower()

# Diagnostic: bypass LiDAR tracker and use GT emitter positions directly.
# Set HERDING_USE_GT=1 to isolate perception sim-to-real gap from policy quality.
USE_GT_PERCEPTION = bool(int(
    os.environ.get("HERDING_USE_GT")
    or _runtime_cfg.get("HERDING_USE_GT", "0")
))


def _resolve_policy_dir(mode: str, drive: str, world: str) -> str:
    """Where to look for the trained policy for the given mode/drive/world.

    Priority:
      1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points
         to a real directory.
      2. Canonical:  training/runs/{bc,rl}_<drive>_<world>
      3. Drive-only: training/runs/{bc,rl}_<drive>
      4. Bare-mode:  training/runs/{bc,rl}
    The first existing directory wins; if none exist, the canonical
    path is returned so the loader's error message is informative.
    """
    env_dir = (os.environ.get("HERDING_POLICY_DIR")
               or _runtime_cfg.get("HERDING_POLICY_DIR"))
    if env_dir and os.path.isdir(env_dir):
        return env_dir
    base = "rl" if mode == "rl" else "bc"
    runs = os.path.join(_PROJECT_ROOT, "training", "runs")
    for cand in (f"{base}_{drive}_{world}", f"{base}_{drive}", base):
        path = os.path.join(runs, cand)
        if os.path.isdir(path):
            return path
    return os.path.join(runs, f"{base}_{drive}_{world}")


print(f"[dog] mode={MODE}  drive={DRIVE_MODE}  world={WORLD}")

POLICY_DIR = _resolve_policy_dir(MODE, DRIVE_MODE, WORLD)
policy_handle = None
if MODE in ("bc", "rl"):
    print(f"[dog] resolved POLICY_DIR={POLICY_DIR}  exists={os.path.isdir(POLICY_DIR)}")
    try:
        from policy_loader import load as _load_policy
        policy_handle = _load_policy(POLICY_DIR)
        print(f"[dog] policy loaded from {POLICY_DIR}")
    except Exception as exc:
        print(f"[dog] policy load failed ({exc!r}); falling back to strombom.")
        MODE = "strombom"


# ---------------------------------------------------------------------------
# Control parameters
# ---------------------------------------------------------------------------

ACTION_SMOOTH = _ROBOT_CFG.action_smooth  # EMA on (vx, vy) — kills frame-to-frame jitter
RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done")


def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
    """If the dog is near the south barrier and the action would push it
    further south, override with a northward action. Hard invariant: the
    dog never enters the pen."""
    if dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
        return (0.0, 1.0)
    if dog_y < DOG_SOUTH_LIMIT + 0.5 and vy < -0.2:
        return (vx * 0.5, max(0.0, vy + 0.5))
    return (vx, vy)


def drive_diff(vx: float, vy: float, left_motor, right_motor,
               compass, motor_max: float):
    if math.hypot(vx, vy) < 1e-3:
        left_motor.setVelocity(0.0)
        right_motor.setVelocity(0.0)
        return
    n = compass.getValues()
    h = math.atan2(n[0], n[1])
    left, right = velocity_to_wheels(
        vx, vy, h,
        max_linear=DOG_MAX_LINEAR,
        wheel_radius=DOG_WHEEL_RADIUS,
        max_wheel_omega=motor_max,
        k_turn=4.0,
    )
    left_motor.setVelocity(left)
    right_motor.setVelocity(right)


def drive_mecanum(vx: float, vy: float, omega: float,
                  fl_motor, fr_motor, rl_motor, rr_motor,
                  compass, motor_max: float):
    if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
        fl_motor.setVelocity(0.0)
        fr_motor.setVelocity(0.0)
        rl_motor.setVelocity(0.0)
        rr_motor.setVelocity(0.0)
        return
    n = compass.getValues()
    h = math.atan2(n[0], n[1])
    w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
        vx, vy, omega, h,
        max_linear=DOG_MAX_LINEAR,
        wheel_radius=DOG_WHEEL_RADIUS,
        lx=DOG_WHEEL_BASE_X / 2.0, ly=DOG_WHEEL_BASE_Y / 2.0,
        max_wheel_omega=motor_max,
        k_turn=4.0,
        wheel_base=DOG_WHEEL_BASE,
    )
    fl_motor.setVelocity(w_fl)
    fr_motor.setVelocity(w_fr)
    rl_motor.setVelocity(w_rl)
    rr_motor.setVelocity(w_rr)


# ---------------------------------------------------------------------------
# Webots devices
# ---------------------------------------------------------------------------

robot = Robot()
timestep = int(robot.getBasicTimeStep())

if DRIVE_MODE == "mecanum":
    fl_motor = robot.getDevice("front left wheel motor")
    fr_motor = robot.getDevice("front right wheel motor")
    rl_motor = robot.getDevice("rear left wheel motor")
    rr_motor = robot.getDevice("rear right wheel motor")
    for m in (fl_motor, fr_motor, rl_motor, rr_motor):
        m.setPosition(float("inf"))
        m.setVelocity(0.0)
    MOTOR_MAX = min(fl_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
else:
    left_motor = robot.getDevice("left wheel motor")
    right_motor = robot.getDevice("right wheel motor")
    left_motor.setPosition(float("inf"))
    right_motor.setPosition(float("inf"))
    left_motor.setVelocity(0.0)
    right_motor.setVelocity(0.0)
    MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)

gps = robot.getDevice("gps");           gps.enable(timestep)
compass = robot.getDevice("compass");   compass.enable(timestep)
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
emitter = robot.getDevice("emitter")
lidar = robot.getDevice("lidar");       lidar.enable(timestep)

tracker = SheepTracker(tracker_cfg=HERDING_WEBOTS.tracker)

# Cosmetic ear motors — animated; not used by control.
left_ear = robot.getDevice("left ear motor")
right_ear = robot.getDevice("right ear motor")
left_ear.setPosition(float("inf"))
right_ear.setPosition(float("inf"))
left_ear.setVelocity(0.0)
right_ear.setVelocity(0.0)
ear_phase = 0.0
EAR_AMPLITUDE = 0.35
EAR_RATE = 8.0


# ---------------------------------------------------------------------------
# Main loop
# ---------------------------------------------------------------------------

# Analytic-teacher wrapper (instantiated lazily so RL/BC modes don't pay
# the import-time cost). Each gets the same ActiveScanTeacher treatment:
# rotate-on-empty, walk-to-centre, near-sheep speed modulation.
analytic_teacher = None
if MODE in ("strombom", "sequential"):
    base_fn = strombom_action if MODE == "strombom" else sequential_action
    analytic_teacher = ActiveScanTeacher(base_fn)
elif MODE == "universal":
    analytic_teacher = ActiveScanTeacher(universal_action)

# GT positions from sheep emitters — used **only** for the auto-finish
# sentinel and the GT_penned diagnostic line. Never fed into control.
_gt_sheep: dict = {}
_run_done = False

prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0)
step_count = 0

# ---------------------------------------------------------------------------
# Calibration mode — apply fixed action, measure GPS displacement, compare
# against gym kinematics prediction, write results to calibrate_mecanum.log.
# ---------------------------------------------------------------------------
if MODE == "calibrate":
    import json as _json
    _calib_vx  = float(os.environ.get("CALIB_VX", "0.5"))
    _calib_vy  = float(os.environ.get("CALIB_VY", "0.0"))
    _calib_om  = float(os.environ.get("CALIB_OM", "0.0"))
    _calib_n   = int(os.environ.get("CALIB_N_STEPS", "150"))
    _log_path  = os.path.join(_PROJECT_ROOT, "calibrate_mecanum.log")
    # Settle for 5 steps so GPS stabilises.
    for _ in range(5):
        robot.step(timestep)
    _pos0 = gps.getValues(); _x0, _y0 = _pos0[0], _pos0[1]
    _n_calib = compass.getValues(); _h0 = math.atan2(_n_calib[0], _n_calib[1])
    # Gym-predicted displacement using shared kinematics.
    from herding.world.diffdrive import velocity_to_mecanum_wheels, mecanum_step
    from herding.world.geometry import WEBOTS_DT as _DT
    _xg, _yg, _hg = _x0, _y0, _h0
    for _ in range(_calib_n):
        _wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
            _calib_vx, _calib_vy, _calib_om, _hg,
            max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
            lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
            max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
            wheel_base=DOG_WHEEL_BASE,
        )
        _xg, _yg, _hg = mecanum_step(
            _xg, _yg, _hg, _wfl, _wfr, _wrl, _wrr,
            DOG_WHEEL_RADIUS, DOG_WHEEL_BASE_X / 2, DOG_WHEEL_BASE_Y / 2, _DT,
        )
    # Run actual Webots steps.
    for _ in range(_calib_n):
        _nv = compass.getValues(); _h = math.atan2(_nv[0], _nv[1])
        _wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
            _calib_vx, _calib_vy, _calib_om, _h,
            max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
            lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
            max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
            wheel_base=DOG_WHEEL_BASE,
        )
        if DRIVE_MODE == "mecanum":
            drive_mecanum(_calib_vx, _calib_vy, _calib_om,
                          fl_motor, fr_motor, rl_motor, rr_motor,
                          compass, MOTOR_MAX)
        robot.step(timestep)
    _pos1 = gps.getValues(); _x1, _y1 = _pos1[0], _pos1[1]
    _T = _calib_n * _DT
    _vx_w = (_x1 - _x0) / _T;  _vy_w = (_y1 - _y0) / _T
    _vx_g = (_xg - _x0) / _T;  _vy_g = (_yg - _y0) / _T
    def _pct(a, p): return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p)
    _result = (
        f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f})  "
        f"steps={_calib_n}\n"
        f"  gym    displacement: dx={_xg-_x0:.4f}  dy={_yg-_y0:.4f}  "
        f"(vx={_vx_g:.3f}  vy={_vy_g:.3f} m/s)\n"
        f"  webots displacement: dx={_x1-_x0:.4f}  dy={_y1-_y0:.4f}  "
        f"(vx={_vx_w:.3f}  vy={_vy_w:.3f} m/s)\n"
        f"  vx error: {_pct(_vx_w, _vx_g):.1f}%    "
        f"vy error: {_pct(_vy_w, _vy_g):.1f}%\n"
    )
    print(_result)
    with open(_log_path, "a") as _f:
        _f.write(_result + "\n")
    # Write the run-done sentinel so run_webots.sh closes Webots cleanly.
    with open(RUN_DONE_FILE, "w") as _f:
        _f.write("calibrate\n")
    import sys as _sys; _sys.exit(0)

while robot.step(timestep) != -1:
    step_count += 1

    # Drain sheep emitter messages → GT (diagnostic only).
    while receiver.getQueueLength() > 0:
        msg = receiver.getString()
        receiver.nextPacket()
        parts = msg.split(":")
        if len(parts) == 4 and parts[0] == "sheep":
            try:
                _gt_sheep[parts[1]] = (float(parts[2]), float(parts[3]))
            except ValueError:
                pass

    pos = gps.getValues()
    dog_xy = (pos[0], pos[1])
    n = compass.getValues()
    dog_heading = math.atan2(n[0], n[1])

    # ---- LiDAR perception → tracker → active sheep positions ----
    ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32)
    detections = detections_from_scan(
        ranges, dog_xy[0], dog_xy[1], dog_heading,
        detection_cfg=HERDING_WEBOTS.detection,
        lidar_cfg=HERDING_WEBOTS.lidar,
    )
    if USE_GT_PERCEPTION and _gt_sheep:
        # Bypass tracker: feed GT emitter positions directly to policy/teacher.
        sheep_positions = {k: v for k, v in _gt_sheep.items()
                          if not is_penned(v[0], v[1])}
        tracker.update(detections)  # still advance tracker for diagnostics
    else:
        sheep_positions = tracker.update(detections)

    sheep_xy_list = list(sheep_positions.values())
    sheep_penned_list = [False] * len(sheep_xy_list)
    single_obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)

    # ---- Action selection ----
    omega = 0.0
    if MODE in ("bc", "rl") and policy_handle is not None:
        if not sheep_positions:
            # BC/RL never saw "empty obs during operation" in training (empty
            # obs only happened at episode end), so the policy outputs ~zero
            # and the dog gets stuck. Fall back to an *active scan*: rotate
            # the desired heading slowly so the narrow 140° FOV sweeps the
            # field instead of charging in one fixed direction (which
            # otherwise drives the dog into the north wall and ends the run).
            scan_h = (step_count * 0.015) % (2.0 * math.pi)
            vx = 0.5 * math.cos(scan_h)
            vy = 0.5 * math.sin(scan_h)
            omega = 0.5 if DRIVE_MODE == "mecanum" else 0.0
        else:
            action = policy_handle.predict(single_obs)
            vx, vy = float(action[0]), float(action[1])
            if DRIVE_MODE == "mecanum" and len(action) >= 3:
                omega = float(action[2])
    else:
        result = analytic_teacher(
            dog_xy, dog_heading, sheep_positions, PEN_ENTRY,
            DRIVE_MODE,
        )
        if len(result) == 4:
            vx, vy, omega, _mode_str = result
        else:
            vx, vy, _mode_str = result

    # Near-sheep speed modulation (shared by every mode).
    vx, vy = modulate_speed(vx, vy, dog_xy, sheep_positions)

    # EMA smoothing — kills frame-to-frame action jitter.
    if DRIVE_MODE == "mecanum":
        vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
        vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
        omega = ACTION_SMOOTH * prev_action[2] + (1.0 - ACTION_SMOOTH) * omega
    else:
        vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
        vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy

    # Safety: dog must never enter the pen.
    vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
    prev_action = (vx, vy, omega) if DRIVE_MODE == "mecanum" else (vx, vy)

    if DRIVE_MODE == "mecanum":
        drive_mecanum(vx, vy, omega, fl_motor, fr_motor, rl_motor, rr_motor,
                      compass, MOTOR_MAX)
    else:
        drive_diff(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
    emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")

    # Cosmetic ear wiggle.
    ear_phase += 0.12
    ear_pos = EAR_AMPLITUDE * math.sin(ear_phase)
    left_ear.setVelocity(EAR_RATE)
    right_ear.setVelocity(EAR_RATE)
    left_ear.setPosition(ear_pos)
    right_ear.setPosition(-ear_pos)

    # Auto-finish: when all GT sheep are penned, write the sentinel.
    # The launcher polls for it and closes Webots so batch evals don't
    # hang after the task is done. Bounded by `_gt_sheep` so we don't
    # fire during the first few steps while the receiver fills.
    if _gt_sheep and not _run_done:
        gt_active = sum(1 for x, y in _gt_sheep.values()
                        if not is_penned(x, y))
        if gt_active == 0:
            os.makedirs(os.path.dirname(RUN_DONE_FILE), exist_ok=True)
            open(RUN_DONE_FILE, "w").close()
            _run_done = True
            print(f"[dog] all {len(_gt_sheep)} sheep penned at step "
                  f"{step_count} — wrote sentinel, launcher will close Webots")

    if step_count % 200 == 0:
        gt_penned = sum(1 for x, y in _gt_sheep.values()
                        if is_penned(x, y))
        gt_total = len(_gt_sheep)
        common = (f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
                  f"GT_penned={gt_penned}/{gt_total} "
                  f"tracks_active={tracker.n_active()} "
                  f"tracks_cand={tracker.n_candidate()} "
                  f"tracks_penned={tracker.n_penned()} "
                  f"detections={len(detections)}")
        if DRIVE_MODE == "mecanum":
            print(f"{common} action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})")
        else:
            print(f"{common} action=({vx:+.2f}, {vy:+.2f})")