TIR_PROJ/controllers/shepherd_dog/shepherd_dog.py

"""Shepherd Dog controller (Webots).

Mode is selected by ``HERDING_MODE`` (env var, or via the
``herding_runtime.cfg`` file the launcher writes since Webots strips
env vars on some setups):

    rl          → load a BC-trained SB3 policy from HERDING_POLICY_DIR
                  and use its (vx, vy) action each step.
    strombom    → canonical Strömbom collect/drive heuristic.
    sequential  → single-target "pin and push" — drives the sheep
                  closest to the pen.

All modes share the same low-level differential-drive controller
(``herding.diffdrive.velocity_to_wheels`` with cos(err)-clamped forward
speed), so switching modes does not retune actuation.

A safety supervisor enforces the "dog stays out of the pen" invariant:
if the action would push the dog past ``DOG_SOUTH_LIMIT`` it is
overridden with a north-driving correction. RL fallback: if the policy
zip can't be loaded (SB3 missing, file missing), the controller drops
to strombom mode automatically.
"""

import math
import os
import sys

# --- Make the shared herding/ package importable from this controller dir ---
_HERE = os.path.dirname(os.path.abspath(__file__))
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)

from controller import Robot

from herding.diffdrive import velocity_to_wheels
from herding.geometry import (
    DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
    DOG_SOUTH_LIMIT, DOG_WHEEL_RADIUS,
    PEN_ENTRY,
)
from herding.obs import build_obs
from herding.sequential import compute_action_debug as sequential_action_debug
from herding.strombom import compute_action_debug as strombom_action_debug


# ---------------------------------------------------------------------------
# Mode selection
# ---------------------------------------------------------------------------

def _load_runtime_config():
    """Read mode + policy_dir overrides from a runtime config file.

    Webots strips HERDING_* env vars in some configurations, so the
    launcher writes a tiny ``herding_runtime.cfg`` (key=value lines)
    in the project root and the controller reads it here. Env vars
    win if both are present; the file is the fallback.
    """
    cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
    if not os.path.exists(cfg_path):
        return {}
    out = {}
    try:
        with open(cfg_path) as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                k, _, v = line.partition("=")
                out[k.strip().upper()] = v.strip()
    except OSError:
        return {}
    return out


_runtime_cfg = _load_runtime_config()
MODE = (os.environ.get("HERDING_MODE")
        or _runtime_cfg.get("HERDING_MODE")
        or "rl").lower()


def _resolve_policy_dir() -> str:
    """Where to look for the trained policy.

    Priority:
      1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points
         to a real directory.
      2. ``training/runs/bc_flock`` — flock-style BC (current default;
         requires the tight-cohesion sheep regime).
      3. ``training/runs/bc_solo`` — single-target BC (1-by-1 style;
         only works if ``herding/flocking_sim.py`` is reverted to the
         loose-cohesion regime).
    """
    env_dir = (os.environ.get("HERDING_POLICY_DIR")
               or _runtime_cfg.get("HERDING_POLICY_DIR"))
    if env_dir and os.path.isdir(env_dir):
        return env_dir
    candidates = [
        os.path.join(_PROJECT_ROOT, "training", "runs", "bc_flock"),
        os.path.join(_PROJECT_ROOT, "training", "runs", "bc_solo"),
    ]
    for c in candidates:
        if os.path.isdir(c):
            return c
    # Last resort — return env var anyway so error message is informative.
    return env_dir or candidates[0]


_VALID_MODES = ("rl", "strombom", "sequential")
if MODE not in _VALID_MODES:
    print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
    MODE = "strombom"

POLICY_DIR = _resolve_policy_dir()
policy_handle = None
if MODE == "rl":
    print(f"[dog] resolved POLICY_DIR={POLICY_DIR}  exists={os.path.isdir(POLICY_DIR)}")
    try:
        from policy_loader import load as _load_policy
        policy_handle = _load_policy(POLICY_DIR)
        print(f"[dog] RL policy loaded from {POLICY_DIR}")
    except Exception as exc:
        print(f"[dog] RL policy load failed ({exc!r}); falling back to strombom.")
        MODE = "strombom"
print(f"[dog] running in mode={MODE}")


# ---------------------------------------------------------------------------
# Action smoothing + safety supervisor
# ---------------------------------------------------------------------------

ACTION_SMOOTH = 0.35
prev_action = (0.0, 0.0)


def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
    """If the dog is near the south barrier and the action would push it
    further south, override with a northward action. Hard invariant: the
    dog never enters the pen."""
    if dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
        return (0.0, 1.0)
    if dog_y < DOG_SOUTH_LIMIT + 0.5 and vy < -0.2:
        return (vx * 0.5, max(0.0, vy + 0.5))
    return (vx, vy)


# ---------------------------------------------------------------------------
# Driving
# ---------------------------------------------------------------------------

def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: float):
    if math.hypot(vx, vy) < 1e-3:
        left_motor.setVelocity(0.0)
        right_motor.setVelocity(0.0)
        return
    n = compass.getValues()
    h = math.atan2(n[0], n[1])
    left, right = velocity_to_wheels(
        vx, vy, h,
        max_linear=DOG_MAX_LINEAR,
        wheel_radius=DOG_WHEEL_RADIUS,
        max_wheel_omega=motor_max,
        k_turn=4.0,
    )
    left_motor.setVelocity(left)
    right_motor.setVelocity(right)


# ---------------------------------------------------------------------------
# Webots devices
# ---------------------------------------------------------------------------

robot = Robot()
timestep = int(robot.getBasicTimeStep())

left_motor = robot.getDevice("left wheel motor")
right_motor = robot.getDevice("right wheel motor")
left_motor.setPosition(float("inf"))
right_motor.setPosition(float("inf"))
left_motor.setVelocity(0.0)
right_motor.setVelocity(0.0)
MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)

gps = robot.getDevice("gps");           gps.enable(timestep)
compass = robot.getDevice("compass");   compass.enable(timestep)
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
emitter = robot.getDevice("emitter")

# Cosmetic ear motors — ignored by control logic but keep them animated.
left_ear = robot.getDevice("left ear motor")
right_ear = robot.getDevice("right ear motor")
left_ear.setPosition(float("inf"))
right_ear.setPosition(float("inf"))
left_ear.setVelocity(0.0)
right_ear.setVelocity(0.0)
ear_phase = 0.0
EAR_AMPLITUDE = 0.35
EAR_RATE = 8.0


# ---------------------------------------------------------------------------
# Main loop
# ---------------------------------------------------------------------------

# {name: (x, y)} — kept across all sheep ever heard from. Sheep that drift
# into the pen are tracked by ``penned`` so observations and Strömbom
# agree on which ones still need herding.
sheep_positions: dict = {}
penned_set: set = set()
step_count = 0

from herding.geometry import is_penned_position

while robot.step(timestep) != -1:
    step_count += 1

    while receiver.getQueueLength() > 0:
        msg = receiver.getString()
        receiver.nextPacket()
        parts = msg.split(":")
        if len(parts) == 4 and parts[0] == "sheep":
            try:
                x, y = float(parts[2]), float(parts[3])
            except ValueError:
                continue
            sheep_positions[parts[1]] = (x, y)
            if parts[1] not in penned_set and is_penned_position(x, y):
                penned_set.add(parts[1])

    pos = gps.getValues()
    dog_xy = (pos[0], pos[1])
    n = compass.getValues()
    dog_heading = math.atan2(n[0], n[1])

    # ---- Action selection ----
    if MODE == "rl" and policy_handle is not None:
        sheep_xy_list = list(sheep_positions.values())
        sheep_names = list(sheep_positions.keys())
        sheep_penned_list = [s in penned_set for s in sheep_names]
        obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)
        action = policy_handle.predict(obs)
        vx, vy = float(action[0]), float(action[1])
    elif MODE == "sequential":
        vx, vy, _mode_str, _dbg = sequential_action_debug(
            dog_xy, sheep_positions, PEN_ENTRY,
        )
    else:
        # Strömbom (canonical baseline).
        vx, vy, _mode_str, _dbg = strombom_action_debug(
            dog_xy, sheep_positions, PEN_ENTRY,
        )

    # EMA smoothing — reduces oscillation from policy or Strömbom flips.
    vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
    vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy

    # Safety: dog must never enter the pen.
    vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
    prev_action = (vx, vy)

    drive(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
    emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")

    # Cosmetic ear wiggle — purely visual.
    ear_phase += 0.12
    ear_pos = EAR_AMPLITUDE * math.sin(ear_phase)
    left_ear.setVelocity(EAR_RATE)
    right_ear.setVelocity(EAR_RATE)
    left_ear.setPosition(ear_pos)
    right_ear.setPosition(-ear_pos)

    if step_count % 200 == 0:
        n_active = sum(1 for s in sheep_positions if s not in penned_set)
        print(f"[dog mode={MODE}] step={step_count} known={len(sheep_positions)} "
              f"penned={len(penned_set)} active={n_active} action=({vx:+.2f}, {vy:+.2f})")