Checkpoint 2

This commit is contained in:
Johnny Fernandes
2026-05-07 22:00:10 +01:00
parent 90aa3bbcb4
commit 1bb9415414
37 changed files with 3068 additions and 2912 deletions
+78 -163
View File
@@ -1,233 +1,148 @@
"""
Sheep flocking controller (Webots, Reynolds boids variant).
"""Sheep flocking controller (Webots).
Each sheep broadcasts its GPS position every 3 steps on channel 1 and
listens for the dog and peer sheep positions. Peers are keyed by robot
name so each neighbour has exactly one current entry in the dict.
listens for the dog and peer sheep positions. The behavioural step is
delegated to ``herding.flocking_sim.compute_heading_speed`` so the
training environment and Webots run identical sheep dynamics.
Force stack each step (summed then converted to a heading + speed):
flee — away from dog, quadratic ramp, dominant when close
cohesion — toward flock centre, halved while fleeing
separation — inverse-distance push, prevents physical overlap
walls — linear repulsion from field boundary
wander — small persistent drift for natural idle motion
Pen behaviour: on first entry into the quarantine pen the sheep latches
permanently — it turns pink (via the exposed woolColor PROTO field) and
the normal force stack is replaced by pen-confinement forces only.
Pen behaviour: a sheep latches to ``penned`` the first time it crosses
the south-wall gate plane into the gate corridor. Once latched it turns
pink (via the exposed ``woolColor`` PROTO field) and the force stack
switches to in-pen containment.
"""
import random
import math
import os
import random
import sys
# --- Make the shared herding/ package importable from this controller dir ---
_HERE = os.path.dirname(os.path.abspath(__file__))
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
from controller import Supervisor
# ---------------------------------------------------------------------------
# Tuning constants
# ---------------------------------------------------------------------------
from herding.diffdrive import heading_speed_to_wheels
from herding.flocking_sim import MAX_SPEED, compute_heading_speed
from herding.geometry import (
SHEEP_MAX_WHEEL_OMEGA,
is_penned_position,
)
MAX_SPEED = 22.0 # rad/s hard clamp on both motors
FLEE_SPEED = 20.0 # rad/s upper bound while panicking
WANDER_SPEED = 3.0 # rad/s lower bound during calm wandering
X_MIN, X_MAX = -14.5, 14.5 # stone wall inner edges (metres)
Y_MIN, Y_MAX = -14.5, 14.5
WALL_MARGIN = 3.5 # avoidance starts this far from the wall
FLEE_DIST = 7.0 # dog within this radius triggers flee (metres)
SEPARATION_DIST = 2.5 # inverse-distance push active inside this radius
COHESION_DIST = 8.0 # pull toward flock centre active inside this radius
PEN_X_MIN, PEN_X_MAX = 10.0, 13.0 # quarantine pen extents (metres)
PEN_Y_MIN, PEN_Y_MAX = -15.0, -8.0 # open entrance at y=-8, gate at y=-15
PEN_MARGIN = 0.8 # confinement force starts this far from pen wall
# ---------------------------------------------------------------------------
# Device setup
# ---------------------------------------------------------------------------
robot = Supervisor()
robot = Supervisor()
timestep = int(robot.getBasicTimeStep())
name = robot.getName()
name = robot.getName()
self_node = robot.getSelf()
left_motor = robot.getDevice("left wheel motor")
left_motor = robot.getDevice("left wheel motor")
right_motor = robot.getDevice("right wheel motor")
left_motor.setPosition(float("inf"))
right_motor.setPosition(float("inf"))
left_motor.setVelocity(0.0)
right_motor.setVelocity(0.0)
MOTOR_MAX = min(left_motor.getMaxVelocity(), SHEEP_MAX_WHEEL_OMEGA)
gps = robot.getDevice("gps"); gps.enable(timestep)
compass = robot.getDevice("compass"); compass.enable(timestep)
gps = robot.getDevice("gps"); gps.enable(timestep)
compass = robot.getDevice("compass"); compass.enable(timestep)
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
emitter = robot.getDevice("emitter")
emitter = robot.getDevice("emitter")
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def norm_angle(a):
return math.atan2(math.sin(a), math.cos(a))
def bearing():
# Compass returns north direction in sensor frame; for this Z-up world
# with north = +Y, atan2(n[0], n[1]) gives the standard math angle
# (0 = east, π/2 = north) matching atan2(fy, fx) used for heading.
# (0 = east, π/2 = north) matching atan2(fy, fx) used for headings.
n = compass.getValues()
return math.atan2(n[0], n[1])
def drive(heading, speed):
err = norm_angle(heading - bearing())
# Scale forward component by cos(err): at 90° error fwd→0 so the robot
# spins in place to realign rather than driving sideways at full speed.
fwd = speed * max(0.0, math.cos(err))
k = 4.0
left_motor.setVelocity( max(-MAX_SPEED, min(MAX_SPEED, fwd - k * err)))
right_motor.setVelocity(max(-MAX_SPEED, min(MAX_SPEED, fwd + k * err)))
def drive(heading, speed_motor):
left_w, right_w = heading_speed_to_wheels(
heading, min(speed_motor, MAX_SPEED), bearing(), MOTOR_MAX, k_turn=4.0
)
left_motor.setVelocity(left_w)
right_motor.setVelocity(right_w)
def paint_pink():
# woolColor is declared as a PROTO field with IS binding to the DEF WOOL
# PBRAppearance baseColor. Changing it here propagates to every USE WOOL
# shape on the body. Direct field access avoids PROTO-internal opacity.
# PBRAppearance baseColor; setting it propagates to every USE WOOL shape.
self_node.getField("woolColor").setSFColor([1.0, 0.55, 0.72])
# ---------------------------------------------------------------------------
# State
# ---------------------------------------------------------------------------
wander_angle = random.uniform(-math.pi, math.pi)
step = 0
dog_x = None
dog_y = None
peers = {} # name → (x, y), one entry per neighbour, cleared every 30 steps
step_count = 0
dog_x, dog_y = None, None
peers = {} # name → (x, y), one entry per neighbour, cleared every 30 steps
penned = False
# Stuck detection: differential-drive sheep can pin against a wall and need
# a forced reverse-and-rotate to escape. If displacement < STUCK_DIST for
# STUCK_STEPS consecutive steps, drive toward field centre.
_prev_x, _prev_y = None, None
_stuck_count = 0
STUCK_STEPS = 20
STUCK_DIST = 0.05
# ---------------------------------------------------------------------------
# Main loop
# ---------------------------------------------------------------------------
while robot.step(timestep) != -1:
step += 1
step_count += 1
pos = gps.getValues()
x, y = pos[0], pos[1]
# Pen entry: one-way latch, never unset
if not penned and PEN_X_MIN < x < PEN_X_MAX and PEN_Y_MIN < y < PEN_Y_MAX:
# Pen entry: one-way latch. Penned sheep get pink wool and switch behaviour.
if not penned and is_penned_position(x, y):
penned = True
paint_pink()
# Refresh peer table (clear before receiving so fresh data is never lost)
if step % 30 == 0:
# Refresh peer table clear before receiving so fresh data is never lost.
if step_count % 30 == 0:
peers.clear()
while receiver.getQueueLength() > 0:
msg = receiver.getString()
receiver.nextPacket()
p = msg.split(":")
if p[0] == "dog" and len(p) >= 3:
dog_x, dog_y = float(p[1]), float(p[2])
elif p[0] == "sheep" and len(p) >= 4 and p[1] != name:
peers[p[1]] = (float(p[2]), float(p[3]))
parts = msg.split(":")
if parts[0] == "dog" and len(parts) >= 3:
dog_x, dog_y = float(parts[1]), float(parts[2])
elif parts[0] == "sheep" and len(parts) >= 4 and parts[1] != name:
peers[parts[1]] = (float(parts[2]), float(parts[3]))
fx, fy = 0.0, 0.0
dog_xy = (dog_x, dog_y) if dog_x is not None and dog_y is not None else None
heading, speed, wander_angle = compute_heading_speed(
x=x, y=y, penned=penned, dog_xy=dog_xy, peers=peers,
wander_angle=wander_angle,
)
# Repel unpenned sheep from the exterior of the pen's side walls so they
# don't get pinned by flee forces. Only fires when strictly outside the pen
# (x < PEN_X_MIN or x > PEN_X_MAX) at pen height (y in pen y-range).
# Entrance is open on the north (y > PEN_Y_MAX) — no force there.
PEN_EXT_MARGIN = 0.8
if not penned and PEN_Y_MIN < y < PEN_Y_MAX:
if PEN_X_MIN - PEN_EXT_MARGIN < x < PEN_X_MIN:
fx -= ((x - (PEN_X_MIN - PEN_EXT_MARGIN)) / PEN_EXT_MARGIN) * 6.0
if PEN_X_MAX < x < PEN_X_MAX + PEN_EXT_MARGIN:
fx += ((PEN_X_MAX + PEN_EXT_MARGIN - x) / PEN_EXT_MARGIN) * 6.0
# Stuck detection — safety net for differential-drive wall pinning.
if _prev_x is not None:
moved = math.hypot(x - _prev_x, y - _prev_y)
_stuck_count = _stuck_count + 1 if moved < STUCK_DIST else 0
if _stuck_count >= STUCK_STEPS:
heading = math.atan2(-y, -x) # always points away from the boundary
speed = MAX_SPEED
_stuck_count = 0
_prev_x, _prev_y = x, y
if penned:
# Inside pen: wander freely, strong boundary forces prevent exit,
# separation still active to avoid collisions with other penned sheep.
pm = PEN_MARGIN
if x < PEN_X_MIN + pm: fx += ((PEN_X_MIN + pm - x) / pm) * 15.0
if x > PEN_X_MAX - pm: fx -= ((x - (PEN_X_MAX - pm)) / pm) * 15.0
if y < PEN_Y_MIN + pm: fy += ((PEN_Y_MIN + pm - y) / pm) * 15.0
if y > PEN_Y_MAX - pm: fy -= ((y - (PEN_Y_MAX - pm)) / pm) * 15.0
for px, py in peers.values():
dx, dy = px - x, py - y
d = math.hypot(dx, dy)
if 0.05 < d < SEPARATION_DIST:
push = (SEPARATION_DIST - d) / d
fx -= (dx / d) * push * 2.5
fy -= (dy / d) * push * 2.5
if random.random() < 0.02:
wander_angle += random.uniform(-0.6, 0.6)
fx += math.cos(wander_angle) * 0.5
fy += math.sin(wander_angle) * 0.5
else:
fleeing = False
# Flee — quadratic ramp so force grows rapidly as the dog closes in
if dog_x is not None:
dx = dog_x - x
dy = dog_y - y
dist = math.hypot(dx, dy)
if 0.01 < dist < FLEE_DIST:
fleeing = True
t = 1.0 - dist / FLEE_DIST
s = t * t * 20.0
fx -= (dx / dist) * s
fy -= (dy / dist) * s
# Cohesion — halved while fleeing to reduce mid-panic collisions
cx, cy, cn = 0.0, 0.0, 0
for px, py in peers.values():
d = math.hypot(px - x, py - y)
if 0.3 < d < COHESION_DIST:
cx += px; cy += py; cn += 1
if cn > 0:
w = 0.08 if fleeing else 0.15
fx += (cx / cn - x) * w
fy += (cy / cn - y) * w
# Separation — inverse-distance: huge when nearly overlapping, fades quickly
for px, py in peers.values():
dx, dy = px - x, py - y
d = math.hypot(dx, dy)
if 0.05 < d < SEPARATION_DIST:
push = (SEPARATION_DIST - d) / d
fx -= (dx / d) * push * 2.5
fy -= (dy / d) * push * 2.5
# Walls
if x < X_MIN + WALL_MARGIN: fx += ((X_MIN + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
if x > X_MAX - WALL_MARGIN: fx -= ((x - (X_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
if y < Y_MIN + WALL_MARGIN: fy += ((Y_MIN + WALL_MARGIN - y) / WALL_MARGIN) * 6.0
if y > Y_MAX - WALL_MARGIN: fy -= ((y - (Y_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
# Wander — suppressed while fleeing so drift cannot deflect the flee heading
if not fleeing:
if random.random() < 0.02:
wander_angle += random.uniform(-0.6, 0.6)
fx += math.cos(wander_angle) * 0.5
fy += math.sin(wander_angle) * 0.5
# Hard-stop clamp: within 0.5 m of a wall, zero any force component that
# would push further into it. Prevents the flee force from pinning a sheep
# against the boundary when the dog approaches from outside.
HS = 0.5
if x < X_MIN + HS and fx < 0: fx = 0.0
if x > X_MAX - HS and fx > 0: fx = 0.0
if y < Y_MIN + HS and fy < 0: fy = 0.0
if y > Y_MAX - HS and fy > 0: fy = 0.0
heading = math.atan2(fy, fx)
mag = math.hypot(fx, fy)
speed = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))
drive(heading, speed)
if step % 3 == 0:
if step_count % 3 == 0:
emitter.send(f"sheep:{name}:{x:.4f}:{y:.4f}")
+78
View File
@@ -0,0 +1,78 @@
"""Lazy loader for the SB3 PPO policy used by the dog controller.
Importing stable-baselines3 inside the Webots Python interpreter is only
needed when ``HERDING_MODE=rl``; the Strömbom mode runs without it. This
loader keeps SB3 out of the import path until you actually ask for the RL
policy, so users without SB3 installed can still run the Strömbom
baseline.
The policy + VecNormalize statistics are saved together by
``training/train_ppo.py``:
runs/<name>/best/best_model.zip # SB3 PPO checkpoint
runs/<name>/best/vecnormalize.pkl # observation-normaliser stats
Pass either the directory or the explicit zip path.
"""
import os
from pathlib import Path
class PolicyHandle:
"""Wrap a loaded PPO policy + VecNormalize so the controller can call
``predict(obs)`` without thinking about either."""
def __init__(self, model, vecnorm):
self.model = model
self.vecnorm = vecnorm
def predict(self, obs):
# VecNormalize expects a batched obs of shape (n_envs, obs_dim).
if self.vecnorm is not None:
import numpy as np
obs_b = np.asarray(obs, dtype=np.float32).reshape(1, -1)
obs_b = self.vecnorm.normalize_obs(obs_b)
else:
import numpy as np
obs_b = np.asarray(obs, dtype=np.float32).reshape(1, -1)
action, _ = self.model.predict(obs_b, deterministic=True)
return action[0]
def load(model_path: str, vecnorm_path: str | None = None) -> PolicyHandle:
"""Load a PPO model (and optional VecNormalize) from disk.
``model_path`` may be the .zip checkpoint or a directory containing
``best_model.zip`` (and optionally ``vecnormalize.pkl``).
"""
p = Path(model_path)
if p.is_dir():
zip_candidates = [p / "best_model.zip", p / "final.zip", p / "policy.zip"]
zip_path = next((z for z in zip_candidates if z.exists()), None)
if zip_path is None:
raise FileNotFoundError(
f"No PPO zip found in {p} (looked for best_model.zip, final.zip, policy.zip)"
)
if vecnorm_path is None:
vn = p / "vecnormalize.pkl"
if vn.exists():
vecnorm_path = str(vn)
else:
zip_path = p
# Imports deferred so the Strömbom path doesn't require SB3.
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize
model = PPO.load(str(zip_path), device="auto")
vecnorm = None
if vecnorm_path and os.path.exists(vecnorm_path):
# VecNormalize.load needs a venv to attach to; we only need its stats
# at inference, so we reconstruct the wrapper manually.
import pickle
with open(vecnorm_path, "rb") as f:
vecnorm = pickle.load(f)
vecnorm.training = False
vecnorm.norm_reward = False
return PolicyHandle(model=model, vecnorm=vecnorm)
+249 -54
View File
@@ -1,88 +1,283 @@
"""
Shepherd Dog controller (Webots, manual keyboard control).
"""Shepherd Dog controller (Webots).
WASD / arrow keys drive the robot. +/- adjust speed in 10 % increments.
GPS position is broadcast every step on channel 1 so sheep controllers
can compute flee forces. Ears wag continuously via sinusoidal position
targets — purely cosmetic.
Runs in one of two modes selected by the ``HERDING_MODE`` environment
variable:
HERDING_MODE=rl → load an SB3 PPO policy from
HERDING_POLICY_DIR (default
training/runs/latest/best) and use its
(vx, vy) action each step.
HERDING_MODE=strombom → use the analytic Strömbom collect/drive
heuristic. This is the fallback if the RL
policy can't be loaded (e.g. SB3 not
installed in the Webots Python env, or no
checkpoint yet).
Both modes share the same low-level differential-drive controller
(``herding.diffdrive.velocity_to_wheels`` + clamped forward speed), so
switching modes does not retune the actuation layer.
A safety supervisor enforces the "dog stays out of the pen" invariant:
if the action would push the dog past ``DOG_SOUTH_LIMIT`` it is
overridden with a north-driving correction. This is a hard guarantee
the policy cannot escape.
"""
import math
from controller import Robot, Keyboard
import os
import sys
robot = Robot()
# --- Make the shared herding/ package importable from this controller dir ---
_HERE = os.path.dirname(os.path.abspath(__file__))
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
from controller import Robot
from herding.diffdrive import velocity_to_wheels
from herding.geometry import (
DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
DOG_SOUTH_LIMIT, DOG_WHEEL_RADIUS,
PEN_ENTRY,
)
from herding.obs import build_obs
from herding.sequential import compute_action_debug as sequential_action_debug
from herding.strombom import compute_action_debug as strombom_action_debug
# ---------------------------------------------------------------------------
# Mode selection
# ---------------------------------------------------------------------------
def _load_runtime_config():
"""Read mode + policy_dir overrides from a runtime config file.
Webots strips HERDING_* env vars in some configurations, so the
launcher writes a tiny ``herding_runtime.cfg`` (key=value lines)
in the project root and the controller reads it here. Env vars
win if both are present; the file is the fallback.
"""
cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
if not os.path.exists(cfg_path):
return {}
out = {}
try:
with open(cfg_path) as f:
for line in f:
line = line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
k, _, v = line.partition("=")
out[k.strip().upper()] = v.strip()
except OSError:
return {}
return out
_runtime_cfg = _load_runtime_config()
MODE = (os.environ.get("HERDING_MODE")
or _runtime_cfg.get("HERDING_MODE")
or "rl").lower()
def _resolve_policy_dir() -> str:
"""Where to look for the trained policy.
Priority:
1. HERDING_POLICY_DIR env var (if set and points to a real dir)
2. training/runs/bc_pretrained/ (BC-only checkpoint)
3. training/runs/bc_ppo/best/ (PPO fine-tuned best)
4. training/runs/latest/best/ (legacy default)
"""
env_dir = (os.environ.get("HERDING_POLICY_DIR")
or _runtime_cfg.get("HERDING_POLICY_DIR"))
if env_dir and os.path.isdir(env_dir):
return env_dir
candidates = [
os.path.join(_PROJECT_ROOT, "training", "runs", "bc_pretrained"),
os.path.join(_PROJECT_ROOT, "training", "runs", "bc_ppo", "best"),
os.path.join(_PROJECT_ROOT, "training", "runs", "latest", "best"),
]
for c in candidates:
if os.path.isdir(c):
return c
# Last resort — return env var anyway so error message is informative.
return env_dir or candidates[0]
POLICY_DIR = _resolve_policy_dir()
policy_handle = None
if MODE == "rl":
print(f"[dog] HERDING_MODE={MODE} HERDING_POLICY_DIR(env)="
f"{os.environ.get('HERDING_POLICY_DIR', '<unset>')}")
print(f"[dog] resolved POLICY_DIR={POLICY_DIR} exists="
f"{os.path.isdir(POLICY_DIR)}")
if os.path.isdir(POLICY_DIR):
try:
entries = sorted(os.listdir(POLICY_DIR))
except OSError:
entries = []
print(f"[dog] dir contents: {entries}")
try:
from policy_loader import load as _load_policy
policy_handle = _load_policy(POLICY_DIR)
print(f"[dog] RL policy loaded from {POLICY_DIR}")
except Exception as exc:
print(f"[dog] RL policy load failed ({exc!r}); falling back to Strömbom.")
MODE = "strombom"
if MODE not in ("rl", "strombom", "sequential"):
print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
MODE = "strombom"
print(f"[dog] running in mode={MODE}")
# ---------------------------------------------------------------------------
# Action smoothing + safety supervisor
# ---------------------------------------------------------------------------
ACTION_SMOOTH = 0.35
prev_action = (0.0, 0.0)
def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
"""If the dog is near the south barrier and the action would push it
further south, override with a northward action. Hard invariant: the
dog never enters the pen."""
if dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
return (0.0, 1.0)
if dog_y < DOG_SOUTH_LIMIT + 0.5 and vy < -0.2:
return (vx * 0.5, max(0.0, vy + 0.5))
return (vx, vy)
# ---------------------------------------------------------------------------
# Driving
# ---------------------------------------------------------------------------
def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: float):
if math.hypot(vx, vy) < 1e-3:
left_motor.setVelocity(0.0)
right_motor.setVelocity(0.0)
return
n = compass.getValues()
h = math.atan2(n[0], n[1])
left, right = velocity_to_wheels(
vx, vy, h,
max_linear=DOG_MAX_LINEAR,
wheel_radius=DOG_WHEEL_RADIUS,
max_wheel_omega=motor_max,
k_turn=4.0,
)
left_motor.setVelocity(left)
right_motor.setVelocity(right)
# ---------------------------------------------------------------------------
# Webots devices
# ---------------------------------------------------------------------------
robot = Robot()
timestep = int(robot.getBasicTimeStep())
left_motor = robot.getDevice("left wheel motor")
left_motor = robot.getDevice("left wheel motor")
right_motor = robot.getDevice("right wheel motor")
left_motor.setPosition(float("inf"))
right_motor.setPosition(float("inf"))
left_motor.setVelocity(0.0)
right_motor.setVelocity(0.0)
MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
lidar = robot.getDevice("lidar")
lidar.enable(timestep)
lidar.enablePointCloud()
gps = robot.getDevice("gps"); gps.enable(timestep)
compass = robot.getDevice("compass"); compass.enable(timestep)
emitter = robot.getDevice("emitter")
gps = robot.getDevice("gps"); gps.enable(timestep)
compass = robot.getDevice("compass"); compass.enable(timestep)
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
emitter = robot.getDevice("emitter")
left_ear = robot.getDevice("left ear motor")
# Cosmetic ear motors — ignored by control logic but keep them animated.
left_ear = robot.getDevice("left ear motor")
right_ear = robot.getDevice("right ear motor")
left_ear.setPosition(float("inf"))
right_ear.setPosition(float("inf"))
left_ear.setVelocity(0.0)
right_ear.setVelocity(0.0)
ear_phase = 0.0
EAR_AMPLITUDE = 0.35
EAR_RATE = 8.0
keyboard = robot.getKeyboard()
keyboard.enable(timestep)
MOTOR_MAX = left_motor.getMaxVelocity()
speed_level = 0.5 # fraction of MOTOR_MAX; adjusted by +/-
# ---------------------------------------------------------------------------
# Main loop
# ---------------------------------------------------------------------------
EAR_AMPLITUDE = 0.35 # rad, peak ear deflection
EAR_RATE = 8.0 # rad/s, how fast the ears are driven
ear_phase = 0.0
# {name: (x, y)} — kept across all sheep ever heard from. Sheep that drift
# into the pen are tracked by ``penned`` so observations and Strömbom
# agree on which ones still need herding.
sheep_positions: dict = {}
penned_set: set = set()
step_count = 0
from herding.geometry import is_penned_position
while robot.step(timestep) != -1:
speed = MOTOR_MAX * speed_level
turn = speed * 0.6 # differential turn radius
step_count += 1
left_vel = 0.0
right_vel = 0.0
key = keyboard.getKey()
while key > 0:
if key in (ord('W'), Keyboard.UP):
left_vel = speed
right_vel = speed
elif key in (ord('S'), Keyboard.DOWN):
left_vel = -speed
right_vel = -speed
elif key in (ord('A'), Keyboard.LEFT):
left_vel = -turn
right_vel = turn
elif key in (ord('D'), Keyboard.RIGHT):
left_vel = turn
right_vel = -turn
elif key in (ord('+'), ord('=')):
speed_level = min(1.0, speed_level + 0.1)
print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
elif key in (ord('-'), ord('_')):
speed_level = max(0.1, speed_level - 0.1)
print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
key = keyboard.getKey()
left_motor.setVelocity(left_vel)
right_motor.setVelocity(right_vel)
while receiver.getQueueLength() > 0:
msg = receiver.getString()
receiver.nextPacket()
parts = msg.split(":")
if len(parts) == 4 and parts[0] == "sheep":
try:
x, y = float(parts[2]), float(parts[3])
except ValueError:
continue
sheep_positions[parts[1]] = (x, y)
if parts[1] not in penned_set and is_penned_position(x, y):
penned_set.add(parts[1])
pos = gps.getValues()
emitter.send(f"dog:{pos[0]}:{pos[1]}")
dog_xy = (pos[0], pos[1])
n = compass.getValues()
dog_heading = math.atan2(n[0], n[1])
# ---- Action selection ----
if MODE == "rl" and policy_handle is not None:
sheep_xy_list = list(sheep_positions.values())
sheep_names = list(sheep_positions.keys())
sheep_penned_list = [s in penned_set for s in sheep_names]
obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)
action = policy_handle.predict(obs)
vx, vy = float(action[0]), float(action[1])
elif MODE == "sequential":
vx, vy, _mode_str, _dbg = sequential_action_debug(
dog_xy, sheep_positions, PEN_ENTRY,
)
else:
# Strömbom (canonical baseline).
vx, vy, _mode_str, _dbg = strombom_action_debug(
dog_xy, sheep_positions, PEN_ENTRY,
)
# EMA smoothing — reduces oscillation from policy or Strömbom flips.
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
# Safety: dog must never enter the pen.
vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
prev_action = (vx, vy)
drive(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")
# Cosmetic ear wiggle — purely visual.
ear_phase += 0.12
ear_pos = EAR_AMPLITUDE * math.sin(ear_phase)
left_ear.setVelocity(EAR_RATE)
right_ear.setVelocity(EAR_RATE)
left_ear.setPosition( ear_pos)
left_ear.setPosition(ear_pos)
right_ear.setPosition(-ear_pos)
if step_count % 200 == 0:
n_active = sum(1 for s in sheep_positions if s not in penned_set)
print(f"[dog mode={MODE}] step={step_count} known={len(sheep_positions)} "
f"penned={len(penned_set)} active={n_active} action=({vx:+.2f}, {vy:+.2f})")
Binary file not shown.
-153
View File
@@ -1,153 +0,0 @@
"""
Render Webots-side debug trajectory from debug.csv.
The shepherd_dog_rl controller writes per-step state to debug.csv when
DOG_DEBUG=1. This script reads it and produces:
trajectory.png — dog path + sheep paths overlaid on the field
obs_drift.png — normalized observation distribution over time
actions.png — vx, vy time series
Run:
python plot_debug.py # uses debug.csv next to this file
python plot_debug.py --csv path/to.csv --out-dir somewhere/
"""
import argparse
import csv
import os
import sys
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
def load_csv(path):
rows = []
with open(path) as f:
rd = csv.DictReader(f)
for r in rd:
rows.append(r)
if not rows:
sys.exit(f"empty CSV: {path}")
return rows
def parse_floats(s):
return [float(x) for x in s.split(";") if x]
def plot_trajectory(rows, out_path):
fig, ax = plt.subplots(figsize=(7, 7))
ax.set_xlim(-16, 16); ax.set_ylim(-16, 16); ax.set_aspect("equal")
ax.set_facecolor("#dcedc8")
ax.add_patch(mpatches.Rectangle((-15, -15), 30, 30,
fill=False, edgecolor="#795548", lw=2))
ax.add_patch(mpatches.Rectangle((10, -15), 3, 7,
facecolor="#ffe082", edgecolor="#795548", lw=2))
ax.text(11.5, -11.5, "pen", ha="center", va="center", fontsize=8)
dog_x = [float(r["dog_x"]) for r in rows]
dog_y = [float(r["dog_y"]) for r in rows]
ax.plot(dog_x, dog_y, color="#4e342e", lw=1.5, alpha=0.7, label="dog")
ax.plot(dog_x[0], dog_y[0], "s", color="#4e342e", ms=10)
ax.plot(dog_x[-1], dog_y[-1], "D", color="#4e342e", ms=10)
# Sheep — re-shape into per-sheep tracks
sx_all = [parse_floats(r["sheep_xs"]) for r in rows]
sy_all = [parse_floats(r["sheep_ys"]) for r in rows]
if sx_all and sx_all[-1]:
n_sheep = len(sx_all[-1])
palette = ["#e41a1c","#377eb8","#4daf4a","#984ea3","#ff7f00",
"#a65628","#f781bf","#999999","#66c2a5","#fc8d62"]
for i in range(n_sheep):
xs = [r[i] if i < len(r) else None for r in sx_all]
ys = [r[i] if i < len(r) else None for r in sy_all]
xs = [x for x in xs if x is not None]
ys = [y for y in ys if y is not None]
if xs:
c = palette[i % len(palette)]
ax.plot(xs, ys, color=c, lw=0.8, alpha=0.6, label=f"sheep {i+1}")
ax.plot(xs[0], ys[0], "o", color=c, ms=6)
ax.plot(xs[-1], ys[-1], "*", color=c, ms=10)
n_in_pen = int(rows[-1]["n_penned"])
ax.set_title(f"Webots trajectory {len(rows)} steps penned={n_in_pen}",
fontsize=12)
ax.legend(loc="upper left", fontsize=7, ncol=2)
plt.tight_layout()
fig.savefig(out_path, dpi=120)
plt.close(fig)
def plot_actions(rows, out_path):
t = np.arange(len(rows))
vx = np.array([float(r["vx"]) for r in rows])
vy = np.array([float(r["vy"]) for r in rows])
mag = np.sqrt(vx ** 2 + vy ** 2)
fig, axes = plt.subplots(3, 1, figsize=(12, 7), sharex=True)
axes[0].plot(t, vx, color="tab:red", lw=0.8); axes[0].set_ylabel("vx")
axes[0].axhline(0, color="black", lw=0.4); axes[0].set_ylim(-1.1, 1.1)
axes[1].plot(t, vy, color="tab:blue", lw=0.8); axes[1].set_ylabel("vy")
axes[1].axhline(0, color="black", lw=0.4); axes[1].set_ylim(-1.1, 1.1)
axes[2].plot(t, mag, color="tab:purple", lw=0.8); axes[2].set_ylabel("||action||")
axes[2].axhline(np.sqrt(2), color="orange", ls="--", lw=1, label="saturated √2")
axes[2].axhline(1.0, color="gray", ls="--", lw=1)
axes[2].set_xlabel("step"); axes[2].legend(fontsize=8)
fig.suptitle("Webots action time series")
plt.tight_layout()
fig.savefig(out_path, dpi=120)
plt.close(fig)
def plot_obs(rows, out_path):
norm = np.array([parse_floats(r["norm_obs"]) for r in rows])
raw = np.array([parse_floats(r["raw_obs"]) for r in rows])
if norm.size == 0:
return
n_dims = norm.shape[1]
labels = [
"dog_x", "dog_y", "com-dog_x", "com-dog_y",
"far1-com_x", "far1-com_y", "far2-com_x", "far2-com_y",
"far3-com_x", "far3-com_y", "pen-com_x", "pen-com_y",
"pen-far1_x", "pen-far1_y", "radius", "frac_active",
][:n_dims]
t = np.arange(norm.shape[0])
fig, axes = plt.subplots(n_dims, 1, figsize=(11, 1.0 * n_dims), sharex=True)
if n_dims == 1: axes = [axes]
for i in range(n_dims):
axes[i].plot(t, raw[:, i], color="tab:gray", lw=0.6, alpha=0.6, label="raw")
axes[i].plot(t, norm[:, i], color="tab:red", lw=0.8, label="normalised")
axes[i].set_ylabel(labels[i], fontsize=8)
axes[i].tick_params(labelsize=7)
if i == 0:
axes[i].legend(fontsize=7, loc="upper right")
axes[-1].set_xlabel("step")
fig.suptitle("Observation values over time (raw vs VecNormalize-normalised)")
plt.tight_layout()
fig.savefig(out_path, dpi=110)
plt.close(fig)
def main():
p = argparse.ArgumentParser()
here = os.path.dirname(os.path.abspath(__file__))
p.add_argument("--csv", default=os.path.join(here, "debug.csv"))
p.add_argument("--out-dir", default=os.path.join(here, "debug_out"))
args = p.parse_args()
rows = load_csv(args.csv)
os.makedirs(args.out_dir, exist_ok=True)
print(f"loaded {len(rows)} rows from {args.csv}")
plot_trajectory(rows, os.path.join(args.out_dir, "trajectory.png"))
plot_actions(rows, os.path.join(args.out_dir, "actions.png"))
plot_obs(rows, os.path.join(args.out_dir, "obs.png"))
print(f"saved trajectory.png + actions.png + obs.png to {args.out_dir}/")
if __name__ == "__main__":
main()
@@ -1,285 +0,0 @@
"""
Shepherd Dog RL controller — runs a trained SB3 PPO policy inside Webots.
Setup
-----
1. Copy your trained files into this directory:
controllers/shepherd_dog_rl/final_model.zip
controllers/shepherd_dog_rl/vecnorm.pkl
2. In field.wbt, set the ShepherdDog robot's controller field to
"shepherd_dog_rl". You can do this in the Webots GUI:
click the robot → Controller → shepherd_dog_rl
3. Optional: set controllerArgs to ["5"] (number of sheep) if it differs
from the default of 5.
The controller reads GPS (dog position) and Receiver (sheep broadcasts),
builds the same 16-dim flock observation the training env used, normalises
it with the saved VecNormalize stats, and converts the (vx, vy) policy
output into differential wheel speeds.
Debug logging
-------------
Set env var DOG_DEBUG=1 to write a per-step CSV (dog pos, sheep positions,
raw obs, normalised obs, action) to debug.csv alongside this script. Use
plot_debug.py to render trajectories from it.
"""
import sys
import os
import math
import struct
import numpy as np
# ── make training code importable ───────────────────────────────────────────
_HERE = os.path.dirname(os.path.abspath(__file__))
_TRAINING = os.path.join(_HERE, "..", "..", "training")
sys.path.insert(0, _TRAINING)
from controller import Robot
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from herding_env import HerdingEnv
# ── constants (must match herding_env.py) ───────────────────────────────────
FIELD = 15.0
PEN_CENTER = np.array([11.5, -11.5], dtype=np.float32)
PEN_X = (10.0, 13.0)
PEN_Y = (-15.0, -8.0)
DOG_SPEED = 2.5 # m/s
WHEEL_R = 0.038 # wheel radius (metres) — from ShepherdDog.proto
K_TURN = 4.0 # heading-error gain (rad/s per rad)
EAR_AMPLITUDE = 0.35
EAR_RATE = 8.0
# ── model paths ─────────────────────────────────────────────────────────────
MODEL_PATH = os.path.join(_HERE, "final_model.zip")
VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
DEBUG_CSV = os.path.join(_HERE, "debug.csv")
DEBUG_ENABLED = True # set False to disable debug.csv logging
# ── action smoothing ─────────────────────────────────────────────────────────
# EMA on policy output to suppress the rapid oscillation (vx/vy flipping
# between -1 and +1 every step) that stalls the physical dog. 0 = no
# smoothing (raw policy), 1 = frozen. 0.3 keeps ~30% of previous action.
ACTION_SMOOTH = 0.3
prev_action = np.zeros(2, dtype=np.float32)
def norm_angle(a: float) -> float:
while a > math.pi: a -= 2 * math.pi
while a < -math.pi: a += 2 * math.pi
return a
def in_pen(x: float, y: float) -> bool:
return PEN_X[0] < x < PEN_X[1] and PEN_Y[0] < y < PEN_Y[1]
def build_obs(dog_pos: np.ndarray,
sheep_dict: dict,
n_sheep: int,
dog_heading: float = 0.0) -> np.ndarray:
"""
Build the 18-dim flock observation — identical to HerdingEnv._obs().
sheep_dict: {name: (x, y)} for ALL known sheep (penned or not).
dog_heading: dog's current world-frame heading in radians.
"""
D = 2 * FIELD
# Split active vs penned
active_pos = np.array(
[v for v in sheep_dict.values() if not in_pen(*v)],
dtype=np.float32
)
n_active = len(active_pos)
if n_active > 0:
com = active_pos.mean(axis=0)
d_from_com = np.linalg.norm(active_pos - com, axis=1)
sorted_idx = np.argsort(d_from_com)[::-1]
radius = float(d_from_com[sorted_idx[0]])
def nth(n):
return active_pos[sorted_idx[n]] if len(sorted_idx) > n else com
far1, far2, far3 = nth(0), nth(1), nth(2)
else:
com = PEN_CENTER.copy()
radius = 0.0
far1 = far2 = far3 = PEN_CENTER.copy()
frac_active = n_active / max(n_sheep, 1)
return np.array([
dog_pos[0] / FIELD, dog_pos[1] / FIELD,
(com[0] - dog_pos[0]) / D, (com[1] - dog_pos[1]) / D,
(far1[0] - com[0]) / D, (far1[1] - com[1]) / D,
(far2[0] - com[0]) / D, (far2[1] - com[1]) / D,
(far3[0] - com[0]) / D, (far3[1] - com[1]) / D,
(PEN_CENTER[0] - com[0]) / D, (PEN_CENTER[1] - com[1]) / D,
(PEN_CENTER[0] - far1[0]) / D, (PEN_CENTER[1] - far1[1]) / D,
radius / D,
frac_active,
math.cos(dog_heading), math.sin(dog_heading),
], dtype=np.float32)
# ── Webots setup ─────────────────────────────────────────────────────────────
robot = Robot()
timestep = int(robot.getBasicTimeStep())
# Drive motors
left_motor = robot.getDevice("left wheel motor")
right_motor = robot.getDevice("right wheel motor")
left_motor.setPosition(float("inf"))
right_motor.setPosition(float("inf"))
left_motor.setVelocity(0.0)
right_motor.setVelocity(0.0)
MOTOR_MAX = left_motor.getMaxVelocity()
# Sensors
gps = robot.getDevice("gps"); gps.enable(timestep)
compass = robot.getDevice("compass"); compass.enable(timestep)
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
emitter = robot.getDevice("emitter")
# Cosmetic
left_ear = robot.getDevice("left ear motor")
right_ear = robot.getDevice("right ear motor")
left_ear.setPosition(float("inf")); right_ear.setPosition(float("inf"))
left_ear.setVelocity(0.0); right_ear.setVelocity(0.0)
ear_phase = 0.0
# Number of sheep (from controllerArgs or default)
try:
n_sheep = int(sys.argv[1])
except (IndexError, ValueError):
n_sheep = 3
# ── Load model ───────────────────────────────────────────────────────────────
print(f"[RL dog] Loading model from {MODEL_PATH}")
print(f"[RL dog] Loading vecnorm from {VECNORM_PATH}")
dummy_env = DummyVecEnv([lambda: HerdingEnv(n_sheep=n_sheep)])
vecnorm = VecNormalize.load(VECNORM_PATH, dummy_env)
vecnorm.training = False
vecnorm.norm_reward = False
model = PPO.load(MODEL_PATH, device="cpu")
print(f"[RL dog] Model loaded — running with n_sheep={n_sheep}")
# ── Runtime state ─────────────────────────────────────────────────────────────
sheep_positions: dict = {} # {name: (x, y)} — updated every step from receiver
step_count = 0
# Debug CSV — written every step when DOG_DEBUG=1
debug_file = None
if DEBUG_ENABLED:
import csv
debug_file = open(DEBUG_CSV, "w", newline="")
debug_writer = csv.writer(debug_file)
debug_writer.writerow([
"step", "dog_x", "dog_y", "heading",
"sheep_xs", "sheep_ys", "n_active", "n_penned",
"raw_obs", "norm_obs", "vx", "vy",
])
print(f"[RL dog] DEBUG logging to {DEBUG_CSV}")
def bearing() -> float:
"""Current robot heading in world frame (radians)."""
n = compass.getValues()
return math.atan2(n[0], n[1])
def drive(action_vx: float, action_vy: float) -> None:
"""Convert (vx, vy) policy action to differential wheel speeds."""
speed_ms = math.sqrt(action_vx ** 2 + action_vy ** 2) * DOG_SPEED
if speed_ms < 0.05:
left_motor.setVelocity(0.0)
right_motor.setVelocity(0.0)
return
target_heading = math.atan2(action_vy, action_vx)
err = norm_angle(target_heading - bearing())
fwd_ms = speed_ms * max(0.0, math.cos(err))
fwd_rad = fwd_ms / WHEEL_R
turn = K_TURN * err # rad/s correction
l = max(-MOTOR_MAX, min(MOTOR_MAX, fwd_rad - turn))
r = max(-MOTOR_MAX, min(MOTOR_MAX, fwd_rad + turn))
left_motor.setVelocity(l)
right_motor.setVelocity(r)
# ── Main loop ─────────────────────────────────────────────────────────────────
while robot.step(timestep) != -1:
step_count += 1
# 1. Drain receiver — update sheep position table
while receiver.getQueueLength() > 0:
try:
msg = receiver.getString()
parts = msg.split(":")
if parts[0] == "sheep" and len(parts) == 4:
sheep_positions[parts[1]] = (float(parts[2]), float(parts[3]))
except Exception:
pass
receiver.nextPacket()
# 2. Dog GPS
gps_vals = gps.getValues()
dog_pos = np.array([gps_vals[0], gps_vals[1]], dtype=np.float32)
# 3. Build and normalise observation (heading from compass)
raw_obs = build_obs(dog_pos, sheep_positions, n_sheep,
dog_heading=bearing())
obs_norm = vecnorm.normalize_obs(raw_obs[np.newaxis]) # (1, 13)
# 4. Policy inference + smoothing
action, _ = model.predict(obs_norm, deterministic=True)
raw_a = np.array([float(action[0][0]), float(action[0][1])], dtype=np.float32)
if ACTION_SMOOTH > 0:
smoothed = ACTION_SMOOTH * prev_action + (1.0 - ACTION_SMOOTH) * raw_a
prev_action[:] = smoothed
vx, vy = float(smoothed[0]), float(smoothed[1])
else:
vx, vy = float(raw_a[0]), float(raw_a[1])
# 5. Drive
drive(vx, vy)
# 6. Broadcast dog position so sheep can compute flee forces
emitter.send(f"dog:{dog_pos[0]:.4f}:{dog_pos[1]:.4f}")
# 7. Ear animation
ear_phase += 0.12
ep = EAR_AMPLITUDE * math.sin(ear_phase)
left_ear.setVelocity(EAR_RATE); right_ear.setVelocity(EAR_RATE)
left_ear.setPosition( ep); right_ear.setPosition(-ep)
# Periodic status
if step_count % 100 == 0:
n_in_pen = sum(1 for x, y in sheep_positions.values() if in_pen(x, y))
print(f"[RL dog] step={step_count} known_sheep={len(sheep_positions)}"
f" penned={n_in_pen}/{n_sheep} dog=({dog_pos[0]:.2f},{dog_pos[1]:.2f})"
f" action=({vx:.2f}, {vy:.2f})")
# Debug CSV row
if debug_file is not None:
n_active = sum(1 for x, y in sheep_positions.values() if not in_pen(x, y))
n_in_pen = len(sheep_positions) - n_active
debug_writer.writerow([
step_count, f"{dog_pos[0]:.4f}", f"{dog_pos[1]:.4f}",
f"{bearing():.4f}",
";".join(f"{v[0]:.3f}" for v in sheep_positions.values()),
";".join(f"{v[1]:.3f}" for v in sheep_positions.values()),
n_active, n_in_pen,
";".join(f"{x:.4f}" for x in raw_obs),
";".join(f"{x:.4f}" for x in obs_norm[0]),
f"{vx:.4f}", f"{vy:.4f}",
])
if step_count % 200 == 0:
debug_file.flush()
Binary file not shown.