Checkpoint 2
This commit is contained in:
+78
-163
@@ -1,233 +1,148 @@
|
||||
"""
|
||||
Sheep flocking controller (Webots, Reynolds boids variant).
|
||||
"""Sheep flocking controller (Webots).
|
||||
|
||||
Each sheep broadcasts its GPS position every 3 steps on channel 1 and
|
||||
listens for the dog and peer sheep positions. Peers are keyed by robot
|
||||
name so each neighbour has exactly one current entry in the dict.
|
||||
listens for the dog and peer sheep positions. The behavioural step is
|
||||
delegated to ``herding.flocking_sim.compute_heading_speed`` so the
|
||||
training environment and Webots run identical sheep dynamics.
|
||||
|
||||
Force stack each step (summed then converted to a heading + speed):
|
||||
flee — away from dog, quadratic ramp, dominant when close
|
||||
cohesion — toward flock centre, halved while fleeing
|
||||
separation — inverse-distance push, prevents physical overlap
|
||||
walls — linear repulsion from field boundary
|
||||
wander — small persistent drift for natural idle motion
|
||||
|
||||
Pen behaviour: on first entry into the quarantine pen the sheep latches
|
||||
permanently — it turns pink (via the exposed woolColor PROTO field) and
|
||||
the normal force stack is replaced by pen-confinement forces only.
|
||||
Pen behaviour: a sheep latches to ``penned`` the first time it crosses
|
||||
the south-wall gate plane into the gate corridor. Once latched it turns
|
||||
pink (via the exposed ``woolColor`` PROTO field) and the force stack
|
||||
switches to in-pen containment.
|
||||
"""
|
||||
|
||||
import random
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
|
||||
# --- Make the shared herding/ package importable from this controller dir ---
|
||||
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
|
||||
if _PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, _PROJECT_ROOT)
|
||||
|
||||
from controller import Supervisor
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tuning constants
|
||||
# ---------------------------------------------------------------------------
|
||||
from herding.diffdrive import heading_speed_to_wheels
|
||||
from herding.flocking_sim import MAX_SPEED, compute_heading_speed
|
||||
from herding.geometry import (
|
||||
SHEEP_MAX_WHEEL_OMEGA,
|
||||
is_penned_position,
|
||||
)
|
||||
|
||||
MAX_SPEED = 22.0 # rad/s hard clamp on both motors
|
||||
FLEE_SPEED = 20.0 # rad/s upper bound while panicking
|
||||
WANDER_SPEED = 3.0 # rad/s lower bound during calm wandering
|
||||
|
||||
X_MIN, X_MAX = -14.5, 14.5 # stone wall inner edges (metres)
|
||||
Y_MIN, Y_MAX = -14.5, 14.5
|
||||
WALL_MARGIN = 3.5 # avoidance starts this far from the wall
|
||||
|
||||
FLEE_DIST = 7.0 # dog within this radius triggers flee (metres)
|
||||
SEPARATION_DIST = 2.5 # inverse-distance push active inside this radius
|
||||
COHESION_DIST = 8.0 # pull toward flock centre active inside this radius
|
||||
|
||||
PEN_X_MIN, PEN_X_MAX = 10.0, 13.0 # quarantine pen extents (metres)
|
||||
PEN_Y_MIN, PEN_Y_MAX = -15.0, -8.0 # open entrance at y=-8, gate at y=-15
|
||||
PEN_MARGIN = 0.8 # confinement force starts this far from pen wall
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Device setup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
robot = Supervisor()
|
||||
robot = Supervisor()
|
||||
timestep = int(robot.getBasicTimeStep())
|
||||
name = robot.getName()
|
||||
name = robot.getName()
|
||||
self_node = robot.getSelf()
|
||||
|
||||
left_motor = robot.getDevice("left wheel motor")
|
||||
left_motor = robot.getDevice("left wheel motor")
|
||||
right_motor = robot.getDevice("right wheel motor")
|
||||
left_motor.setPosition(float("inf"))
|
||||
right_motor.setPosition(float("inf"))
|
||||
left_motor.setVelocity(0.0)
|
||||
right_motor.setVelocity(0.0)
|
||||
MOTOR_MAX = min(left_motor.getMaxVelocity(), SHEEP_MAX_WHEEL_OMEGA)
|
||||
|
||||
gps = robot.getDevice("gps"); gps.enable(timestep)
|
||||
compass = robot.getDevice("compass"); compass.enable(timestep)
|
||||
gps = robot.getDevice("gps"); gps.enable(timestep)
|
||||
compass = robot.getDevice("compass"); compass.enable(timestep)
|
||||
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
|
||||
emitter = robot.getDevice("emitter")
|
||||
emitter = robot.getDevice("emitter")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def norm_angle(a):
|
||||
return math.atan2(math.sin(a), math.cos(a))
|
||||
|
||||
|
||||
def bearing():
|
||||
# Compass returns north direction in sensor frame; for this Z-up world
|
||||
# with north = +Y, atan2(n[0], n[1]) gives the standard math angle
|
||||
# (0 = east, π/2 = north) matching atan2(fy, fx) used for heading.
|
||||
# (0 = east, π/2 = north) matching atan2(fy, fx) used for headings.
|
||||
n = compass.getValues()
|
||||
return math.atan2(n[0], n[1])
|
||||
|
||||
|
||||
def drive(heading, speed):
|
||||
err = norm_angle(heading - bearing())
|
||||
# Scale forward component by cos(err): at 90° error fwd→0 so the robot
|
||||
# spins in place to realign rather than driving sideways at full speed.
|
||||
fwd = speed * max(0.0, math.cos(err))
|
||||
k = 4.0
|
||||
left_motor.setVelocity( max(-MAX_SPEED, min(MAX_SPEED, fwd - k * err)))
|
||||
right_motor.setVelocity(max(-MAX_SPEED, min(MAX_SPEED, fwd + k * err)))
|
||||
def drive(heading, speed_motor):
|
||||
left_w, right_w = heading_speed_to_wheels(
|
||||
heading, min(speed_motor, MAX_SPEED), bearing(), MOTOR_MAX, k_turn=4.0
|
||||
)
|
||||
left_motor.setVelocity(left_w)
|
||||
right_motor.setVelocity(right_w)
|
||||
|
||||
|
||||
def paint_pink():
|
||||
# woolColor is declared as a PROTO field with IS binding to the DEF WOOL
|
||||
# PBRAppearance baseColor. Changing it here propagates to every USE WOOL
|
||||
# shape on the body. Direct field access avoids PROTO-internal opacity.
|
||||
# PBRAppearance baseColor; setting it propagates to every USE WOOL shape.
|
||||
self_node.getField("woolColor").setSFColor([1.0, 0.55, 0.72])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# State
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
wander_angle = random.uniform(-math.pi, math.pi)
|
||||
step = 0
|
||||
dog_x = None
|
||||
dog_y = None
|
||||
peers = {} # name → (x, y), one entry per neighbour, cleared every 30 steps
|
||||
step_count = 0
|
||||
dog_x, dog_y = None, None
|
||||
peers = {} # name → (x, y), one entry per neighbour, cleared every 30 steps
|
||||
penned = False
|
||||
|
||||
# Stuck detection: differential-drive sheep can pin against a wall and need
|
||||
# a forced reverse-and-rotate to escape. If displacement < STUCK_DIST for
|
||||
# STUCK_STEPS consecutive steps, drive toward field centre.
|
||||
_prev_x, _prev_y = None, None
|
||||
_stuck_count = 0
|
||||
STUCK_STEPS = 20
|
||||
STUCK_DIST = 0.05
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main loop
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
while robot.step(timestep) != -1:
|
||||
step += 1
|
||||
step_count += 1
|
||||
pos = gps.getValues()
|
||||
x, y = pos[0], pos[1]
|
||||
|
||||
# Pen entry: one-way latch, never unset
|
||||
if not penned and PEN_X_MIN < x < PEN_X_MAX and PEN_Y_MIN < y < PEN_Y_MAX:
|
||||
# Pen entry: one-way latch. Penned sheep get pink wool and switch behaviour.
|
||||
if not penned and is_penned_position(x, y):
|
||||
penned = True
|
||||
paint_pink()
|
||||
|
||||
# Refresh peer table (clear before receiving so fresh data is never lost)
|
||||
if step % 30 == 0:
|
||||
# Refresh peer table — clear before receiving so fresh data is never lost.
|
||||
if step_count % 30 == 0:
|
||||
peers.clear()
|
||||
while receiver.getQueueLength() > 0:
|
||||
msg = receiver.getString()
|
||||
receiver.nextPacket()
|
||||
p = msg.split(":")
|
||||
if p[0] == "dog" and len(p) >= 3:
|
||||
dog_x, dog_y = float(p[1]), float(p[2])
|
||||
elif p[0] == "sheep" and len(p) >= 4 and p[1] != name:
|
||||
peers[p[1]] = (float(p[2]), float(p[3]))
|
||||
parts = msg.split(":")
|
||||
if parts[0] == "dog" and len(parts) >= 3:
|
||||
dog_x, dog_y = float(parts[1]), float(parts[2])
|
||||
elif parts[0] == "sheep" and len(parts) >= 4 and parts[1] != name:
|
||||
peers[parts[1]] = (float(parts[2]), float(parts[3]))
|
||||
|
||||
fx, fy = 0.0, 0.0
|
||||
dog_xy = (dog_x, dog_y) if dog_x is not None and dog_y is not None else None
|
||||
heading, speed, wander_angle = compute_heading_speed(
|
||||
x=x, y=y, penned=penned, dog_xy=dog_xy, peers=peers,
|
||||
wander_angle=wander_angle,
|
||||
)
|
||||
|
||||
# Repel unpenned sheep from the exterior of the pen's side walls so they
|
||||
# don't get pinned by flee forces. Only fires when strictly outside the pen
|
||||
# (x < PEN_X_MIN or x > PEN_X_MAX) at pen height (y in pen y-range).
|
||||
# Entrance is open on the north (y > PEN_Y_MAX) — no force there.
|
||||
PEN_EXT_MARGIN = 0.8
|
||||
if not penned and PEN_Y_MIN < y < PEN_Y_MAX:
|
||||
if PEN_X_MIN - PEN_EXT_MARGIN < x < PEN_X_MIN:
|
||||
fx -= ((x - (PEN_X_MIN - PEN_EXT_MARGIN)) / PEN_EXT_MARGIN) * 6.0
|
||||
if PEN_X_MAX < x < PEN_X_MAX + PEN_EXT_MARGIN:
|
||||
fx += ((PEN_X_MAX + PEN_EXT_MARGIN - x) / PEN_EXT_MARGIN) * 6.0
|
||||
# Stuck detection — safety net for differential-drive wall pinning.
|
||||
if _prev_x is not None:
|
||||
moved = math.hypot(x - _prev_x, y - _prev_y)
|
||||
_stuck_count = _stuck_count + 1 if moved < STUCK_DIST else 0
|
||||
if _stuck_count >= STUCK_STEPS:
|
||||
heading = math.atan2(-y, -x) # always points away from the boundary
|
||||
speed = MAX_SPEED
|
||||
_stuck_count = 0
|
||||
_prev_x, _prev_y = x, y
|
||||
|
||||
if penned:
|
||||
# Inside pen: wander freely, strong boundary forces prevent exit,
|
||||
# separation still active to avoid collisions with other penned sheep.
|
||||
|
||||
pm = PEN_MARGIN
|
||||
if x < PEN_X_MIN + pm: fx += ((PEN_X_MIN + pm - x) / pm) * 15.0
|
||||
if x > PEN_X_MAX - pm: fx -= ((x - (PEN_X_MAX - pm)) / pm) * 15.0
|
||||
if y < PEN_Y_MIN + pm: fy += ((PEN_Y_MIN + pm - y) / pm) * 15.0
|
||||
if y > PEN_Y_MAX - pm: fy -= ((y - (PEN_Y_MAX - pm)) / pm) * 15.0
|
||||
|
||||
for px, py in peers.values():
|
||||
dx, dy = px - x, py - y
|
||||
d = math.hypot(dx, dy)
|
||||
if 0.05 < d < SEPARATION_DIST:
|
||||
push = (SEPARATION_DIST - d) / d
|
||||
fx -= (dx / d) * push * 2.5
|
||||
fy -= (dy / d) * push * 2.5
|
||||
|
||||
if random.random() < 0.02:
|
||||
wander_angle += random.uniform(-0.6, 0.6)
|
||||
fx += math.cos(wander_angle) * 0.5
|
||||
fy += math.sin(wander_angle) * 0.5
|
||||
|
||||
else:
|
||||
fleeing = False
|
||||
|
||||
# Flee — quadratic ramp so force grows rapidly as the dog closes in
|
||||
if dog_x is not None:
|
||||
dx = dog_x - x
|
||||
dy = dog_y - y
|
||||
dist = math.hypot(dx, dy)
|
||||
if 0.01 < dist < FLEE_DIST:
|
||||
fleeing = True
|
||||
t = 1.0 - dist / FLEE_DIST
|
||||
s = t * t * 20.0
|
||||
fx -= (dx / dist) * s
|
||||
fy -= (dy / dist) * s
|
||||
|
||||
# Cohesion — halved while fleeing to reduce mid-panic collisions
|
||||
cx, cy, cn = 0.0, 0.0, 0
|
||||
for px, py in peers.values():
|
||||
d = math.hypot(px - x, py - y)
|
||||
if 0.3 < d < COHESION_DIST:
|
||||
cx += px; cy += py; cn += 1
|
||||
if cn > 0:
|
||||
w = 0.08 if fleeing else 0.15
|
||||
fx += (cx / cn - x) * w
|
||||
fy += (cy / cn - y) * w
|
||||
|
||||
# Separation — inverse-distance: huge when nearly overlapping, fades quickly
|
||||
for px, py in peers.values():
|
||||
dx, dy = px - x, py - y
|
||||
d = math.hypot(dx, dy)
|
||||
if 0.05 < d < SEPARATION_DIST:
|
||||
push = (SEPARATION_DIST - d) / d
|
||||
fx -= (dx / d) * push * 2.5
|
||||
fy -= (dy / d) * push * 2.5
|
||||
|
||||
# Walls
|
||||
if x < X_MIN + WALL_MARGIN: fx += ((X_MIN + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
|
||||
if x > X_MAX - WALL_MARGIN: fx -= ((x - (X_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
|
||||
if y < Y_MIN + WALL_MARGIN: fy += ((Y_MIN + WALL_MARGIN - y) / WALL_MARGIN) * 6.0
|
||||
if y > Y_MAX - WALL_MARGIN: fy -= ((y - (Y_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
|
||||
|
||||
# Wander — suppressed while fleeing so drift cannot deflect the flee heading
|
||||
if not fleeing:
|
||||
if random.random() < 0.02:
|
||||
wander_angle += random.uniform(-0.6, 0.6)
|
||||
fx += math.cos(wander_angle) * 0.5
|
||||
fy += math.sin(wander_angle) * 0.5
|
||||
|
||||
# Hard-stop clamp: within 0.5 m of a wall, zero any force component that
|
||||
# would push further into it. Prevents the flee force from pinning a sheep
|
||||
# against the boundary when the dog approaches from outside.
|
||||
HS = 0.5
|
||||
if x < X_MIN + HS and fx < 0: fx = 0.0
|
||||
if x > X_MAX - HS and fx > 0: fx = 0.0
|
||||
if y < Y_MIN + HS and fy < 0: fy = 0.0
|
||||
if y > Y_MAX - HS and fy > 0: fy = 0.0
|
||||
|
||||
heading = math.atan2(fy, fx)
|
||||
mag = math.hypot(fx, fy)
|
||||
speed = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))
|
||||
drive(heading, speed)
|
||||
|
||||
if step % 3 == 0:
|
||||
if step_count % 3 == 0:
|
||||
emitter.send(f"sheep:{name}:{x:.4f}:{y:.4f}")
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
"""Lazy loader for the SB3 PPO policy used by the dog controller.
|
||||
|
||||
Importing stable-baselines3 inside the Webots Python interpreter is only
|
||||
needed when ``HERDING_MODE=rl``; the Strömbom mode runs without it. This
|
||||
loader keeps SB3 out of the import path until you actually ask for the RL
|
||||
policy, so users without SB3 installed can still run the Strömbom
|
||||
baseline.
|
||||
|
||||
The policy + VecNormalize statistics are saved together by
|
||||
``training/train_ppo.py``:
|
||||
|
||||
runs/<name>/best/best_model.zip # SB3 PPO checkpoint
|
||||
runs/<name>/best/vecnormalize.pkl # observation-normaliser stats
|
||||
|
||||
Pass either the directory or the explicit zip path.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class PolicyHandle:
|
||||
"""Wrap a loaded PPO policy + VecNormalize so the controller can call
|
||||
``predict(obs)`` without thinking about either."""
|
||||
|
||||
def __init__(self, model, vecnorm):
|
||||
self.model = model
|
||||
self.vecnorm = vecnorm
|
||||
|
||||
def predict(self, obs):
|
||||
# VecNormalize expects a batched obs of shape (n_envs, obs_dim).
|
||||
if self.vecnorm is not None:
|
||||
import numpy as np
|
||||
obs_b = np.asarray(obs, dtype=np.float32).reshape(1, -1)
|
||||
obs_b = self.vecnorm.normalize_obs(obs_b)
|
||||
else:
|
||||
import numpy as np
|
||||
obs_b = np.asarray(obs, dtype=np.float32).reshape(1, -1)
|
||||
action, _ = self.model.predict(obs_b, deterministic=True)
|
||||
return action[0]
|
||||
|
||||
|
||||
def load(model_path: str, vecnorm_path: str | None = None) -> PolicyHandle:
|
||||
"""Load a PPO model (and optional VecNormalize) from disk.
|
||||
|
||||
``model_path`` may be the .zip checkpoint or a directory containing
|
||||
``best_model.zip`` (and optionally ``vecnormalize.pkl``).
|
||||
"""
|
||||
p = Path(model_path)
|
||||
if p.is_dir():
|
||||
zip_candidates = [p / "best_model.zip", p / "final.zip", p / "policy.zip"]
|
||||
zip_path = next((z for z in zip_candidates if z.exists()), None)
|
||||
if zip_path is None:
|
||||
raise FileNotFoundError(
|
||||
f"No PPO zip found in {p} (looked for best_model.zip, final.zip, policy.zip)"
|
||||
)
|
||||
if vecnorm_path is None:
|
||||
vn = p / "vecnormalize.pkl"
|
||||
if vn.exists():
|
||||
vecnorm_path = str(vn)
|
||||
else:
|
||||
zip_path = p
|
||||
|
||||
# Imports deferred so the Strömbom path doesn't require SB3.
|
||||
from stable_baselines3 import PPO
|
||||
from stable_baselines3.common.vec_env import VecNormalize
|
||||
|
||||
model = PPO.load(str(zip_path), device="auto")
|
||||
vecnorm = None
|
||||
if vecnorm_path and os.path.exists(vecnorm_path):
|
||||
# VecNormalize.load needs a venv to attach to; we only need its stats
|
||||
# at inference, so we reconstruct the wrapper manually.
|
||||
import pickle
|
||||
with open(vecnorm_path, "rb") as f:
|
||||
vecnorm = pickle.load(f)
|
||||
vecnorm.training = False
|
||||
vecnorm.norm_reward = False
|
||||
return PolicyHandle(model=model, vecnorm=vecnorm)
|
||||
@@ -1,88 +1,283 @@
|
||||
"""
|
||||
Shepherd Dog controller (Webots, manual keyboard control).
|
||||
"""Shepherd Dog controller (Webots).
|
||||
|
||||
WASD / arrow keys drive the robot. +/- adjust speed in 10 % increments.
|
||||
GPS position is broadcast every step on channel 1 so sheep controllers
|
||||
can compute flee forces. Ears wag continuously via sinusoidal position
|
||||
targets — purely cosmetic.
|
||||
Runs in one of two modes selected by the ``HERDING_MODE`` environment
|
||||
variable:
|
||||
|
||||
HERDING_MODE=rl → load an SB3 PPO policy from
|
||||
HERDING_POLICY_DIR (default
|
||||
training/runs/latest/best) and use its
|
||||
(vx, vy) action each step.
|
||||
HERDING_MODE=strombom → use the analytic Strömbom collect/drive
|
||||
heuristic. This is the fallback if the RL
|
||||
policy can't be loaded (e.g. SB3 not
|
||||
installed in the Webots Python env, or no
|
||||
checkpoint yet).
|
||||
|
||||
Both modes share the same low-level differential-drive controller
|
||||
(``herding.diffdrive.velocity_to_wheels`` + clamped forward speed), so
|
||||
switching modes does not retune the actuation layer.
|
||||
|
||||
A safety supervisor enforces the "dog stays out of the pen" invariant:
|
||||
if the action would push the dog past ``DOG_SOUTH_LIMIT`` it is
|
||||
overridden with a north-driving correction. This is a hard guarantee
|
||||
the policy cannot escape.
|
||||
"""
|
||||
|
||||
import math
|
||||
from controller import Robot, Keyboard
|
||||
import os
|
||||
import sys
|
||||
|
||||
robot = Robot()
|
||||
# --- Make the shared herding/ package importable from this controller dir ---
|
||||
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
|
||||
if _PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, _PROJECT_ROOT)
|
||||
|
||||
from controller import Robot
|
||||
|
||||
from herding.diffdrive import velocity_to_wheels
|
||||
from herding.geometry import (
|
||||
DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
|
||||
DOG_SOUTH_LIMIT, DOG_WHEEL_RADIUS,
|
||||
PEN_ENTRY,
|
||||
)
|
||||
from herding.obs import build_obs
|
||||
from herding.sequential import compute_action_debug as sequential_action_debug
|
||||
from herding.strombom import compute_action_debug as strombom_action_debug
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mode selection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _load_runtime_config():
|
||||
"""Read mode + policy_dir overrides from a runtime config file.
|
||||
|
||||
Webots strips HERDING_* env vars in some configurations, so the
|
||||
launcher writes a tiny ``herding_runtime.cfg`` (key=value lines)
|
||||
in the project root and the controller reads it here. Env vars
|
||||
win if both are present; the file is the fallback.
|
||||
"""
|
||||
cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
|
||||
if not os.path.exists(cfg_path):
|
||||
return {}
|
||||
out = {}
|
||||
try:
|
||||
with open(cfg_path) as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#") or "=" not in line:
|
||||
continue
|
||||
k, _, v = line.partition("=")
|
||||
out[k.strip().upper()] = v.strip()
|
||||
except OSError:
|
||||
return {}
|
||||
return out
|
||||
|
||||
|
||||
_runtime_cfg = _load_runtime_config()
|
||||
MODE = (os.environ.get("HERDING_MODE")
|
||||
or _runtime_cfg.get("HERDING_MODE")
|
||||
or "rl").lower()
|
||||
|
||||
|
||||
def _resolve_policy_dir() -> str:
|
||||
"""Where to look for the trained policy.
|
||||
|
||||
Priority:
|
||||
1. HERDING_POLICY_DIR env var (if set and points to a real dir)
|
||||
2. training/runs/bc_pretrained/ (BC-only checkpoint)
|
||||
3. training/runs/bc_ppo/best/ (PPO fine-tuned best)
|
||||
4. training/runs/latest/best/ (legacy default)
|
||||
"""
|
||||
env_dir = (os.environ.get("HERDING_POLICY_DIR")
|
||||
or _runtime_cfg.get("HERDING_POLICY_DIR"))
|
||||
if env_dir and os.path.isdir(env_dir):
|
||||
return env_dir
|
||||
candidates = [
|
||||
os.path.join(_PROJECT_ROOT, "training", "runs", "bc_pretrained"),
|
||||
os.path.join(_PROJECT_ROOT, "training", "runs", "bc_ppo", "best"),
|
||||
os.path.join(_PROJECT_ROOT, "training", "runs", "latest", "best"),
|
||||
]
|
||||
for c in candidates:
|
||||
if os.path.isdir(c):
|
||||
return c
|
||||
# Last resort — return env var anyway so error message is informative.
|
||||
return env_dir or candidates[0]
|
||||
|
||||
|
||||
POLICY_DIR = _resolve_policy_dir()
|
||||
|
||||
policy_handle = None
|
||||
if MODE == "rl":
|
||||
print(f"[dog] HERDING_MODE={MODE} HERDING_POLICY_DIR(env)="
|
||||
f"{os.environ.get('HERDING_POLICY_DIR', '<unset>')}")
|
||||
print(f"[dog] resolved POLICY_DIR={POLICY_DIR} exists="
|
||||
f"{os.path.isdir(POLICY_DIR)}")
|
||||
if os.path.isdir(POLICY_DIR):
|
||||
try:
|
||||
entries = sorted(os.listdir(POLICY_DIR))
|
||||
except OSError:
|
||||
entries = []
|
||||
print(f"[dog] dir contents: {entries}")
|
||||
try:
|
||||
from policy_loader import load as _load_policy
|
||||
policy_handle = _load_policy(POLICY_DIR)
|
||||
print(f"[dog] RL policy loaded from {POLICY_DIR}")
|
||||
except Exception as exc:
|
||||
print(f"[dog] RL policy load failed ({exc!r}); falling back to Strömbom.")
|
||||
MODE = "strombom"
|
||||
if MODE not in ("rl", "strombom", "sequential"):
|
||||
print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
|
||||
MODE = "strombom"
|
||||
print(f"[dog] running in mode={MODE}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Action smoothing + safety supervisor
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
ACTION_SMOOTH = 0.35
|
||||
prev_action = (0.0, 0.0)
|
||||
|
||||
|
||||
def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
|
||||
"""If the dog is near the south barrier and the action would push it
|
||||
further south, override with a northward action. Hard invariant: the
|
||||
dog never enters the pen."""
|
||||
if dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
|
||||
return (0.0, 1.0)
|
||||
if dog_y < DOG_SOUTH_LIMIT + 0.5 and vy < -0.2:
|
||||
return (vx * 0.5, max(0.0, vy + 0.5))
|
||||
return (vx, vy)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Driving
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: float):
|
||||
if math.hypot(vx, vy) < 1e-3:
|
||||
left_motor.setVelocity(0.0)
|
||||
right_motor.setVelocity(0.0)
|
||||
return
|
||||
n = compass.getValues()
|
||||
h = math.atan2(n[0], n[1])
|
||||
left, right = velocity_to_wheels(
|
||||
vx, vy, h,
|
||||
max_linear=DOG_MAX_LINEAR,
|
||||
wheel_radius=DOG_WHEEL_RADIUS,
|
||||
max_wheel_omega=motor_max,
|
||||
k_turn=4.0,
|
||||
)
|
||||
left_motor.setVelocity(left)
|
||||
right_motor.setVelocity(right)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Webots devices
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
robot = Robot()
|
||||
timestep = int(robot.getBasicTimeStep())
|
||||
|
||||
left_motor = robot.getDevice("left wheel motor")
|
||||
left_motor = robot.getDevice("left wheel motor")
|
||||
right_motor = robot.getDevice("right wheel motor")
|
||||
left_motor.setPosition(float("inf"))
|
||||
right_motor.setPosition(float("inf"))
|
||||
left_motor.setVelocity(0.0)
|
||||
right_motor.setVelocity(0.0)
|
||||
MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
|
||||
|
||||
lidar = robot.getDevice("lidar")
|
||||
lidar.enable(timestep)
|
||||
lidar.enablePointCloud()
|
||||
|
||||
gps = robot.getDevice("gps"); gps.enable(timestep)
|
||||
compass = robot.getDevice("compass"); compass.enable(timestep)
|
||||
emitter = robot.getDevice("emitter")
|
||||
gps = robot.getDevice("gps"); gps.enable(timestep)
|
||||
compass = robot.getDevice("compass"); compass.enable(timestep)
|
||||
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
|
||||
emitter = robot.getDevice("emitter")
|
||||
|
||||
left_ear = robot.getDevice("left ear motor")
|
||||
# Cosmetic ear motors — ignored by control logic but keep them animated.
|
||||
left_ear = robot.getDevice("left ear motor")
|
||||
right_ear = robot.getDevice("right ear motor")
|
||||
left_ear.setPosition(float("inf"))
|
||||
right_ear.setPosition(float("inf"))
|
||||
left_ear.setVelocity(0.0)
|
||||
right_ear.setVelocity(0.0)
|
||||
ear_phase = 0.0
|
||||
EAR_AMPLITUDE = 0.35
|
||||
EAR_RATE = 8.0
|
||||
|
||||
keyboard = robot.getKeyboard()
|
||||
keyboard.enable(timestep)
|
||||
|
||||
MOTOR_MAX = left_motor.getMaxVelocity()
|
||||
speed_level = 0.5 # fraction of MOTOR_MAX; adjusted by +/-
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main loop
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
EAR_AMPLITUDE = 0.35 # rad, peak ear deflection
|
||||
EAR_RATE = 8.0 # rad/s, how fast the ears are driven
|
||||
ear_phase = 0.0
|
||||
# {name: (x, y)} — kept across all sheep ever heard from. Sheep that drift
|
||||
# into the pen are tracked by ``penned`` so observations and Strömbom
|
||||
# agree on which ones still need herding.
|
||||
sheep_positions: dict = {}
|
||||
penned_set: set = set()
|
||||
step_count = 0
|
||||
|
||||
from herding.geometry import is_penned_position
|
||||
|
||||
while robot.step(timestep) != -1:
|
||||
speed = MOTOR_MAX * speed_level
|
||||
turn = speed * 0.6 # differential turn radius
|
||||
step_count += 1
|
||||
|
||||
left_vel = 0.0
|
||||
right_vel = 0.0
|
||||
key = keyboard.getKey()
|
||||
while key > 0:
|
||||
if key in (ord('W'), Keyboard.UP):
|
||||
left_vel = speed
|
||||
right_vel = speed
|
||||
elif key in (ord('S'), Keyboard.DOWN):
|
||||
left_vel = -speed
|
||||
right_vel = -speed
|
||||
elif key in (ord('A'), Keyboard.LEFT):
|
||||
left_vel = -turn
|
||||
right_vel = turn
|
||||
elif key in (ord('D'), Keyboard.RIGHT):
|
||||
left_vel = turn
|
||||
right_vel = -turn
|
||||
elif key in (ord('+'), ord('=')):
|
||||
speed_level = min(1.0, speed_level + 0.1)
|
||||
print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
|
||||
elif key in (ord('-'), ord('_')):
|
||||
speed_level = max(0.1, speed_level - 0.1)
|
||||
print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
|
||||
key = keyboard.getKey()
|
||||
|
||||
left_motor.setVelocity(left_vel)
|
||||
right_motor.setVelocity(right_vel)
|
||||
while receiver.getQueueLength() > 0:
|
||||
msg = receiver.getString()
|
||||
receiver.nextPacket()
|
||||
parts = msg.split(":")
|
||||
if len(parts) == 4 and parts[0] == "sheep":
|
||||
try:
|
||||
x, y = float(parts[2]), float(parts[3])
|
||||
except ValueError:
|
||||
continue
|
||||
sheep_positions[parts[1]] = (x, y)
|
||||
if parts[1] not in penned_set and is_penned_position(x, y):
|
||||
penned_set.add(parts[1])
|
||||
|
||||
pos = gps.getValues()
|
||||
emitter.send(f"dog:{pos[0]}:{pos[1]}")
|
||||
dog_xy = (pos[0], pos[1])
|
||||
n = compass.getValues()
|
||||
dog_heading = math.atan2(n[0], n[1])
|
||||
|
||||
# ---- Action selection ----
|
||||
if MODE == "rl" and policy_handle is not None:
|
||||
sheep_xy_list = list(sheep_positions.values())
|
||||
sheep_names = list(sheep_positions.keys())
|
||||
sheep_penned_list = [s in penned_set for s in sheep_names]
|
||||
obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)
|
||||
action = policy_handle.predict(obs)
|
||||
vx, vy = float(action[0]), float(action[1])
|
||||
elif MODE == "sequential":
|
||||
vx, vy, _mode_str, _dbg = sequential_action_debug(
|
||||
dog_xy, sheep_positions, PEN_ENTRY,
|
||||
)
|
||||
else:
|
||||
# Strömbom (canonical baseline).
|
||||
vx, vy, _mode_str, _dbg = strombom_action_debug(
|
||||
dog_xy, sheep_positions, PEN_ENTRY,
|
||||
)
|
||||
|
||||
# EMA smoothing — reduces oscillation from policy or Strömbom flips.
|
||||
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
|
||||
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
|
||||
|
||||
# Safety: dog must never enter the pen.
|
||||
vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
|
||||
prev_action = (vx, vy)
|
||||
|
||||
drive(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
|
||||
emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")
|
||||
|
||||
# Cosmetic ear wiggle — purely visual.
|
||||
ear_phase += 0.12
|
||||
ear_pos = EAR_AMPLITUDE * math.sin(ear_phase)
|
||||
left_ear.setVelocity(EAR_RATE)
|
||||
right_ear.setVelocity(EAR_RATE)
|
||||
left_ear.setPosition( ear_pos)
|
||||
left_ear.setPosition(ear_pos)
|
||||
right_ear.setPosition(-ear_pos)
|
||||
|
||||
if step_count % 200 == 0:
|
||||
n_active = sum(1 for s in sheep_positions if s not in penned_set)
|
||||
print(f"[dog mode={MODE}] step={step_count} known={len(sheep_positions)} "
|
||||
f"penned={len(penned_set)} active={n_active} action=({vx:+.2f}, {vy:+.2f})")
|
||||
|
||||
Binary file not shown.
@@ -1,153 +0,0 @@
|
||||
"""
|
||||
Render Webots-side debug trajectory from debug.csv.
|
||||
|
||||
The shepherd_dog_rl controller writes per-step state to debug.csv when
|
||||
DOG_DEBUG=1. This script reads it and produces:
|
||||
|
||||
trajectory.png — dog path + sheep paths overlaid on the field
|
||||
obs_drift.png — normalized observation distribution over time
|
||||
actions.png — vx, vy time series
|
||||
|
||||
Run:
|
||||
python plot_debug.py # uses debug.csv next to this file
|
||||
python plot_debug.py --csv path/to.csv --out-dir somewhere/
|
||||
"""
|
||||
import argparse
|
||||
import csv
|
||||
import os
|
||||
import sys
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.patches as mpatches
|
||||
import numpy as np
|
||||
|
||||
|
||||
def load_csv(path):
|
||||
rows = []
|
||||
with open(path) as f:
|
||||
rd = csv.DictReader(f)
|
||||
for r in rd:
|
||||
rows.append(r)
|
||||
if not rows:
|
||||
sys.exit(f"empty CSV: {path}")
|
||||
return rows
|
||||
|
||||
|
||||
def parse_floats(s):
|
||||
return [float(x) for x in s.split(";") if x]
|
||||
|
||||
|
||||
def plot_trajectory(rows, out_path):
|
||||
fig, ax = plt.subplots(figsize=(7, 7))
|
||||
ax.set_xlim(-16, 16); ax.set_ylim(-16, 16); ax.set_aspect("equal")
|
||||
ax.set_facecolor("#dcedc8")
|
||||
ax.add_patch(mpatches.Rectangle((-15, -15), 30, 30,
|
||||
fill=False, edgecolor="#795548", lw=2))
|
||||
ax.add_patch(mpatches.Rectangle((10, -15), 3, 7,
|
||||
facecolor="#ffe082", edgecolor="#795548", lw=2))
|
||||
ax.text(11.5, -11.5, "pen", ha="center", va="center", fontsize=8)
|
||||
|
||||
dog_x = [float(r["dog_x"]) for r in rows]
|
||||
dog_y = [float(r["dog_y"]) for r in rows]
|
||||
ax.plot(dog_x, dog_y, color="#4e342e", lw=1.5, alpha=0.7, label="dog")
|
||||
ax.plot(dog_x[0], dog_y[0], "s", color="#4e342e", ms=10)
|
||||
ax.plot(dog_x[-1], dog_y[-1], "D", color="#4e342e", ms=10)
|
||||
|
||||
# Sheep — re-shape into per-sheep tracks
|
||||
sx_all = [parse_floats(r["sheep_xs"]) for r in rows]
|
||||
sy_all = [parse_floats(r["sheep_ys"]) for r in rows]
|
||||
if sx_all and sx_all[-1]:
|
||||
n_sheep = len(sx_all[-1])
|
||||
palette = ["#e41a1c","#377eb8","#4daf4a","#984ea3","#ff7f00",
|
||||
"#a65628","#f781bf","#999999","#66c2a5","#fc8d62"]
|
||||
for i in range(n_sheep):
|
||||
xs = [r[i] if i < len(r) else None for r in sx_all]
|
||||
ys = [r[i] if i < len(r) else None for r in sy_all]
|
||||
xs = [x for x in xs if x is not None]
|
||||
ys = [y for y in ys if y is not None]
|
||||
if xs:
|
||||
c = palette[i % len(palette)]
|
||||
ax.plot(xs, ys, color=c, lw=0.8, alpha=0.6, label=f"sheep {i+1}")
|
||||
ax.plot(xs[0], ys[0], "o", color=c, ms=6)
|
||||
ax.plot(xs[-1], ys[-1], "*", color=c, ms=10)
|
||||
|
||||
n_in_pen = int(rows[-1]["n_penned"])
|
||||
ax.set_title(f"Webots trajectory {len(rows)} steps penned={n_in_pen}",
|
||||
fontsize=12)
|
||||
ax.legend(loc="upper left", fontsize=7, ncol=2)
|
||||
plt.tight_layout()
|
||||
fig.savefig(out_path, dpi=120)
|
||||
plt.close(fig)
|
||||
|
||||
|
||||
def plot_actions(rows, out_path):
|
||||
t = np.arange(len(rows))
|
||||
vx = np.array([float(r["vx"]) for r in rows])
|
||||
vy = np.array([float(r["vy"]) for r in rows])
|
||||
mag = np.sqrt(vx ** 2 + vy ** 2)
|
||||
|
||||
fig, axes = plt.subplots(3, 1, figsize=(12, 7), sharex=True)
|
||||
axes[0].plot(t, vx, color="tab:red", lw=0.8); axes[0].set_ylabel("vx")
|
||||
axes[0].axhline(0, color="black", lw=0.4); axes[0].set_ylim(-1.1, 1.1)
|
||||
axes[1].plot(t, vy, color="tab:blue", lw=0.8); axes[1].set_ylabel("vy")
|
||||
axes[1].axhline(0, color="black", lw=0.4); axes[1].set_ylim(-1.1, 1.1)
|
||||
axes[2].plot(t, mag, color="tab:purple", lw=0.8); axes[2].set_ylabel("||action||")
|
||||
axes[2].axhline(np.sqrt(2), color="orange", ls="--", lw=1, label="saturated √2")
|
||||
axes[2].axhline(1.0, color="gray", ls="--", lw=1)
|
||||
axes[2].set_xlabel("step"); axes[2].legend(fontsize=8)
|
||||
fig.suptitle("Webots action time series")
|
||||
plt.tight_layout()
|
||||
fig.savefig(out_path, dpi=120)
|
||||
plt.close(fig)
|
||||
|
||||
|
||||
def plot_obs(rows, out_path):
|
||||
norm = np.array([parse_floats(r["norm_obs"]) for r in rows])
|
||||
raw = np.array([parse_floats(r["raw_obs"]) for r in rows])
|
||||
if norm.size == 0:
|
||||
return
|
||||
n_dims = norm.shape[1]
|
||||
labels = [
|
||||
"dog_x", "dog_y", "com-dog_x", "com-dog_y",
|
||||
"far1-com_x", "far1-com_y", "far2-com_x", "far2-com_y",
|
||||
"far3-com_x", "far3-com_y", "pen-com_x", "pen-com_y",
|
||||
"pen-far1_x", "pen-far1_y", "radius", "frac_active",
|
||||
][:n_dims]
|
||||
|
||||
t = np.arange(norm.shape[0])
|
||||
fig, axes = plt.subplots(n_dims, 1, figsize=(11, 1.0 * n_dims), sharex=True)
|
||||
if n_dims == 1: axes = [axes]
|
||||
for i in range(n_dims):
|
||||
axes[i].plot(t, raw[:, i], color="tab:gray", lw=0.6, alpha=0.6, label="raw")
|
||||
axes[i].plot(t, norm[:, i], color="tab:red", lw=0.8, label="normalised")
|
||||
axes[i].set_ylabel(labels[i], fontsize=8)
|
||||
axes[i].tick_params(labelsize=7)
|
||||
if i == 0:
|
||||
axes[i].legend(fontsize=7, loc="upper right")
|
||||
axes[-1].set_xlabel("step")
|
||||
fig.suptitle("Observation values over time (raw vs VecNormalize-normalised)")
|
||||
plt.tight_layout()
|
||||
fig.savefig(out_path, dpi=110)
|
||||
plt.close(fig)
|
||||
|
||||
|
||||
def main():
|
||||
p = argparse.ArgumentParser()
|
||||
here = os.path.dirname(os.path.abspath(__file__))
|
||||
p.add_argument("--csv", default=os.path.join(here, "debug.csv"))
|
||||
p.add_argument("--out-dir", default=os.path.join(here, "debug_out"))
|
||||
args = p.parse_args()
|
||||
|
||||
rows = load_csv(args.csv)
|
||||
os.makedirs(args.out_dir, exist_ok=True)
|
||||
print(f"loaded {len(rows)} rows from {args.csv}")
|
||||
plot_trajectory(rows, os.path.join(args.out_dir, "trajectory.png"))
|
||||
plot_actions(rows, os.path.join(args.out_dir, "actions.png"))
|
||||
plot_obs(rows, os.path.join(args.out_dir, "obs.png"))
|
||||
print(f"saved trajectory.png + actions.png + obs.png to {args.out_dir}/")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,285 +0,0 @@
|
||||
"""
|
||||
Shepherd Dog RL controller — runs a trained SB3 PPO policy inside Webots.
|
||||
|
||||
Setup
|
||||
-----
|
||||
1. Copy your trained files into this directory:
|
||||
controllers/shepherd_dog_rl/final_model.zip
|
||||
controllers/shepherd_dog_rl/vecnorm.pkl
|
||||
|
||||
2. In field.wbt, set the ShepherdDog robot's controller field to
|
||||
"shepherd_dog_rl". You can do this in the Webots GUI:
|
||||
click the robot → Controller → shepherd_dog_rl
|
||||
|
||||
3. Optional: set controllerArgs to ["5"] (number of sheep) if it differs
|
||||
from the default of 5.
|
||||
|
||||
The controller reads GPS (dog position) and Receiver (sheep broadcasts),
|
||||
builds the same 16-dim flock observation the training env used, normalises
|
||||
it with the saved VecNormalize stats, and converts the (vx, vy) policy
|
||||
output into differential wheel speeds.
|
||||
|
||||
Debug logging
|
||||
-------------
|
||||
Set env var DOG_DEBUG=1 to write a per-step CSV (dog pos, sheep positions,
|
||||
raw obs, normalised obs, action) to debug.csv alongside this script. Use
|
||||
plot_debug.py to render trajectories from it.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import math
|
||||
import struct
|
||||
import numpy as np
|
||||
|
||||
# ── make training code importable ───────────────────────────────────────────
|
||||
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
_TRAINING = os.path.join(_HERE, "..", "..", "training")
|
||||
sys.path.insert(0, _TRAINING)
|
||||
|
||||
from controller import Robot
|
||||
from stable_baselines3 import PPO
|
||||
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
|
||||
from herding_env import HerdingEnv
|
||||
|
||||
# ── constants (must match herding_env.py) ───────────────────────────────────
|
||||
FIELD = 15.0
|
||||
PEN_CENTER = np.array([11.5, -11.5], dtype=np.float32)
|
||||
PEN_X = (10.0, 13.0)
|
||||
PEN_Y = (-15.0, -8.0)
|
||||
DOG_SPEED = 2.5 # m/s
|
||||
WHEEL_R = 0.038 # wheel radius (metres) — from ShepherdDog.proto
|
||||
K_TURN = 4.0 # heading-error gain (rad/s per rad)
|
||||
EAR_AMPLITUDE = 0.35
|
||||
EAR_RATE = 8.0
|
||||
|
||||
# ── model paths ─────────────────────────────────────────────────────────────
|
||||
MODEL_PATH = os.path.join(_HERE, "final_model.zip")
|
||||
VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
|
||||
DEBUG_CSV = os.path.join(_HERE, "debug.csv")
|
||||
DEBUG_ENABLED = True # set False to disable debug.csv logging
|
||||
|
||||
# ── action smoothing ─────────────────────────────────────────────────────────
|
||||
# EMA on policy output to suppress the rapid oscillation (vx/vy flipping
|
||||
# between -1 and +1 every step) that stalls the physical dog. 0 = no
|
||||
# smoothing (raw policy), 1 = frozen. 0.3 keeps ~30% of previous action.
|
||||
ACTION_SMOOTH = 0.3
|
||||
prev_action = np.zeros(2, dtype=np.float32)
|
||||
|
||||
|
||||
def norm_angle(a: float) -> float:
|
||||
while a > math.pi: a -= 2 * math.pi
|
||||
while a < -math.pi: a += 2 * math.pi
|
||||
return a
|
||||
|
||||
|
||||
def in_pen(x: float, y: float) -> bool:
|
||||
return PEN_X[0] < x < PEN_X[1] and PEN_Y[0] < y < PEN_Y[1]
|
||||
|
||||
|
||||
def build_obs(dog_pos: np.ndarray,
|
||||
sheep_dict: dict,
|
||||
n_sheep: int,
|
||||
dog_heading: float = 0.0) -> np.ndarray:
|
||||
"""
|
||||
Build the 18-dim flock observation — identical to HerdingEnv._obs().
|
||||
|
||||
sheep_dict: {name: (x, y)} for ALL known sheep (penned or not).
|
||||
dog_heading: dog's current world-frame heading in radians.
|
||||
"""
|
||||
D = 2 * FIELD
|
||||
|
||||
# Split active vs penned
|
||||
active_pos = np.array(
|
||||
[v for v in sheep_dict.values() if not in_pen(*v)],
|
||||
dtype=np.float32
|
||||
)
|
||||
n_active = len(active_pos)
|
||||
|
||||
if n_active > 0:
|
||||
com = active_pos.mean(axis=0)
|
||||
d_from_com = np.linalg.norm(active_pos - com, axis=1)
|
||||
sorted_idx = np.argsort(d_from_com)[::-1]
|
||||
radius = float(d_from_com[sorted_idx[0]])
|
||||
def nth(n):
|
||||
return active_pos[sorted_idx[n]] if len(sorted_idx) > n else com
|
||||
far1, far2, far3 = nth(0), nth(1), nth(2)
|
||||
else:
|
||||
com = PEN_CENTER.copy()
|
||||
radius = 0.0
|
||||
far1 = far2 = far3 = PEN_CENTER.copy()
|
||||
|
||||
frac_active = n_active / max(n_sheep, 1)
|
||||
|
||||
return np.array([
|
||||
dog_pos[0] / FIELD, dog_pos[1] / FIELD,
|
||||
(com[0] - dog_pos[0]) / D, (com[1] - dog_pos[1]) / D,
|
||||
(far1[0] - com[0]) / D, (far1[1] - com[1]) / D,
|
||||
(far2[0] - com[0]) / D, (far2[1] - com[1]) / D,
|
||||
(far3[0] - com[0]) / D, (far3[1] - com[1]) / D,
|
||||
(PEN_CENTER[0] - com[0]) / D, (PEN_CENTER[1] - com[1]) / D,
|
||||
(PEN_CENTER[0] - far1[0]) / D, (PEN_CENTER[1] - far1[1]) / D,
|
||||
radius / D,
|
||||
frac_active,
|
||||
math.cos(dog_heading), math.sin(dog_heading),
|
||||
], dtype=np.float32)
|
||||
|
||||
|
||||
# ── Webots setup ─────────────────────────────────────────────────────────────
|
||||
robot = Robot()
|
||||
timestep = int(robot.getBasicTimeStep())
|
||||
|
||||
# Drive motors
|
||||
left_motor = robot.getDevice("left wheel motor")
|
||||
right_motor = robot.getDevice("right wheel motor")
|
||||
left_motor.setPosition(float("inf"))
|
||||
right_motor.setPosition(float("inf"))
|
||||
left_motor.setVelocity(0.0)
|
||||
right_motor.setVelocity(0.0)
|
||||
MOTOR_MAX = left_motor.getMaxVelocity()
|
||||
|
||||
# Sensors
|
||||
gps = robot.getDevice("gps"); gps.enable(timestep)
|
||||
compass = robot.getDevice("compass"); compass.enable(timestep)
|
||||
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
|
||||
emitter = robot.getDevice("emitter")
|
||||
|
||||
# Cosmetic
|
||||
left_ear = robot.getDevice("left ear motor")
|
||||
right_ear = robot.getDevice("right ear motor")
|
||||
left_ear.setPosition(float("inf")); right_ear.setPosition(float("inf"))
|
||||
left_ear.setVelocity(0.0); right_ear.setVelocity(0.0)
|
||||
ear_phase = 0.0
|
||||
|
||||
# Number of sheep (from controllerArgs or default)
|
||||
try:
|
||||
n_sheep = int(sys.argv[1])
|
||||
except (IndexError, ValueError):
|
||||
n_sheep = 3
|
||||
|
||||
# ── Load model ───────────────────────────────────────────────────────────────
|
||||
print(f"[RL dog] Loading model from {MODEL_PATH}")
|
||||
print(f"[RL dog] Loading vecnorm from {VECNORM_PATH}")
|
||||
|
||||
dummy_env = DummyVecEnv([lambda: HerdingEnv(n_sheep=n_sheep)])
|
||||
vecnorm = VecNormalize.load(VECNORM_PATH, dummy_env)
|
||||
vecnorm.training = False
|
||||
vecnorm.norm_reward = False
|
||||
|
||||
model = PPO.load(MODEL_PATH, device="cpu")
|
||||
print(f"[RL dog] Model loaded — running with n_sheep={n_sheep}")
|
||||
|
||||
# ── Runtime state ─────────────────────────────────────────────────────────────
|
||||
sheep_positions: dict = {} # {name: (x, y)} — updated every step from receiver
|
||||
step_count = 0
|
||||
|
||||
# Debug CSV — written every step when DOG_DEBUG=1
|
||||
debug_file = None
|
||||
if DEBUG_ENABLED:
|
||||
import csv
|
||||
debug_file = open(DEBUG_CSV, "w", newline="")
|
||||
debug_writer = csv.writer(debug_file)
|
||||
debug_writer.writerow([
|
||||
"step", "dog_x", "dog_y", "heading",
|
||||
"sheep_xs", "sheep_ys", "n_active", "n_penned",
|
||||
"raw_obs", "norm_obs", "vx", "vy",
|
||||
])
|
||||
print(f"[RL dog] DEBUG logging to {DEBUG_CSV}")
|
||||
|
||||
|
||||
def bearing() -> float:
|
||||
"""Current robot heading in world frame (radians)."""
|
||||
n = compass.getValues()
|
||||
return math.atan2(n[0], n[1])
|
||||
|
||||
|
||||
def drive(action_vx: float, action_vy: float) -> None:
|
||||
"""Convert (vx, vy) policy action to differential wheel speeds."""
|
||||
speed_ms = math.sqrt(action_vx ** 2 + action_vy ** 2) * DOG_SPEED
|
||||
if speed_ms < 0.05:
|
||||
left_motor.setVelocity(0.0)
|
||||
right_motor.setVelocity(0.0)
|
||||
return
|
||||
|
||||
target_heading = math.atan2(action_vy, action_vx)
|
||||
err = norm_angle(target_heading - bearing())
|
||||
|
||||
fwd_ms = speed_ms * max(0.0, math.cos(err))
|
||||
fwd_rad = fwd_ms / WHEEL_R
|
||||
turn = K_TURN * err # rad/s correction
|
||||
|
||||
l = max(-MOTOR_MAX, min(MOTOR_MAX, fwd_rad - turn))
|
||||
r = max(-MOTOR_MAX, min(MOTOR_MAX, fwd_rad + turn))
|
||||
left_motor.setVelocity(l)
|
||||
right_motor.setVelocity(r)
|
||||
|
||||
|
||||
# ── Main loop ─────────────────────────────────────────────────────────────────
|
||||
while robot.step(timestep) != -1:
|
||||
step_count += 1
|
||||
|
||||
# 1. Drain receiver — update sheep position table
|
||||
while receiver.getQueueLength() > 0:
|
||||
try:
|
||||
msg = receiver.getString()
|
||||
parts = msg.split(":")
|
||||
if parts[0] == "sheep" and len(parts) == 4:
|
||||
sheep_positions[parts[1]] = (float(parts[2]), float(parts[3]))
|
||||
except Exception:
|
||||
pass
|
||||
receiver.nextPacket()
|
||||
|
||||
# 2. Dog GPS
|
||||
gps_vals = gps.getValues()
|
||||
dog_pos = np.array([gps_vals[0], gps_vals[1]], dtype=np.float32)
|
||||
|
||||
# 3. Build and normalise observation (heading from compass)
|
||||
raw_obs = build_obs(dog_pos, sheep_positions, n_sheep,
|
||||
dog_heading=bearing())
|
||||
obs_norm = vecnorm.normalize_obs(raw_obs[np.newaxis]) # (1, 13)
|
||||
|
||||
# 4. Policy inference + smoothing
|
||||
action, _ = model.predict(obs_norm, deterministic=True)
|
||||
raw_a = np.array([float(action[0][0]), float(action[0][1])], dtype=np.float32)
|
||||
if ACTION_SMOOTH > 0:
|
||||
smoothed = ACTION_SMOOTH * prev_action + (1.0 - ACTION_SMOOTH) * raw_a
|
||||
prev_action[:] = smoothed
|
||||
vx, vy = float(smoothed[0]), float(smoothed[1])
|
||||
else:
|
||||
vx, vy = float(raw_a[0]), float(raw_a[1])
|
||||
|
||||
# 5. Drive
|
||||
drive(vx, vy)
|
||||
|
||||
# 6. Broadcast dog position so sheep can compute flee forces
|
||||
emitter.send(f"dog:{dog_pos[0]:.4f}:{dog_pos[1]:.4f}")
|
||||
|
||||
# 7. Ear animation
|
||||
ear_phase += 0.12
|
||||
ep = EAR_AMPLITUDE * math.sin(ear_phase)
|
||||
left_ear.setVelocity(EAR_RATE); right_ear.setVelocity(EAR_RATE)
|
||||
left_ear.setPosition( ep); right_ear.setPosition(-ep)
|
||||
|
||||
# Periodic status
|
||||
if step_count % 100 == 0:
|
||||
n_in_pen = sum(1 for x, y in sheep_positions.values() if in_pen(x, y))
|
||||
print(f"[RL dog] step={step_count} known_sheep={len(sheep_positions)}"
|
||||
f" penned={n_in_pen}/{n_sheep} dog=({dog_pos[0]:.2f},{dog_pos[1]:.2f})"
|
||||
f" action=({vx:.2f}, {vy:.2f})")
|
||||
|
||||
# Debug CSV row
|
||||
if debug_file is not None:
|
||||
n_active = sum(1 for x, y in sheep_positions.values() if not in_pen(x, y))
|
||||
n_in_pen = len(sheep_positions) - n_active
|
||||
debug_writer.writerow([
|
||||
step_count, f"{dog_pos[0]:.4f}", f"{dog_pos[1]:.4f}",
|
||||
f"{bearing():.4f}",
|
||||
";".join(f"{v[0]:.3f}" for v in sheep_positions.values()),
|
||||
";".join(f"{v[1]:.3f}" for v in sheep_positions.values()),
|
||||
n_active, n_in_pen,
|
||||
";".join(f"{x:.4f}" for x in raw_obs),
|
||||
";".join(f"{x:.4f}" for x in obs_norm[0]),
|
||||
f"{vx:.4f}", f"{vy:.4f}",
|
||||
])
|
||||
if step_count % 200 == 0:
|
||||
debug_file.flush()
|
||||
Binary file not shown.
Reference in New Issue
Block a user