Checkpoint 2
This commit is contained in:
+16
-12
@@ -4,18 +4,22 @@
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
|
||||
# Training
|
||||
training/**/events.out.tfevents.*
|
||||
training/**/checkpoints/
|
||||
training/runs/**
|
||||
!training/runs/.gitkeep
|
||||
|
||||
# Controller runtime artefacts
|
||||
controllers/shepherd_dog_rl/debug*.csv
|
||||
controllers/shepherd_dog_rl/debug_out*/
|
||||
controllers/shepherd_dog_rl/final_model*.zip
|
||||
controllers/shepherd_dog_rl/vecnorm*.pkl
|
||||
*.pyc
|
||||
.venv/
|
||||
|
||||
# Optional env parity debug
|
||||
dog_debug.csv
|
||||
|
||||
# Webots controller scratch
|
||||
controllers/shepherd_dog/dog_behavior_log.csv
|
||||
|
||||
# Training artefacts
|
||||
training/runs/*
|
||||
!training/runs/.gitkeep
|
||||
*.zip
|
||||
*.pkl
|
||||
|
||||
# TensorBoard
|
||||
events.out.tfevents.*
|
||||
worlds/field_test.wbt
|
||||
herding_runtime.cfg
|
||||
|
||||
+71
-156
@@ -1,45 +1,36 @@
|
||||
"""
|
||||
Sheep flocking controller (Webots, Reynolds boids variant).
|
||||
"""Sheep flocking controller (Webots).
|
||||
|
||||
Each sheep broadcasts its GPS position every 3 steps on channel 1 and
|
||||
listens for the dog and peer sheep positions. Peers are keyed by robot
|
||||
name so each neighbour has exactly one current entry in the dict.
|
||||
listens for the dog and peer sheep positions. The behavioural step is
|
||||
delegated to ``herding.flocking_sim.compute_heading_speed`` so the
|
||||
training environment and Webots run identical sheep dynamics.
|
||||
|
||||
Force stack each step (summed then converted to a heading + speed):
|
||||
flee — away from dog, quadratic ramp, dominant when close
|
||||
cohesion — toward flock centre, halved while fleeing
|
||||
separation — inverse-distance push, prevents physical overlap
|
||||
walls — linear repulsion from field boundary
|
||||
wander — small persistent drift for natural idle motion
|
||||
|
||||
Pen behaviour: on first entry into the quarantine pen the sheep latches
|
||||
permanently — it turns pink (via the exposed woolColor PROTO field) and
|
||||
the normal force stack is replaced by pen-confinement forces only.
|
||||
Pen behaviour: a sheep latches to ``penned`` the first time it crosses
|
||||
the south-wall gate plane into the gate corridor. Once latched it turns
|
||||
pink (via the exposed ``woolColor`` PROTO field) and the force stack
|
||||
switches to in-pen containment.
|
||||
"""
|
||||
|
||||
import random
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
|
||||
# --- Make the shared herding/ package importable from this controller dir ---
|
||||
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
|
||||
if _PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, _PROJECT_ROOT)
|
||||
|
||||
from controller import Supervisor
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tuning constants
|
||||
# ---------------------------------------------------------------------------
|
||||
from herding.diffdrive import heading_speed_to_wheels
|
||||
from herding.flocking_sim import MAX_SPEED, compute_heading_speed
|
||||
from herding.geometry import (
|
||||
SHEEP_MAX_WHEEL_OMEGA,
|
||||
is_penned_position,
|
||||
)
|
||||
|
||||
MAX_SPEED = 22.0 # rad/s hard clamp on both motors
|
||||
FLEE_SPEED = 20.0 # rad/s upper bound while panicking
|
||||
WANDER_SPEED = 3.0 # rad/s lower bound during calm wandering
|
||||
|
||||
X_MIN, X_MAX = -14.5, 14.5 # stone wall inner edges (metres)
|
||||
Y_MIN, Y_MAX = -14.5, 14.5
|
||||
WALL_MARGIN = 3.5 # avoidance starts this far from the wall
|
||||
|
||||
FLEE_DIST = 7.0 # dog within this radius triggers flee (metres)
|
||||
SEPARATION_DIST = 2.5 # inverse-distance push active inside this radius
|
||||
COHESION_DIST = 8.0 # pull toward flock centre active inside this radius
|
||||
|
||||
PEN_X_MIN, PEN_X_MAX = 10.0, 13.0 # quarantine pen extents (metres)
|
||||
PEN_Y_MIN, PEN_Y_MAX = -15.0, -8.0 # open entrance at y=-8, gate at y=-15
|
||||
PEN_MARGIN = 0.8 # confinement force starts this far from pen wall
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Device setup
|
||||
@@ -56,178 +47,102 @@ left_motor.setPosition(float("inf"))
|
||||
right_motor.setPosition(float("inf"))
|
||||
left_motor.setVelocity(0.0)
|
||||
right_motor.setVelocity(0.0)
|
||||
MOTOR_MAX = min(left_motor.getMaxVelocity(), SHEEP_MAX_WHEEL_OMEGA)
|
||||
|
||||
gps = robot.getDevice("gps"); gps.enable(timestep)
|
||||
compass = robot.getDevice("compass"); compass.enable(timestep)
|
||||
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
|
||||
emitter = robot.getDevice("emitter")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def norm_angle(a):
    """Wrap the angle ``a`` (radians) into the interval [-pi, pi]."""
    sin_a = math.sin(a)
    cos_a = math.cos(a)
    # atan2 of the unit-circle components yields the equivalent wrapped angle.
    return math.atan2(sin_a, cos_a)
|
||||
|
||||
|
||||
def bearing():
    """Return the current heading in radians, derived from the compass."""
    # Compass returns the north direction in the sensor frame; for this Z-up
    # world with north = +Y, atan2(n[0], n[1]) gives the standard math angle
    # (0 = east, π/2 = north) matching atan2(fy, fx) used for headings.
    north = compass.getValues()
    first, second = north[0], north[1]
    return math.atan2(first, second)
|
||||
|
||||
|
||||
def drive(heading, speed):
    """Steer toward ``heading`` at ``speed`` using a proportional turn term.

    Both wheel commands are clamped to ``[-MAX_SPEED, MAX_SPEED]``.
    """
    heading_error = norm_angle(heading - bearing())
    # Scale the forward component by cos(error): at 90° error it drops to 0 so
    # the robot spins in place to realign rather than driving sideways.
    forward = speed * max(0.0, math.cos(heading_error))
    turn_gain = 4.0
    wheel_targets = (
        (left_motor, forward - turn_gain * heading_error),
        (right_motor, forward + turn_gain * heading_error),
    )
    for motor, omega in wheel_targets:
        motor.setVelocity(max(-MAX_SPEED, min(MAX_SPEED, omega)))
|
||||
def drive(heading, speed_motor):
    """Command both wheel motors for the requested heading and speed.

    ``speed_motor`` is capped at ``MAX_SPEED`` before being handed, together
    with the current compass bearing and ``MOTOR_MAX``, to
    ``heading_speed_to_wheels``; the two values it returns are applied to the
    left and right motors respectively.
    """
    capped_speed = min(speed_motor, MAX_SPEED)
    current_bearing = bearing()
    wheel_speeds = heading_speed_to_wheels(
        heading, capped_speed, current_bearing, MOTOR_MAX, k_turn=4.0
    )
    left_w, right_w = wheel_speeds
    left_motor.setVelocity(left_w)
    right_motor.setVelocity(right_w)
|
||||
|
||||
|
||||
def paint_pink():
    """Set this sheep's ``woolColor`` field to pink."""
    # woolColor is declared as a PROTO field with IS binding to the DEF WOOL
    # PBRAppearance baseColor; setting it propagates to every USE WOOL shape.
    wool_field = self_node.getField("woolColor")
    pink_rgb = [1.0, 0.55, 0.72]
    wool_field.setSFColor(pink_rgb)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# State
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
wander_angle = random.uniform(-math.pi, math.pi)
|
||||
step = 0
|
||||
dog_x = None
|
||||
dog_y = None
|
||||
step_count = 0
|
||||
dog_x, dog_y = None, None
|
||||
peers = {} # name → (x, y), one entry per neighbour, cleared every 30 steps
|
||||
penned = False
|
||||
|
||||
# Stuck detection: differential-drive sheep can pin against a wall and need
|
||||
# a forced reverse-and-rotate to escape. If displacement < STUCK_DIST for
|
||||
# STUCK_STEPS consecutive steps, drive toward field centre.
|
||||
_prev_x, _prev_y = None, None
|
||||
_stuck_count = 0
|
||||
STUCK_STEPS = 20
|
||||
STUCK_DIST = 0.05
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main loop
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
while robot.step(timestep) != -1:
|
||||
step += 1
|
||||
step_count += 1
|
||||
pos = gps.getValues()
|
||||
x, y = pos[0], pos[1]
|
||||
|
||||
# Pen entry: one-way latch, never unset
|
||||
if not penned and PEN_X_MIN < x < PEN_X_MAX and PEN_Y_MIN < y < PEN_Y_MAX:
|
||||
# Pen entry: one-way latch. Penned sheep get pink wool and switch behaviour.
|
||||
if not penned and is_penned_position(x, y):
|
||||
penned = True
|
||||
paint_pink()
|
||||
|
||||
# Refresh peer table (clear before receiving so fresh data is never lost)
|
||||
if step % 30 == 0:
|
||||
# Refresh peer table — clear before receiving so fresh data is never lost.
|
||||
if step_count % 30 == 0:
|
||||
peers.clear()
|
||||
while receiver.getQueueLength() > 0:
|
||||
msg = receiver.getString()
|
||||
receiver.nextPacket()
|
||||
p = msg.split(":")
|
||||
if p[0] == "dog" and len(p) >= 3:
|
||||
dog_x, dog_y = float(p[1]), float(p[2])
|
||||
elif p[0] == "sheep" and len(p) >= 4 and p[1] != name:
|
||||
peers[p[1]] = (float(p[2]), float(p[3]))
|
||||
parts = msg.split(":")
|
||||
if parts[0] == "dog" and len(parts) >= 3:
|
||||
dog_x, dog_y = float(parts[1]), float(parts[2])
|
||||
elif parts[0] == "sheep" and len(parts) >= 4 and parts[1] != name:
|
||||
peers[parts[1]] = (float(parts[2]), float(parts[3]))
|
||||
|
||||
fx, fy = 0.0, 0.0
|
||||
dog_xy = (dog_x, dog_y) if dog_x is not None and dog_y is not None else None
|
||||
heading, speed, wander_angle = compute_heading_speed(
|
||||
x=x, y=y, penned=penned, dog_xy=dog_xy, peers=peers,
|
||||
wander_angle=wander_angle,
|
||||
)
|
||||
|
||||
# Repel unpenned sheep from the exterior of the pen's side walls so they
|
||||
# don't get pinned by flee forces. Only fires when strictly outside the pen
|
||||
# (x < PEN_X_MIN or x > PEN_X_MAX) at pen height (y in pen y-range).
|
||||
# Entrance is open on the north (y > PEN_Y_MAX) — no force there.
|
||||
PEN_EXT_MARGIN = 0.8
|
||||
if not penned and PEN_Y_MIN < y < PEN_Y_MAX:
|
||||
if PEN_X_MIN - PEN_EXT_MARGIN < x < PEN_X_MIN:
|
||||
fx -= ((x - (PEN_X_MIN - PEN_EXT_MARGIN)) / PEN_EXT_MARGIN) * 6.0
|
||||
if PEN_X_MAX < x < PEN_X_MAX + PEN_EXT_MARGIN:
|
||||
fx += ((PEN_X_MAX + PEN_EXT_MARGIN - x) / PEN_EXT_MARGIN) * 6.0
|
||||
# Stuck detection — safety net for differential-drive wall pinning.
|
||||
if _prev_x is not None:
|
||||
moved = math.hypot(x - _prev_x, y - _prev_y)
|
||||
_stuck_count = _stuck_count + 1 if moved < STUCK_DIST else 0
|
||||
if _stuck_count >= STUCK_STEPS:
|
||||
heading = math.atan2(-y, -x) # always points away from the boundary
|
||||
speed = MAX_SPEED
|
||||
_stuck_count = 0
|
||||
_prev_x, _prev_y = x, y
|
||||
|
||||
if penned:
|
||||
# Inside pen: wander freely, strong boundary forces prevent exit,
|
||||
# separation still active to avoid collisions with other penned sheep.
|
||||
|
||||
pm = PEN_MARGIN
|
||||
if x < PEN_X_MIN + pm: fx += ((PEN_X_MIN + pm - x) / pm) * 15.0
|
||||
if x > PEN_X_MAX - pm: fx -= ((x - (PEN_X_MAX - pm)) / pm) * 15.0
|
||||
if y < PEN_Y_MIN + pm: fy += ((PEN_Y_MIN + pm - y) / pm) * 15.0
|
||||
if y > PEN_Y_MAX - pm: fy -= ((y - (PEN_Y_MAX - pm)) / pm) * 15.0
|
||||
|
||||
for px, py in peers.values():
|
||||
dx, dy = px - x, py - y
|
||||
d = math.hypot(dx, dy)
|
||||
if 0.05 < d < SEPARATION_DIST:
|
||||
push = (SEPARATION_DIST - d) / d
|
||||
fx -= (dx / d) * push * 2.5
|
||||
fy -= (dy / d) * push * 2.5
|
||||
|
||||
if random.random() < 0.02:
|
||||
wander_angle += random.uniform(-0.6, 0.6)
|
||||
fx += math.cos(wander_angle) * 0.5
|
||||
fy += math.sin(wander_angle) * 0.5
|
||||
|
||||
else:
|
||||
fleeing = False
|
||||
|
||||
# Flee — quadratic ramp so force grows rapidly as the dog closes in
|
||||
if dog_x is not None:
|
||||
dx = dog_x - x
|
||||
dy = dog_y - y
|
||||
dist = math.hypot(dx, dy)
|
||||
if 0.01 < dist < FLEE_DIST:
|
||||
fleeing = True
|
||||
t = 1.0 - dist / FLEE_DIST
|
||||
s = t * t * 20.0
|
||||
fx -= (dx / dist) * s
|
||||
fy -= (dy / dist) * s
|
||||
|
||||
# Cohesion — halved while fleeing to reduce mid-panic collisions
|
||||
cx, cy, cn = 0.0, 0.0, 0
|
||||
for px, py in peers.values():
|
||||
d = math.hypot(px - x, py - y)
|
||||
if 0.3 < d < COHESION_DIST:
|
||||
cx += px; cy += py; cn += 1
|
||||
if cn > 0:
|
||||
w = 0.08 if fleeing else 0.15
|
||||
fx += (cx / cn - x) * w
|
||||
fy += (cy / cn - y) * w
|
||||
|
||||
# Separation — inverse-distance: huge when nearly overlapping, fades quickly
|
||||
for px, py in peers.values():
|
||||
dx, dy = px - x, py - y
|
||||
d = math.hypot(dx, dy)
|
||||
if 0.05 < d < SEPARATION_DIST:
|
||||
push = (SEPARATION_DIST - d) / d
|
||||
fx -= (dx / d) * push * 2.5
|
||||
fy -= (dy / d) * push * 2.5
|
||||
|
||||
# Walls
|
||||
if x < X_MIN + WALL_MARGIN: fx += ((X_MIN + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
|
||||
if x > X_MAX - WALL_MARGIN: fx -= ((x - (X_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
|
||||
if y < Y_MIN + WALL_MARGIN: fy += ((Y_MIN + WALL_MARGIN - y) / WALL_MARGIN) * 6.0
|
||||
if y > Y_MAX - WALL_MARGIN: fy -= ((y - (Y_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
|
||||
|
||||
# Wander — suppressed while fleeing so drift cannot deflect the flee heading
|
||||
if not fleeing:
|
||||
if random.random() < 0.02:
|
||||
wander_angle += random.uniform(-0.6, 0.6)
|
||||
fx += math.cos(wander_angle) * 0.5
|
||||
fy += math.sin(wander_angle) * 0.5
|
||||
|
||||
# Hard-stop clamp: within 0.5 m of a wall, zero any force component that
|
||||
# would push further into it. Prevents the flee force from pinning a sheep
|
||||
# against the boundary when the dog approaches from outside.
|
||||
HS = 0.5
|
||||
if x < X_MIN + HS and fx < 0: fx = 0.0
|
||||
if x > X_MAX - HS and fx > 0: fx = 0.0
|
||||
if y < Y_MIN + HS and fy < 0: fy = 0.0
|
||||
if y > Y_MAX - HS and fy > 0: fy = 0.0
|
||||
|
||||
heading = math.atan2(fy, fx)
|
||||
mag = math.hypot(fx, fy)
|
||||
speed = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))
|
||||
drive(heading, speed)
|
||||
|
||||
if step % 3 == 0:
|
||||
if step_count % 3 == 0:
|
||||
emitter.send(f"sheep:{name}:{x:.4f}:{y:.4f}")
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
"""Lazy loader for the SB3 PPO policy used by the dog controller.
|
||||
|
||||
Importing stable-baselines3 inside the Webots Python interpreter is only
|
||||
needed when ``HERDING_MODE=rl``; the Strömbom mode runs without it. This
|
||||
loader keeps SB3 out of the import path until you actually ask for the RL
|
||||
policy, so users without SB3 installed can still run the Strömbom
|
||||
baseline.
|
||||
|
||||
The policy + VecNormalize statistics are saved together by
|
||||
``training/train_ppo.py``:
|
||||
|
||||
runs/<name>/best/best_model.zip # SB3 PPO checkpoint
|
||||
runs/<name>/best/vecnormalize.pkl # observation-normaliser stats
|
||||
|
||||
Pass either the directory or the explicit zip path.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class PolicyHandle:
    """Pair a loaded policy model with optional observation-normaliser stats.

    The controller only needs ``predict(obs)``; batching and (when a
    normaliser is present) observation normalisation happen inside.
    """

    def __init__(self, model, vecnorm):
        # ``vecnorm`` may be None, in which case observations are passed
        # to the model without normalisation.
        self.model = model
        self.vecnorm = vecnorm

    def predict(self, obs):
        """Return the deterministic action for one observation.

        ``obs`` is converted to a float32 array of shape ``(1, obs_dim)``,
        normalised if a normaliser is attached, and fed to
        ``model.predict``; the first row of the returned action batch is
        handed back.
        """
        # Imported lazily so importing this module does not require numpy.
        import numpy as np

        # The normaliser expects a batched obs of shape (n_envs, obs_dim).
        batch = np.asarray(obs, dtype=np.float32).reshape(1, -1)
        normaliser = self.vecnorm
        if normaliser is not None:
            batch = normaliser.normalize_obs(batch)
        action, _state = self.model.predict(batch, deterministic=True)
        return action[0]
|
||||
|
||||
|
||||
def load(model_path: str, vecnorm_path: "str | None" = None) -> "PolicyHandle":
    """Load a PPO model (and optional VecNormalize statistics) from disk.

    Args:
        model_path: Either the ``.zip`` checkpoint itself, or a directory
            that contains one of ``best_model.zip``, ``final.zip`` or
            ``policy.zip`` (checked in that order).
        vecnorm_path: Optional path to the pickled normaliser. When omitted
            and ``model_path`` is a directory, ``vecnormalize.pkl`` inside
            that directory is used if it exists.

    Returns:
        A ``PolicyHandle`` wrapping the model and the normaliser (or ``None``
        when no normaliser file was found).

    Raises:
        FileNotFoundError: ``model_path`` is a directory with no known zip.

    The annotations are quoted so this module still imports on interpreters
    older than Python 3.10, where ``str | None`` cannot be evaluated at
    definition time.
    """
    p = Path(model_path)
    if p.is_dir():
        zip_candidates = [p / "best_model.zip", p / "final.zip", p / "policy.zip"]
        zip_path = next((z for z in zip_candidates if z.exists()), None)
        if zip_path is None:
            raise FileNotFoundError(
                f"No PPO zip found in {p} (looked for best_model.zip, final.zip, policy.zip)"
            )
        if vecnorm_path is None:
            vn = p / "vecnormalize.pkl"
            if vn.exists():
                vecnorm_path = str(vn)
    else:
        zip_path = p

    # Loading stays best-effort when the stats file is missing, but say so:
    # running without the normaliser feeds the policy raw observations.
    if vecnorm_path and not os.path.exists(vecnorm_path):
        import warnings

        warnings.warn(
            f"VecNormalize file {vecnorm_path!r} not found; "
            "observations will not be normalised",
            RuntimeWarning,
            stacklevel=2,
        )
        vecnorm_path = None

    # Import deferred so the Strömbom path doesn't require SB3.
    from stable_baselines3 import PPO

    model = PPO.load(str(zip_path), device="auto")
    vecnorm = None
    if vecnorm_path:
        # VecNormalize.load needs a venv to attach to; we only need its stats
        # at inference, so the pickled wrapper is restored directly.
        # SECURITY NOTE: pickle executes arbitrary code on load — only point
        # this at checkpoint files produced by a trusted training run.
        import pickle

        with open(vecnorm_path, "rb") as f:
            vecnorm = pickle.load(f)
        # Freeze the running statistics and disable reward normalisation.
        vecnorm.training = False
        vecnorm.norm_reward = False
    return PolicyHandle(model=model, vecnorm=vecnorm)
|
||||
@@ -1,14 +1,182 @@
|
||||
"""
|
||||
Shepherd Dog controller (Webots, manual keyboard control).
|
||||
"""Shepherd Dog controller (Webots).
|
||||
|
||||
WASD / arrow keys drive the robot. +/- adjust speed in 10 % increments.
|
||||
GPS position is broadcast every step on channel 1 so sheep controllers
|
||||
can compute flee forces. Ears wag continuously via sinusoidal position
|
||||
targets — purely cosmetic.
|
||||
Runs in one of two modes selected by the ``HERDING_MODE`` environment
|
||||
variable:
|
||||
|
||||
HERDING_MODE=rl → load an SB3 PPO policy from
|
||||
HERDING_POLICY_DIR (default
|
||||
training/runs/latest/best) and use its
|
||||
(vx, vy) action each step.
|
||||
HERDING_MODE=strombom → use the analytic Strömbom collect/drive
|
||||
heuristic. This is the fallback if the RL
|
||||
policy can't be loaded (e.g. SB3 not
|
||||
installed in the Webots Python env, or no
|
||||
checkpoint yet).
|
||||
|
||||
Both modes share the same low-level differential-drive controller
|
||||
(``herding.diffdrive.velocity_to_wheels`` + clamped forward speed), so
|
||||
switching modes does not retune the actuation layer.
|
||||
|
||||
A safety supervisor enforces the "dog stays out of the pen" invariant:
|
||||
if the action would push the dog past ``DOG_SOUTH_LIMIT`` it is
|
||||
overridden with a north-driving correction. This is a hard guarantee
|
||||
the policy cannot escape.
|
||||
"""
|
||||
|
||||
import math
|
||||
from controller import Robot, Keyboard
|
||||
import os
|
||||
import sys
|
||||
|
||||
# --- Make the shared herding/ package importable from this controller dir ---
|
||||
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
|
||||
if _PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, _PROJECT_ROOT)
|
||||
|
||||
from controller import Robot
|
||||
|
||||
from herding.diffdrive import velocity_to_wheels
|
||||
from herding.geometry import (
|
||||
DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
|
||||
DOG_SOUTH_LIMIT, DOG_WHEEL_RADIUS,
|
||||
PEN_ENTRY,
|
||||
)
|
||||
from herding.obs import build_obs
|
||||
from herding.sequential import compute_action_debug as sequential_action_debug
|
||||
from herding.strombom import compute_action_debug as strombom_action_debug
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mode selection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _load_runtime_config():
    """Parse ``herding_runtime.cfg`` in the project root into a dict.

    The file holds ``key=value`` lines; blank lines, lines starting with
    ``#`` and lines without ``=`` are ignored. Keys are upper-cased and both
    sides are stripped of surrounding whitespace. A missing or unreadable
    file yields an empty dict.

    Webots strips HERDING_* env vars in some configurations, so the launcher
    writes this file and the controller falls back to it when the env vars
    are absent.
    """
    cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
    if not os.path.exists(cfg_path):
        return {}
    try:
        with open(cfg_path) as handle:
            entries = (raw.strip() for raw in handle)
            triples = (
                entry.partition("=")
                for entry in entries
                if entry and not entry.startswith("#") and "=" in entry
            )
            # A later duplicate key overrides an earlier one.
            return {key.strip().upper(): value.strip() for key, _sep, value in triples}
    except OSError:
        # Any read failure discards partial results.
        return {}
|
||||
|
||||
|
||||
_runtime_cfg = _load_runtime_config()
|
||||
MODE = (os.environ.get("HERDING_MODE")
|
||||
or _runtime_cfg.get("HERDING_MODE")
|
||||
or "rl").lower()
|
||||
|
||||
|
||||
def _resolve_policy_dir() -> str:
    """Pick the directory to load the trained policy from.

    Priority:
      1. ``HERDING_POLICY_DIR`` from the environment, else from the runtime
         config, when it names an existing directory
      2. ``training/runs/bc_pretrained``
      3. ``training/runs/bc_ppo/best``
      4. ``training/runs/latest/best``

    If none of these exist, the configured override (or, failing that, the
    first fallback) is returned anyway so the caller's error message names a
    concrete path.
    """
    override = (os.environ.get("HERDING_POLICY_DIR")
                or _runtime_cfg.get("HERDING_POLICY_DIR"))
    if override and os.path.isdir(override):
        return override

    runs_root = os.path.join(_PROJECT_ROOT, "training", "runs")
    fallbacks = [
        os.path.join(runs_root, "bc_pretrained"),
        os.path.join(runs_root, "bc_ppo", "best"),
        os.path.join(runs_root, "latest", "best"),
    ]
    existing = next((path for path in fallbacks if os.path.isdir(path)), None)
    if existing is not None:
        return existing
    return override or fallbacks[0]
|
||||
|
||||
|
||||
POLICY_DIR = _resolve_policy_dir()
|
||||
|
||||
policy_handle = None
|
||||
if MODE == "rl":
|
||||
print(f"[dog] HERDING_MODE={MODE} HERDING_POLICY_DIR(env)="
|
||||
f"{os.environ.get('HERDING_POLICY_DIR', '<unset>')}")
|
||||
print(f"[dog] resolved POLICY_DIR={POLICY_DIR} exists="
|
||||
f"{os.path.isdir(POLICY_DIR)}")
|
||||
if os.path.isdir(POLICY_DIR):
|
||||
try:
|
||||
entries = sorted(os.listdir(POLICY_DIR))
|
||||
except OSError:
|
||||
entries = []
|
||||
print(f"[dog] dir contents: {entries}")
|
||||
try:
|
||||
from policy_loader import load as _load_policy
|
||||
policy_handle = _load_policy(POLICY_DIR)
|
||||
print(f"[dog] RL policy loaded from {POLICY_DIR}")
|
||||
except Exception as exc:
|
||||
print(f"[dog] RL policy load failed ({exc!r}); falling back to Strömbom.")
|
||||
MODE = "strombom"
|
||||
if MODE not in ("rl", "strombom", "sequential"):
|
||||
print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
|
||||
MODE = "strombom"
|
||||
print(f"[dog] running in mode={MODE}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Action smoothing + safety supervisor
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
ACTION_SMOOTH = 0.35
|
||||
prev_action = (0.0, 0.0)
|
||||
|
||||
|
||||
def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
    """Override southward actions when the dog is at or near its south limit.

    * Below ``DOG_SOUTH_LIMIT`` with any southward ``vy``: return the pure
      northward action ``(0.0, 1.0)``.
    * Within 0.5 of the limit with ``vy < -0.2``: halve ``vx`` and raise
      ``vy`` by 0.5, floored at zero.
    * Otherwise the action is returned unchanged.

    ``dog_x`` is accepted for signature symmetry but not used.
    """
    moving_south = vy < 0.0
    if moving_south and dog_y < DOG_SOUTH_LIMIT:
        return (0.0, 1.0)

    soft_limit = DOG_SOUTH_LIMIT + 0.5
    if vy < -0.2 and dog_y < soft_limit:
        damped_vy = max(0.0, vy + 0.5)
        return (vx * 0.5, damped_vy)

    return (vx, vy)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Driving
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: float):
    """Turn a ``(vx, vy)`` velocity command into wheel velocities.

    A command with magnitude below 1e-3 stops both motors. Otherwise the
    heading is read from ``compass`` and the conversion is delegated to
    ``velocity_to_wheels`` with the dog's geometry constants; the two values
    it returns are applied to the left and right motors respectively.
    """
    if math.hypot(vx, vy) < 1e-3:
        for motor in (left_motor, right_motor):
            motor.setVelocity(0.0)
        return

    north = compass.getValues()
    heading = math.atan2(north[0], north[1])
    wheel_cmds = velocity_to_wheels(
        vx, vy, heading,
        max_linear=DOG_MAX_LINEAR,
        wheel_radius=DOG_WHEEL_RADIUS,
        max_wheel_omega=motor_max,
        k_turn=4.0,
    )
    left, right = wheel_cmds
    left_motor.setVelocity(left)
    right_motor.setVelocity(right)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Webots devices
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
robot = Robot()
|
||||
timestep = int(robot.getBasicTimeStep())
|
||||
@@ -19,70 +187,97 @@ left_motor.setPosition(float("inf"))
|
||||
right_motor.setPosition(float("inf"))
|
||||
left_motor.setVelocity(0.0)
|
||||
right_motor.setVelocity(0.0)
|
||||
|
||||
lidar = robot.getDevice("lidar")
|
||||
lidar.enable(timestep)
|
||||
lidar.enablePointCloud()
|
||||
MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
|
||||
|
||||
gps = robot.getDevice("gps"); gps.enable(timestep)
|
||||
compass = robot.getDevice("compass"); compass.enable(timestep)
|
||||
emitter = robot.getDevice("emitter")
|
||||
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
|
||||
emitter = robot.getDevice("emitter")
|
||||
|
||||
# Cosmetic ear motors — ignored by control logic but keep them animated.
|
||||
left_ear = robot.getDevice("left ear motor")
|
||||
right_ear = robot.getDevice("right ear motor")
|
||||
left_ear.setPosition(float("inf"))
|
||||
right_ear.setPosition(float("inf"))
|
||||
left_ear.setVelocity(0.0)
|
||||
right_ear.setVelocity(0.0)
|
||||
|
||||
keyboard = robot.getKeyboard()
|
||||
keyboard.enable(timestep)
|
||||
|
||||
MOTOR_MAX = left_motor.getMaxVelocity()
|
||||
speed_level = 0.5 # fraction of MOTOR_MAX; adjusted by +/-
|
||||
|
||||
EAR_AMPLITUDE = 0.35 # rad, peak ear deflection
|
||||
EAR_RATE = 8.0 # rad/s, how fast the ears are driven
|
||||
ear_phase = 0.0
|
||||
EAR_AMPLITUDE = 0.35
|
||||
EAR_RATE = 8.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main loop
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# {name: (x, y)} — kept across all sheep ever heard from. Sheep that drift
|
||||
# into the pen are tracked by ``penned`` so observations and Strömbom
|
||||
# agree on which ones still need herding.
|
||||
sheep_positions: dict = {}
|
||||
penned_set: set = set()
|
||||
step_count = 0
|
||||
|
||||
from herding.geometry import is_penned_position
|
||||
|
||||
while robot.step(timestep) != -1:
|
||||
speed = MOTOR_MAX * speed_level
|
||||
turn = speed * 0.6 # differential turn radius
|
||||
step_count += 1
|
||||
|
||||
left_vel = 0.0
|
||||
right_vel = 0.0
|
||||
key = keyboard.getKey()
|
||||
while key > 0:
|
||||
if key in (ord('W'), Keyboard.UP):
|
||||
left_vel = speed
|
||||
right_vel = speed
|
||||
elif key in (ord('S'), Keyboard.DOWN):
|
||||
left_vel = -speed
|
||||
right_vel = -speed
|
||||
elif key in (ord('A'), Keyboard.LEFT):
|
||||
left_vel = -turn
|
||||
right_vel = turn
|
||||
elif key in (ord('D'), Keyboard.RIGHT):
|
||||
left_vel = turn
|
||||
right_vel = -turn
|
||||
elif key in (ord('+'), ord('=')):
|
||||
speed_level = min(1.0, speed_level + 0.1)
|
||||
print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
|
||||
elif key in (ord('-'), ord('_')):
|
||||
speed_level = max(0.1, speed_level - 0.1)
|
||||
print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
|
||||
key = keyboard.getKey()
|
||||
|
||||
left_motor.setVelocity(left_vel)
|
||||
right_motor.setVelocity(right_vel)
|
||||
while receiver.getQueueLength() > 0:
|
||||
msg = receiver.getString()
|
||||
receiver.nextPacket()
|
||||
parts = msg.split(":")
|
||||
if len(parts) == 4 and parts[0] == "sheep":
|
||||
try:
|
||||
x, y = float(parts[2]), float(parts[3])
|
||||
except ValueError:
|
||||
continue
|
||||
sheep_positions[parts[1]] = (x, y)
|
||||
if parts[1] not in penned_set and is_penned_position(x, y):
|
||||
penned_set.add(parts[1])
|
||||
|
||||
pos = gps.getValues()
|
||||
emitter.send(f"dog:{pos[0]}:{pos[1]}")
|
||||
dog_xy = (pos[0], pos[1])
|
||||
n = compass.getValues()
|
||||
dog_heading = math.atan2(n[0], n[1])
|
||||
|
||||
# ---- Action selection ----
|
||||
if MODE == "rl" and policy_handle is not None:
|
||||
sheep_xy_list = list(sheep_positions.values())
|
||||
sheep_names = list(sheep_positions.keys())
|
||||
sheep_penned_list = [s in penned_set for s in sheep_names]
|
||||
obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)
|
||||
action = policy_handle.predict(obs)
|
||||
vx, vy = float(action[0]), float(action[1])
|
||||
elif MODE == "sequential":
|
||||
vx, vy, _mode_str, _dbg = sequential_action_debug(
|
||||
dog_xy, sheep_positions, PEN_ENTRY,
|
||||
)
|
||||
else:
|
||||
# Strömbom (canonical baseline).
|
||||
vx, vy, _mode_str, _dbg = strombom_action_debug(
|
||||
dog_xy, sheep_positions, PEN_ENTRY,
|
||||
)
|
||||
|
||||
# EMA smoothing — reduces oscillation from policy or Strömbom flips.
|
||||
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
|
||||
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
|
||||
|
||||
# Safety: dog must never enter the pen.
|
||||
vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
|
||||
prev_action = (vx, vy)
|
||||
|
||||
drive(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
|
||||
emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")
|
||||
|
||||
# Cosmetic ear wiggle — purely visual.
|
||||
ear_phase += 0.12
|
||||
ear_pos = EAR_AMPLITUDE * math.sin(ear_phase)
|
||||
left_ear.setVelocity(EAR_RATE)
|
||||
right_ear.setVelocity(EAR_RATE)
|
||||
left_ear.setPosition( ear_pos)
|
||||
left_ear.setPosition(ear_pos)
|
||||
right_ear.setPosition(-ear_pos)
|
||||
|
||||
if step_count % 200 == 0:
|
||||
n_active = sum(1 for s in sheep_positions if s not in penned_set)
|
||||
print(f"[dog mode={MODE}] step={step_count} known={len(sheep_positions)} "
|
||||
f"penned={len(penned_set)} active={n_active} action=({vx:+.2f}, {vy:+.2f})")
|
||||
|
||||
Binary file not shown.
@@ -1,153 +0,0 @@
|
||||
"""
|
||||
Render Webots-side debug trajectory from debug.csv.
|
||||
|
||||
The shepherd_dog_rl controller writes per-step state to debug.csv when
|
||||
DOG_DEBUG=1. This script reads it and produces:
|
||||
|
||||
trajectory.png — dog path + sheep paths overlaid on the field
|
||||
obs_drift.png — normalized observation distribution over time
|
||||
actions.png — vx, vy time series
|
||||
|
||||
Run:
|
||||
python plot_debug.py # uses debug.csv next to this file
|
||||
python plot_debug.py --csv path/to.csv --out-dir somewhere/
|
||||
"""
|
||||
import argparse
|
||||
import csv
|
||||
import os
|
||||
import sys
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.patches as mpatches
|
||||
import numpy as np
|
||||
|
||||
|
||||
def load_csv(path):
    """Read ``path`` as a headered CSV and return its rows as dicts.

    Exits the process with an ``empty CSV`` message when the file has no
    data rows.
    """
    with open(path) as handle:
        records = list(csv.DictReader(handle))
    if not records:
        sys.exit(f"empty CSV: {path}")
    return records
|
||||
|
||||
|
||||
def parse_floats(s):
    """Split ``s`` on ``;`` and return the non-empty pieces as floats."""
    pieces = filter(None, s.split(";"))
    return list(map(float, pieces))
|
||||
|
||||
|
||||
def plot_trajectory(rows, out_path):
    """Draw the dog and sheep paths over the field and save the figure.

    ``rows`` are the row dicts returned by ``load_csv``; the columns read
    are ``dog_x``, ``dog_y``, ``sheep_xs``, ``sheep_ys`` and (last row
    only) ``n_penned``.  The image is written to ``out_path``.
    """
    wall_color = "#795548"
    dog_color = "#4e342e"

    fig, ax = plt.subplots(figsize=(7, 7))
    ax.set_xlim(-16, 16)
    ax.set_ylim(-16, 16)
    ax.set_aspect("equal")
    ax.set_facecolor("#dcedc8")

    # Field outline, then the pen rectangle with its label.
    field_outline = mpatches.Rectangle(
        (-15, -15), 30, 30, fill=False, edgecolor=wall_color, lw=2)
    ax.add_patch(field_outline)
    pen_box = mpatches.Rectangle(
        (10, -15), 3, 7, facecolor="#ffe082", edgecolor=wall_color, lw=2)
    ax.add_patch(pen_box)
    ax.text(11.5, -11.5, "pen", ha="center", va="center", fontsize=8)

    # Dog path: square marker at the first sample, diamond at the last.
    dog_x = [float(row["dog_x"]) for row in rows]
    dog_y = [float(row["dog_y"]) for row in rows]
    ax.plot(dog_x, dog_y, color=dog_color, lw=1.5, alpha=0.7, label="dog")
    ax.plot(dog_x[0], dog_y[0], "s", color=dog_color, ms=10)
    ax.plot(dog_x[-1], dog_y[-1], "D", color=dog_color, ms=10)

    # Sheep: turn the per-step coordinate lists into one track per index.
    sheep_x_rows = [parse_floats(row["sheep_xs"]) for row in rows]
    sheep_y_rows = [parse_floats(row["sheep_ys"]) for row in rows]
    if sheep_x_rows and sheep_x_rows[-1]:
        palette = ["#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00",
                   "#a65628", "#f781bf", "#999999", "#66c2a5", "#fc8d62"]
        # The number of tracks is taken from the final row.
        for idx in range(len(sheep_x_rows[-1])):
            # Steps in which this sheep index is absent are simply dropped.
            track_x = [vals[idx] for vals in sheep_x_rows if idx < len(vals)]
            track_y = [vals[idx] for vals in sheep_y_rows if idx < len(vals)]
            if not track_x:
                continue
            color = palette[idx % len(palette)]
            ax.plot(track_x, track_y, color=color, lw=0.8, alpha=0.6,
                    label=f"sheep {idx+1}")
            ax.plot(track_x[0], track_y[0], "o", color=color, ms=6)
            ax.plot(track_x[-1], track_y[-1], "*", color=color, ms=10)

    penned_count = int(rows[-1]["n_penned"])
    ax.set_title(
        f"Webots trajectory {len(rows)} steps penned={penned_count}",
        fontsize=12)
    ax.legend(loc="upper left", fontsize=7, ncol=2)
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
|
||||
|
||||
|
||||
def plot_actions(rows, out_path):
    """Plot the ``vx`` / ``vy`` action columns and their magnitude per step.

    Produces a three-panel figure (vx, vy, ||action||) sharing the step
    axis and saves it to ``out_path``.
    """
    steps = np.arange(len(rows))
    vx = np.array([float(row["vx"]) for row in rows])
    vy = np.array([float(row["vy"]) for row in rows])
    magnitude = np.sqrt(vx ** 2 + vy ** 2)

    fig, axes = plt.subplots(3, 1, figsize=(12, 7), sharex=True)

    # The two component panels share the same layout.
    component_panels = (
        (axes[0], vx, "tab:red", "vx"),
        (axes[1], vy, "tab:blue", "vy"),
    )
    for axis, series, color, label in component_panels:
        axis.plot(steps, series, color=color, lw=0.8)
        axis.set_ylabel(label)
        axis.axhline(0, color="black", lw=0.4)
        axis.set_ylim(-1.1, 1.1)

    # Magnitude panel with reference lines at sqrt(2) and 1.
    mag_axis = axes[2]
    mag_axis.plot(steps, magnitude, color="tab:purple", lw=0.8)
    mag_axis.set_ylabel("||action||")
    mag_axis.axhline(np.sqrt(2), color="orange", ls="--", lw=1,
                     label="saturated √2")
    mag_axis.axhline(1.0, color="gray", ls="--", lw=1)
    mag_axis.set_xlabel("step")
    mag_axis.legend(fontsize=8)

    fig.suptitle("Webots action time series")
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
|
||||
|
||||
|
||||
def plot_obs(rows, out_path):
    """Plot every observation dimension (raw and normalised) against step.

    Reads the ``norm_obs`` and ``raw_obs`` columns of ``rows`` (each a
    ``;``-separated float list), draws one stacked panel per dimension and
    saves the figure to ``out_path``.  Returns without writing anything
    when there are no normalised observations.
    """
    norm = np.array([parse_floats(r["norm_obs"]) for r in rows])
    raw = np.array([parse_floats(r["raw_obs"]) for r in rows])
    if norm.size == 0:
        return
    n_dims = norm.shape[1]

    # The controller's build_obs emits 18 values (the last two are the
    # cos/sin of the dog heading).  Any dimension beyond the known names
    # gets a generic label so indexing labels[i] can never go out of range.
    known_labels = [
        "dog_x", "dog_y", "com-dog_x", "com-dog_y",
        "far1-com_x", "far1-com_y", "far2-com_x", "far2-com_y",
        "far3-com_x", "far3-com_y", "pen-com_x", "pen-com_y",
        "pen-far1_x", "pen-far1_y", "radius", "frac_active",
        "cos_heading", "sin_heading",
    ]
    labels = [
        known_labels[i] if i < len(known_labels) else f"obs[{i}]"
        for i in range(n_dims)
    ]

    t = np.arange(norm.shape[0])
    fig, axes = plt.subplots(n_dims, 1, figsize=(11, 1.0 * n_dims), sharex=True)
    if n_dims == 1:
        # plt.subplots returns a bare Axes (not an array) for a single panel.
        axes = [axes]
    for i in range(n_dims):
        axes[i].plot(t, raw[:, i], color="tab:gray", lw=0.6, alpha=0.6, label="raw")
        axes[i].plot(t, norm[:, i], color="tab:red", lw=0.8, label="normalised")
        axes[i].set_ylabel(labels[i], fontsize=8)
        axes[i].tick_params(labelsize=7)
        if i == 0:
            axes[i].legend(fontsize=7, loc="upper right")
    axes[-1].set_xlabel("step")
    fig.suptitle("Observation values over time (raw vs VecNormalize-normalised)")
    plt.tight_layout()
    fig.savefig(out_path, dpi=110)
    plt.close(fig)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: load the debug CSV and render all plots.

    ``--csv`` defaults to ``debug.csv`` and ``--out-dir`` to ``debug_out``,
    both located next to this script.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    parser = argparse.ArgumentParser()
    parser.add_argument("--csv", default=os.path.join(script_dir, "debug.csv"))
    parser.add_argument("--out-dir", default=os.path.join(script_dir, "debug_out"))
    args = parser.parse_args()

    rows = load_csv(args.csv)
    os.makedirs(args.out_dir, exist_ok=True)
    print(f"loaded {len(rows)} rows from {args.csv}")

    # Render each figure into the output directory.
    outputs = (
        (plot_trajectory, "trajectory.png"),
        (plot_actions, "actions.png"),
        (plot_obs, "obs.png"),
    )
    for render, filename in outputs:
        render(rows, os.path.join(args.out_dir, filename))
    print(f"saved trajectory.png + actions.png + obs.png to {args.out_dir}/")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,285 +0,0 @@
|
||||
"""
|
||||
Shepherd Dog RL controller — runs a trained SB3 PPO policy inside Webots.
|
||||
|
||||
Setup
|
||||
-----
|
||||
1. Copy your trained files into this directory:
|
||||
controllers/shepherd_dog_rl/final_model.zip
|
||||
controllers/shepherd_dog_rl/vecnorm.pkl
|
||||
|
||||
2. In field.wbt, set the ShepherdDog robot's controller field to
|
||||
"shepherd_dog_rl". You can do this in the Webots GUI:
|
||||
click the robot → Controller → shepherd_dog_rl
|
||||
|
||||
3. Optional: set controllerArgs to ["5"] (number of sheep) if it differs
|
||||
from the default of 3.
|
||||
|
||||
The controller reads GPS (dog position) and Receiver (sheep broadcasts),
|
||||
builds the same 18-dim flock observation the training env used, normalises
|
||||
it with the saved VecNormalize stats, and converts the (vx, vy) policy
|
||||
output into differential wheel speeds.
|
||||
|
||||
Debug logging
|
||||
-------------
|
||||
Set DEBUG_ENABLED = True (below) to write a per-step CSV (dog pos, sheep positions,
|
||||
raw obs, normalised obs, action) to debug.csv alongside this script. Use
|
||||
plot_debug.py to render trajectories from it.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import math
|
||||
import struct
|
||||
import numpy as np
|
||||
|
||||
# ── make training code importable ───────────────────────────────────────────
|
||||
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
_TRAINING = os.path.join(_HERE, "..", "..", "training")
|
||||
sys.path.insert(0, _TRAINING)
|
||||
|
||||
from controller import Robot
|
||||
from stable_baselines3 import PPO
|
||||
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
|
||||
from herding_env import HerdingEnv
|
||||
|
||||
# ── constants (must match herding_env.py) ───────────────────────────────────
|
||||
FIELD = 15.0
|
||||
PEN_CENTER = np.array([11.5, -11.5], dtype=np.float32)
|
||||
PEN_X = (10.0, 13.0)
|
||||
PEN_Y = (-15.0, -8.0)
|
||||
DOG_SPEED = 2.5 # m/s
|
||||
WHEEL_R = 0.038 # wheel radius (metres) — from ShepherdDog.proto
|
||||
K_TURN = 4.0 # heading-error gain (rad/s per rad)
|
||||
EAR_AMPLITUDE = 0.35
|
||||
EAR_RATE = 8.0
|
||||
|
||||
# ── model paths ─────────────────────────────────────────────────────────────
|
||||
MODEL_PATH = os.path.join(_HERE, "final_model.zip")
|
||||
VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
|
||||
DEBUG_CSV = os.path.join(_HERE, "debug.csv")
|
||||
DEBUG_ENABLED = True # set False to disable debug.csv logging
|
||||
|
||||
# ── action smoothing ─────────────────────────────────────────────────────────
|
||||
# EMA on policy output to suppress the rapid oscillation (vx/vy flipping
|
||||
# between -1 and +1 every step) that stalls the physical dog. 0 = no
|
||||
# smoothing (raw policy), 1 = frozen. 0.3 keeps ~30% of previous action.
|
||||
ACTION_SMOOTH = 0.3
|
||||
prev_action = np.zeros(2, dtype=np.float32)
|
||||
|
||||
|
||||
def norm_angle(a: float) -> float:
    """Wrap angle ``a`` (radians) into the range ``[-pi, pi]``.

    Values already inside the range are returned unchanged.  Anything else
    is wrapped in constant time with ``atan2(sin, cos)`` rather than by
    repeatedly adding/subtracting ``2*pi``: the loop form never terminates
    for infinite input, nor for magnitudes so large that subtracting
    ``2*pi`` no longer changes the float.  Non-finite input yields ``nan``.
    """
    if not math.isfinite(a):
        return math.nan
    if -math.pi <= a <= math.pi:
        return a
    return math.atan2(math.sin(a), math.cos(a))
|
||||
|
||||
|
||||
def in_pen(x: float, y: float) -> bool:
    """Return True when ``(x, y)`` is strictly inside the pen rectangle.

    The rectangle is given by the module constants ``PEN_X`` and ``PEN_Y``;
    points exactly on an edge count as outside.
    """
    inside_x = PEN_X[0] < x < PEN_X[1]
    return inside_x and PEN_Y[0] < y < PEN_Y[1]
|
||||
|
||||
|
||||
def build_obs(dog_pos: np.ndarray,
              sheep_dict: dict,
              n_sheep: int,
              dog_heading: float = 0.0) -> np.ndarray:
    """Assemble the 18-value flock observation as a float32 vector.

    sheep_dict:  {name: (x, y)} for every known sheep, penned or not;
                 sheep for which ``in_pen`` is true are excluded from the
                 flock statistics.
    n_sheep:     flock size used to compute the active fraction.
    dog_heading: heading in radians, encoded as (cos, sin) at the end.

    Layout: dog position / FIELD, then (all divided by 2*FIELD) com-dog,
    far1-com, far2-com, far3-com, pen-com, pen-far1 and the flock radius,
    followed by the active fraction and cos/sin of the heading.
    """
    span = 2 * FIELD

    free_sheep = [xy for xy in sheep_dict.values() if not in_pen(*xy)]
    active_pos = np.array(free_sheep, dtype=np.float32)
    n_active = len(active_pos)

    if n_active == 0:
        # No free sheep: every reference point collapses to the pen centre.
        com = PEN_CENTER.copy()
        radius = 0.0
        far1 = far2 = far3 = PEN_CENTER.copy()
    else:
        com = active_pos.mean(axis=0)
        dist_to_com = np.linalg.norm(active_pos - com, axis=1)
        order = np.argsort(dist_to_com)[::-1]      # farthest first
        radius = float(dist_to_com[order[0]])
        # With fewer than three active sheep the missing slots use the CoM.
        far1, far2, far3 = (
            active_pos[order[k]] if k < len(order) else com
            for k in range(3)
        )

    def rel(a, b):
        """Offset from ``b`` to ``a`` scaled by the field span."""
        return (a[0] - b[0]) / span, (a[1] - b[1]) / span

    features = [dog_pos[0] / FIELD, dog_pos[1] / FIELD]
    features.extend(rel(com, dog_pos))
    features.extend(rel(far1, com))
    features.extend(rel(far2, com))
    features.extend(rel(far3, com))
    features.extend(rel(PEN_CENTER, com))
    features.extend(rel(PEN_CENTER, far1))
    features.append(radius / span)
    features.append(n_active / max(n_sheep, 1))
    features.append(math.cos(dog_heading))
    features.append(math.sin(dog_heading))
    return np.array(features, dtype=np.float32)
|
||||
|
||||
|
||||
# ── Webots setup ─────────────────────────────────────────────────────────────
|
||||
robot = Robot()
|
||||
timestep = int(robot.getBasicTimeStep())
|
||||
|
||||
# Drive motors
|
||||
left_motor = robot.getDevice("left wheel motor")
|
||||
right_motor = robot.getDevice("right wheel motor")
|
||||
left_motor.setPosition(float("inf"))
|
||||
right_motor.setPosition(float("inf"))
|
||||
left_motor.setVelocity(0.0)
|
||||
right_motor.setVelocity(0.0)
|
||||
MOTOR_MAX = left_motor.getMaxVelocity()
|
||||
|
||||
# Sensors
|
||||
gps = robot.getDevice("gps"); gps.enable(timestep)
|
||||
compass = robot.getDevice("compass"); compass.enable(timestep)
|
||||
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
|
||||
emitter = robot.getDevice("emitter")
|
||||
|
||||
# Cosmetic
|
||||
left_ear = robot.getDevice("left ear motor")
|
||||
right_ear = robot.getDevice("right ear motor")
|
||||
left_ear.setPosition(float("inf")); right_ear.setPosition(float("inf"))
|
||||
left_ear.setVelocity(0.0); right_ear.setVelocity(0.0)
|
||||
ear_phase = 0.0
|
||||
|
||||
# Number of sheep (from controllerArgs or default)
|
||||
try:
|
||||
n_sheep = int(sys.argv[1])
|
||||
except (IndexError, ValueError):
|
||||
n_sheep = 3
|
||||
|
||||
# ── Load model ───────────────────────────────────────────────────────────────
|
||||
print(f"[RL dog] Loading model from {MODEL_PATH}")
|
||||
print(f"[RL dog] Loading vecnorm from {VECNORM_PATH}")
|
||||
|
||||
dummy_env = DummyVecEnv([lambda: HerdingEnv(n_sheep=n_sheep)])
|
||||
vecnorm = VecNormalize.load(VECNORM_PATH, dummy_env)
|
||||
vecnorm.training = False
|
||||
vecnorm.norm_reward = False
|
||||
|
||||
model = PPO.load(MODEL_PATH, device="cpu")
|
||||
print(f"[RL dog] Model loaded — running with n_sheep={n_sheep}")
|
||||
|
||||
# ── Runtime state ─────────────────────────────────────────────────────────────
|
||||
sheep_positions: dict = {} # {name: (x, y)} — updated every step from receiver
|
||||
step_count = 0
|
||||
|
||||
# Debug CSV — written every step when DEBUG_ENABLED is True
|
||||
debug_file = None
|
||||
if DEBUG_ENABLED:
|
||||
import csv
|
||||
debug_file = open(DEBUG_CSV, "w", newline="")
|
||||
debug_writer = csv.writer(debug_file)
|
||||
debug_writer.writerow([
|
||||
"step", "dog_x", "dog_y", "heading",
|
||||
"sheep_xs", "sheep_ys", "n_active", "n_penned",
|
||||
"raw_obs", "norm_obs", "vx", "vy",
|
||||
])
|
||||
print(f"[RL dog] DEBUG logging to {DEBUG_CSV}")
|
||||
|
||||
|
||||
def bearing() -> float:
    """Return the heading derived from the compass reading, in radians.

    Computed as ``atan2`` of the first and second compass components.
    """
    reading = compass.getValues()
    first, second = reading[0], reading[1]
    return math.atan2(first, second)
|
||||
|
||||
|
||||
def drive(action_vx: float, action_vy: float) -> None:
    """Turn a (vx, vy) action into left/right wheel velocities.

    The action magnitude times ``DOG_SPEED`` is the requested speed; below
    0.05 m/s both motors are stopped.  Otherwise the forward component is
    scaled by ``cos`` of the heading error (never negative) and a
    proportional turn term ``K_TURN * err`` is subtracted from the left
    wheel and added to the right.  Both commands are clamped to
    ``±MOTOR_MAX``.
    """
    def clamp(value: float) -> float:
        return max(-MOTOR_MAX, min(MOTOR_MAX, value))

    speed_ms = math.sqrt(action_vx ** 2 + action_vy ** 2) * DOG_SPEED
    if speed_ms < 0.05:
        # Dead-band: treat a near-zero command as "stop".
        left_motor.setVelocity(0.0)
        right_motor.setVelocity(0.0)
        return

    desired = math.atan2(action_vy, action_vx)
    err = norm_angle(desired - bearing())

    forward_rad = speed_ms * max(0.0, math.cos(err)) / WHEEL_R
    turn = K_TURN * err            # rad/s correction

    left_cmd = clamp(forward_rad - turn)
    right_cmd = clamp(forward_rad + turn)
    left_motor.setVelocity(left_cmd)
    right_motor.setVelocity(right_cmd)
|
||||
|
||||
|
||||
# ── Main loop ─────────────────────────────────────────────────────────────────
|
||||
while robot.step(timestep) != -1:
|
||||
step_count += 1
|
||||
|
||||
# 1. Drain receiver — update sheep position table
|
||||
while receiver.getQueueLength() > 0:
|
||||
try:
|
||||
msg = receiver.getString()
|
||||
parts = msg.split(":")
|
||||
if parts[0] == "sheep" and len(parts) == 4:
|
||||
sheep_positions[parts[1]] = (float(parts[2]), float(parts[3]))
|
||||
except Exception:
|
||||
pass
|
||||
receiver.nextPacket()
|
||||
|
||||
# 2. Dog GPS
|
||||
gps_vals = gps.getValues()
|
||||
dog_pos = np.array([gps_vals[0], gps_vals[1]], dtype=np.float32)
|
||||
|
||||
# 3. Build and normalise observation (heading from compass)
|
||||
raw_obs = build_obs(dog_pos, sheep_positions, n_sheep,
|
||||
dog_heading=bearing())
|
||||
obs_norm = vecnorm.normalize_obs(raw_obs[np.newaxis]) # (1, 13)
|
||||
|
||||
# 4. Policy inference + smoothing
|
||||
action, _ = model.predict(obs_norm, deterministic=True)
|
||||
raw_a = np.array([float(action[0][0]), float(action[0][1])], dtype=np.float32)
|
||||
if ACTION_SMOOTH > 0:
|
||||
smoothed = ACTION_SMOOTH * prev_action + (1.0 - ACTION_SMOOTH) * raw_a
|
||||
prev_action[:] = smoothed
|
||||
vx, vy = float(smoothed[0]), float(smoothed[1])
|
||||
else:
|
||||
vx, vy = float(raw_a[0]), float(raw_a[1])
|
||||
|
||||
# 5. Drive
|
||||
drive(vx, vy)
|
||||
|
||||
# 6. Broadcast dog position so sheep can compute flee forces
|
||||
emitter.send(f"dog:{dog_pos[0]:.4f}:{dog_pos[1]:.4f}")
|
||||
|
||||
# 7. Ear animation
|
||||
ear_phase += 0.12
|
||||
ep = EAR_AMPLITUDE * math.sin(ear_phase)
|
||||
left_ear.setVelocity(EAR_RATE); right_ear.setVelocity(EAR_RATE)
|
||||
left_ear.setPosition( ep); right_ear.setPosition(-ep)
|
||||
|
||||
# Periodic status
|
||||
if step_count % 100 == 0:
|
||||
n_in_pen = sum(1 for x, y in sheep_positions.values() if in_pen(x, y))
|
||||
print(f"[RL dog] step={step_count} known_sheep={len(sheep_positions)}"
|
||||
f" penned={n_in_pen}/{n_sheep} dog=({dog_pos[0]:.2f},{dog_pos[1]:.2f})"
|
||||
f" action=({vx:.2f}, {vy:.2f})")
|
||||
|
||||
# Debug CSV row
|
||||
if debug_file is not None:
|
||||
n_active = sum(1 for x, y in sheep_positions.values() if not in_pen(x, y))
|
||||
n_in_pen = len(sheep_positions) - n_active
|
||||
debug_writer.writerow([
|
||||
step_count, f"{dog_pos[0]:.4f}", f"{dog_pos[1]:.4f}",
|
||||
f"{bearing():.4f}",
|
||||
";".join(f"{v[0]:.3f}" for v in sheep_positions.values()),
|
||||
";".join(f"{v[1]:.3f}" for v in sheep_positions.values()),
|
||||
n_active, n_in_pen,
|
||||
";".join(f"{x:.4f}" for x in raw_obs),
|
||||
";".join(f"{x:.4f}" for x in obs_norm[0]),
|
||||
f"{vx:.4f}", f"{vy:.4f}",
|
||||
])
|
||||
if step_count % 200 == 0:
|
||||
debug_file.flush()
|
||||
Binary file not shown.
+9
-10
@@ -6,28 +6,28 @@
|
||||
- Nelson Neto <up202108117@up.pt>
|
||||
|
||||
## (i) Title and General objectives
|
||||
**RL-Based Autonomous Shepherd Robot for Livestock Herding**
|
||||
**Autonomous Shepherd Robot for Livestock Herding (Strömbom)**
|
||||
|
||||
- Implement effective herding behaviors through proximity and movement strategies
|
||||
- Build a 3D environment with realistic robot dynamics and LIDAR-based perception
|
||||
- Develop a mobile robot capable of autonomously guiding a flock of sheep into a designated target area using Reinforcement Learning
|
||||
- Develop a mobile robot capable of autonomously guiding a flock of sheep into a designated target area using the Strömbom heuristic approach
|
||||
|
||||
|
||||
# Group G25 - (ii) Intermediate Goals
|
||||
|
||||
## Intermediate goals
|
||||
- Set up the Webots simulation environment with an open field and target zone
|
||||
- Implement lightweight Gymnasium-based 2D herding environment
|
||||
- Implement lightweight 2D herding environment for algorithm evaluation
|
||||
- Design a Sheep and Dog robot
|
||||
- Implement a sheep flocking model for fast RL iteration
|
||||
- Implement a sheep flocking model for fast Strömbom iteration
|
||||
- Validate LiDAR sensor feedback for sheep detection and distance estimation
|
||||
|
||||
|
||||
# Group G25 - Course Project (Final) Goals
|
||||
|
||||
## (iii) Main goals
|
||||
- State-of-the-art survey on shepherding algorithms and multi-agent RL herding
|
||||
- Train the robot using PPO to successfully herd a single sheep into the goal
|
||||
- State-of-the-art survey on shepherding algorithms with focus on Strömbom herding
|
||||
- Implement and tune Strömbom controller to successfully herd a single sheep into the goal
|
||||
- Achieve fully autonomous herding of multiple sheep and a full flock into the target area
|
||||
- Optimize robot trajectory to minimize the time required to group the flock
|
||||
- Ensure zero collisions between the robot and the sheep during the task
|
||||
@@ -35,7 +35,7 @@
|
||||
- Article, demo video, and final presentation
|
||||
|
||||
## (iv) Extra Merit
|
||||
- Curriculum Learning (scaling from 1 sheep to a flock)
|
||||
- Progressive evaluation (scaling from 1 sheep to a flock)
|
||||
- Comparison of performance between Differential Drive and Mecanum wheels
|
||||
- Robustness testing under sensor noise or varying sheep speeds, configurations and parameters
|
||||
- Multi-shepherd cooperative mode: 2 dogs learn role specialization (collector vs. driver)
|
||||
@@ -46,11 +46,10 @@
|
||||
|
||||
## (v) Tools
|
||||
- Webots for 3D physics simulation with ROS2 integration via `webots_ros2` package
|
||||
- Stable-Baselines3 for the PPO algorithm implementation
|
||||
- Gymnasium (OpenAI) for the RL environment wrapper (lightweight 2D herding env for fast RL training)
|
||||
- Gymnasium (OpenAI) for the simulation wrapper and evaluation tooling
|
||||
- Python as the primary programming language (sheep flocking model, reward shaping, evaluation)
|
||||
|
||||
## (vi) Limitations
|
||||
- Computational Power: Training time might be high for complex flock behaviors
|
||||
- Computational Power: Large batch evaluation and parameter sweeps can still be time-consuming
|
||||
- Sim-to-Real Gap: No real-world validation of the herding controller; project is simulation-only (2D + Webots 3D)
|
||||
- Model Complexity: Simplified sheep behavior (scripted) may not account for all biological livestock nuances
|
||||
@@ -0,0 +1,8 @@
|
||||
"""Shared core for the shepherd herding project.
|
||||
|
||||
This package is the single source of truth for world geometry, sheep
|
||||
flocking dynamics, differential-drive kinematics, observation building,
|
||||
and the Strömbom heuristic. It is imported both by the Webots
|
||||
controllers (for inference) and by the Gymnasium training environment
|
||||
(for fast PPO rollouts), so the two paths cannot drift apart.
|
||||
"""
|
||||
@@ -0,0 +1,70 @@
|
||||
"""Differential-drive kinematics matching the Webots robot specs.
|
||||
|
||||
The Webots controllers and the training env both use these helpers so the
|
||||
sim and the real (Webots) physics agree to first order. They do not model
|
||||
slip, wheel acceleration limits, or contact forces — Webots does that for
|
||||
us at inference time. The training env has to be close enough that a
|
||||
policy trained against this kinematic model still works when handed off
|
||||
to ODE physics.
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
|
||||
def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt):
    """Advance a differential-drive pose by one explicit Euler step.

    Inputs
    ------
    x, y                      : robot position (m)
    h                         : robot heading (rad), 0 = +x axis
    w_left, w_right           : wheel angular velocities (rad/s)
    wheel_radius, wheel_base  : robot dimensions (m)
    dt                        : timestep (s)

    The position is moved along the *current* heading; the new heading is
    wrapped through ``atan2(sin, cos)``.

    Returns (new_x, new_y, new_h).
    """
    linear = (w_right + w_left) * wheel_radius * 0.5
    angular = (w_right - w_left) * wheel_radius / wheel_base

    next_x = x + linear * math.cos(h) * dt
    next_y = y + linear * math.sin(h) * dt

    unwrapped = h + angular * dt
    next_h = math.atan2(math.sin(unwrapped), math.cos(unwrapped))
    return next_x, next_y, next_h
|
||||
|
||||
|
||||
def velocity_to_wheels(vx, vy, h, max_linear, wheel_radius, max_wheel_omega,
                       k_turn=4.0):
    """Map a desired (vx, vy) intent to (left, right) wheel speeds in rad/s.

    The intent magnitude times ``max_linear`` is the requested linear
    speed; below 1e-3 m/s both wheels are zero.  The forward part is scaled
    by ``cos`` of the heading error, with the error clamped to ±90° for
    that purpose, and a proportional term ``k_turn * err`` (unclamped
    error) forms the wheel differential.  Each wheel is clamped to
    ``±max_wheel_omega``.
    """
    def clamp(value):
        return max(-max_wheel_omega, min(max_wheel_omega, value))

    speed_ms = math.hypot(vx, vy) * max_linear
    if speed_ms < 1e-3:
        return 0.0, 0.0

    delta = math.atan2(vy, vx) - h
    err = math.atan2(math.sin(delta), math.cos(delta))

    half_pi = math.pi / 2
    limited_err = max(-half_pi, min(half_pi, err))
    forward_rad = speed_ms * math.cos(limited_err) / wheel_radius
    turn = k_turn * err

    return clamp(forward_rad - turn), clamp(forward_rad + turn)
|
||||
|
||||
|
||||
def heading_speed_to_wheels(heading, speed_motor, h, max_wheel_omega,
                            k_turn=4.0):
    """Sheep variant: map a target heading and motor speed to wheel speeds.

    ``speed_motor`` is already a wheel angular velocity (rad/s), so no
    wheel-radius conversion is applied.  The forward part is
    ``speed_motor`` scaled by ``cos`` of the heading error (never
    negative); ``k_turn * err`` is the wheel differential.  Each wheel is
    clamped to ``±max_wheel_omega``.  Returns ``(left, right)``.
    """
    def clamp(value):
        return max(-max_wheel_omega, min(max_wheel_omega, value))

    delta = heading - h
    err = math.atan2(math.sin(delta), math.cos(delta))
    forward = max(0.0, math.cos(err)) * speed_motor
    turn = k_turn * err
    return clamp(forward - turn), clamp(forward + turn)
|
||||
@@ -0,0 +1,178 @@
|
||||
"""Reynolds-style sheep flocking dynamics.
|
||||
|
||||
This is the per-sheep behavioural step used both by the Webots sheep
|
||||
controller (scalar, one sheep at a time) and by the training environment
|
||||
(loop over sheep). The numerics are adapted from the original
|
||||
``controllers/sheep/flocking.py`` and retuned for the new external-pen
|
||||
layout: the south stone wall is intact except in the gate column, so
|
||||
sheep can only reach the pen by walking through that 3-m corridor.
|
||||
|
||||
Force stack each step (summed → heading + speed):
|
||||
flee — quadratic ramp away from dog within FLEE_DIST
|
||||
cohesion — drift toward flock centre, weighted more strongly while fleeing (1.5 vs 0.6)
|
||||
separation — inverse-distance push from peers
|
||||
walls — soft repulsion + hard escape band against field walls,
|
||||
except inside the gate column where the south wall is
|
||||
absent
|
||||
wander — small persistent drift for natural idle motion
|
||||
|
||||
A sheep latches to ``penned`` the first time it crosses the gate plane
|
||||
into the gate column (handled by callers via ``geometry.is_penned_position``);
|
||||
once latched, ``penned=True`` is passed in here and the force stack
|
||||
switches to in-pen containment + jitter.
|
||||
"""
|
||||
|
||||
import math
|
||||
import random
|
||||
|
||||
from herding.geometry import (
|
||||
FIELD_X, FIELD_Y,
|
||||
PEN_X, PEN_Y,
|
||||
GATE_X,
|
||||
)
|
||||
|
||||
# --- Speed and force constants ---
|
||||
# All speeds here are in wheel rad/s (motor units), matching the existing
|
||||
# sheep controller. Conversion to m/s = speed * SHEEP_WHEEL_RADIUS.
|
||||
MAX_SPEED = 22.0
|
||||
FLEE_SPEED = 20.0
|
||||
WANDER_SPEED = 3.0
|
||||
|
||||
WALL_MARGIN = 5.0
|
||||
WALL_HARD_MARGIN = 1.0
|
||||
WALL_HARD_GAIN = 50.0
|
||||
|
||||
FLEE_DIST = 7.0
|
||||
SEPARATION_DIST = 2.5
|
||||
COHESION_DIST = 8.0
|
||||
|
||||
PEN_MARGIN = 0.8
|
||||
|
||||
|
||||
def _peers_iter(peers):
    """Return peer positions as a list.

    ``peers`` may be a ``{name: (x, y)}`` dict (its values are used) or any
    iterable of ``(x, y)`` tuples.
    """
    positions = peers.values() if isinstance(peers, dict) else peers
    return list(positions)
|
||||
|
||||
|
||||
def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
    """Return ``(heading, speed, new_wander_angle)`` for one sheep step.

    Parameters
    ----------
    x, y          : this sheep's position.
    penned        : True once the sheep has latched as penned; switches the
                    force stack to in-pen containment + jitter.
    dog_xy        : ``(x, y)`` of the dog, or ``None`` if unknown.
    peers         : ``{name: (x, y)}`` dict or iterable of ``(x, y)`` tuples.
    wander_angle  : persistent wander direction carried between steps.
    rng           : optional ``random.Random``-compatible object used for
                    *all* wander jitter.  If ``None``, Python's global
                    ``random`` module is used.  Pass an env-owned RNG to
                    make rollouts deterministic given a seed.

    ``speed`` is in wheel rad/s (motor units), bounded by ``[WANDER_SPEED,
    FLEE_SPEED]``.  ``heading`` is the world-frame target heading the sheep
    should aim for (atan2 convention).
    """
    # NOTE(review): block nesting was reconstructed from a source whose
    # indentation had been stripped — confirm against the original file.
    fx, fy = 0.0, 0.0
    peer_list = _peers_iter(peers)
    rnd = rng if rng is not None else random

    if penned:
        # --- Pen containment: bounce off the four pen walls ---
        pm = PEN_MARGIN
        if x < PEN_X[0] + pm:
            fx += ((PEN_X[0] + pm - x) / pm) * 15.0
        if x > PEN_X[1] - pm:
            fx -= ((x - (PEN_X[1] - pm)) / pm) * 15.0
        if y < PEN_Y[0] + pm:
            fy += ((PEN_Y[0] + pm - y) / pm) * 15.0
        if y > PEN_Y[1] - pm:
            fy -= ((y - (PEN_Y[1] - pm)) / pm) * 15.0

        # Mild peer separation — penned sheep crowd the corner otherwise.
        for px, py in peer_list:
            dx, dy = px - x, py - y
            d = math.hypot(dx, dy)
            if 0.05 < d < SEPARATION_DIST:
                push = (SEPARATION_DIST - d) / d
                fx -= (dx / d) * push * 2.5
                fy -= (dy / d) * push * 2.5

        if rnd.random() < 0.02:
            wander_angle += rnd.uniform(-0.6, 0.6)
        fx += math.cos(wander_angle) * 0.5
        fy += math.sin(wander_angle) * 0.5

    else:
        # --- Free-roaming sheep in the field ---
        fleeing = False
        if dog_xy is not None:
            ddx = dog_xy[0] - x
            ddy = dog_xy[1] - y
            dist = math.hypot(ddx, ddy)
            if 0.01 < dist < FLEE_DIST:
                fleeing = True
                # Quadratic ramp: zero at FLEE_DIST, 20 at contact.
                t = 1.0 - dist / FLEE_DIST
                s = t * t * 20.0
                fx -= (ddx / dist) * s
                fy -= (ddy / dist) * s

        # Cohesion — drift toward the mean position of peers within
        # COHESION_DIST.  The weight is larger while fleeing (1.5) than at
        # rest (0.6) so the flock stays together under pressure, e.g. when
        # driven through the narrow gate.
        cx, cy, cn = 0.0, 0.0, 0
        for px, py in peer_list:
            d = math.hypot(px - x, py - y)
            if 0.3 < d < COHESION_DIST:
                cx += px
                cy += py
                cn += 1
        if cn > 0:
            w = 1.5 if fleeing else 0.6
            fx += (cx / cn - x) * w
            fy += (cy / cn - y) * w

        # Separation — inverse-distance push from peers.
        for px, py in peer_list:
            ddx, ddy = px - x, py - y
            d = math.hypot(ddx, ddy)
            if 0.05 < d < SEPARATION_DIST:
                push = (SEPARATION_DIST - d) / d
                fx -= (ddx / d) * push * 2.5
                fy -= (ddy / d) * push * 2.5

        # Wall soft repulsion.  The south wall is absent inside the gate
        # column so sheep can be driven through it by the dog.
        if x < FIELD_X[0] + WALL_MARGIN:
            fx += ((FIELD_X[0] + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
        if x > FIELD_X[1] - WALL_MARGIN:
            fx -= ((x - (FIELD_X[1] - WALL_MARGIN)) / WALL_MARGIN) * 6.0
        if y > FIELD_Y[1] - WALL_MARGIN:
            fy -= ((y - (FIELD_Y[1] - WALL_MARGIN)) / WALL_MARGIN) * 6.0
        if y < FIELD_Y[0] + WALL_MARGIN and not (GATE_X[0] <= x <= GATE_X[1]):
            fy += ((FIELD_Y[0] + WALL_MARGIN - y) / WALL_MARGIN) * 6.0

        if not fleeing:
            # Use the injected RNG here as well; drawing from the global
            # ``random`` module would ignore ``rng`` and make seeded
            # rollouts non-deterministic.
            if rnd.random() < 0.02:
                wander_angle += rnd.uniform(-0.6, 0.6)
            fx += math.cos(wander_angle) * 0.5
            fy += math.sin(wander_angle) * 0.5

    # --- Hard escape band — overrides everything when very close to a wall ---
    m, g = WALL_HARD_MARGIN, WALL_HARD_GAIN
    if x - FIELD_X[0] < m:
        fx = max(fx, g * (1.0 - (x - FIELD_X[0]) / m))
    if FIELD_X[1] - x < m:
        fx = min(fx, -g * (1.0 - (FIELD_X[1] - x) / m))
    if FIELD_Y[1] - y < m:
        fy = min(fy, -g * (1.0 - (FIELD_Y[1] - y) / m))
    # South wall hard escape only when not in the gate column and not penned.
    if (not penned) and (y - FIELD_Y[0] < m) and not (GATE_X[0] <= x <= GATE_X[1]):
        fy = max(fy, g * (1.0 - (y - FIELD_Y[0]) / m))

    heading = math.atan2(fy, fx)
    mag = math.hypot(fx, fy)
    speed = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))
    return heading, speed, wander_angle
|
||||
@@ -0,0 +1,99 @@
|
||||
"""World geometry and robot specs.
|
||||
|
||||
All coordinates are in meters. (0, 0) is the centre of the field, +x is
|
||||
east, +y is north. Z is up but unused here. These constants must match
|
||||
``worlds/field.wbt`` and the proto files; if the world changes, change
|
||||
this file and only this file.
|
||||
|
||||
Pen layout (post-refactor)
|
||||
--------------------------
|
||||
The pen is *external* to the field, accessed through a 3 m gate cut into
|
||||
the south stone wall at y = -15. Sheep entering through the gate end up
|
||||
in a fenced rectangle south of the field; the dog stays in the field
|
||||
(soft-limited above DOG_SOUTH_LIMIT during training and inference).
|
||||
|
||||
        field                +y north
    +-----------+
    |           |
    |           |
    |   ......  |
    +---||||----+   y = -15  (south wall, gate at x ∈ [10, 13])
        ||||
        |pen|       y ∈ [-22, -15]
        +---+
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
# --- Field (square, stone-walled) ---
FIELD_X = (-15.0, 15.0)
FIELD_Y = (-15.0, 15.0)

# Conservative inside bounds — sheep/dog should not graze the wall.
FIELD_INSIDE_MARGIN = 0.5

# --- Pen (external, south of the field) ---
PEN_X = (10.0, 13.0)
PEN_Y = (-22.0, -15.0)
PEN_CENTER = (0.5 * (PEN_X[0] + PEN_X[1]), 0.5 * (PEN_Y[0] + PEN_Y[1]))

# --- Gate (the hole in the south stone wall) ---
# The gate spans the pen's x-range and sits on the south wall line. Both are
# derived instead of re-typed so the gate cannot drift from the field/pen.
GATE_X = PEN_X
GATE_Y = FIELD_Y[0]

# The point the dog drives the flock toward: the gate centre on the field side.
PEN_ENTRY = (0.5 * (GATE_X[0] + GATE_X[1]), GATE_Y)

# --- Robot specs (must match proto files) ---
# Dog (controllers/shepherd_dog/, protos/ShepherdDog.proto)
DOG_WHEEL_RADIUS = 0.038      # m
DOG_WHEEL_BASE = 0.28         # m, axle-to-axle
DOG_MAX_WHEEL_OMEGA = 70.0    # rad/s
DOG_MAX_LINEAR = DOG_WHEEL_RADIUS * DOG_MAX_WHEEL_OMEGA  # ~2.66 m/s

# Sheep (controllers/sheep/, protos/Sheep.proto)
SHEEP_WHEEL_RADIUS = 0.031    # m
SHEEP_WHEEL_BASE = 0.20       # m
SHEEP_MAX_WHEEL_OMEGA = 25.0  # rad/s
SHEEP_MAX_LINEAR = SHEEP_WHEEL_RADIUS * SHEEP_MAX_WHEEL_OMEGA  # ~0.78 m/s

# --- Webots step ---
WEBOTS_DT = 0.016  # seconds, matches WorldInfo.basicTimeStep = 16 in field.wbt

# --- Dog "virtual south wall" (training keeps dog out of the pen) ---
# At inference the controller also clips to this so a slightly miscalibrated
# policy doesn't accidentally drive into the pen and trap the sheep.
DOG_SOUTH_LIMIT = -14.5

# --- Maximum supported flock size ---
MAX_SHEEP = 10


def in_pen(x: float, y: float) -> bool:
    """True if (x, y) lies strictly inside the external pen rectangle.

    Points exactly on the pen boundary (including the gate line) are
    reported as outside.
    """
    return PEN_X[0] < x < PEN_X[1] and PEN_Y[0] < y < PEN_Y[1]


def in_field(x: float, y: float, margin: float = 0.0) -> bool:
    """True if (x, y) lies inside the field, shrunk by ``margin`` per side.

    The bounds are inclusive, so with ``margin=0`` a point on the wall line
    counts as inside.
    """
    return (FIELD_X[0] + margin <= x <= FIELD_X[1] - margin
            and FIELD_Y[0] + margin <= y <= FIELD_Y[1] - margin)


def in_gate_corridor(x: float, y: float, margin: float = 0.0) -> bool:
    """True if (x, y) lies in the gate column at or south of the gate line.

    The region tested is the pen's x-range by [PEN_Y[0], GATE_Y], inclusive,
    grown by ``margin`` on every side.
    """
    return (PEN_X[0] - margin <= x <= PEN_X[1] + margin
            and PEN_Y[0] - margin <= y <= GATE_Y + margin)


def is_penned_position(x: float, y: float, latch_margin: float = 0.2) -> bool:
    """A sheep latches to "penned" once it crosses the gate plane south.

    True iff x is inside the gate column (widened by ``latch_margin``) AND
    y is at or below the gate line. There is no southern bound on y here.
    Once latched, the sheep is held by in-pen forces and will not exit on
    its own (that behaviour lives in the sheep dynamics, not in this test).
    """
    return (PEN_X[0] - latch_margin <= x <= PEN_X[1] + latch_margin
            and y <= GATE_Y)


def distance_to_pen_entry(x: float, y: float) -> float:
    """Euclidean distance (m) from (x, y) to ``PEN_ENTRY``."""
    return math.hypot(x - PEN_ENTRY[0], y - PEN_ENTRY[1])
|
||||
+137
@@ -0,0 +1,137 @@
|
||||
"""Observation builder for the shepherd dog policy.
|
||||
|
||||
Order-invariant 32-D feature vector — the policy generalises across
|
||||
flock sizes 1..MAX_SHEEP because individual sheep coordinates never
|
||||
appear in the observation by index, only summary statistics, a polar
|
||||
histogram, and two "named" sheep (closest-to-pen and rearmost-from-pen).
|
||||
|
||||
The two named sheep matter for the sequential-driving teacher: it
|
||||
targets the closest-to-pen sheep specifically, so the policy needs
|
||||
that channel to mimic the teacher.
|
||||
|
||||
Layout (all components normalised so values stay roughly in [-1, 1]):
|
||||
|
||||
idx field
|
||||
----- ----------------------------------------------------------
|
||||
0..3 dog pose: x/15, y/15, cos(heading), sin(heading)
|
||||
4..5 active-sheep CoM x/15, y/15
|
||||
6..8 flock dispersion: max-radius/15, std_x/15, std_y/15
|
||||
9..11 vector dog→CoM: dx/30, dy/30, dist/30
|
||||
12..14 vector dog→pen-entry: dx/30, dy/30, dist/30
|
||||
  15..16 vector CoM→furthest-sheep: dx/15, dy/15
|
||||
17..18 min sheep-to-wall, min dog-to-wall (both /15)
|
||||
19 active-sheep count / MAX_SHEEP
|
||||
20..27 8-bin polar histogram of active sheep around the dog,
|
||||
rotation-aware (binned in dog-relative frame), normalised
|
||||
so the bins sum to 1.
|
||||
28..29 vector dog→closest-to-pen sheep: dx/15, dy/15
|
||||
30..31 vector dog→rearmost (furthest-from-pen) sheep: dx/15, dy/15
|
||||
"""
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
from herding.geometry import (
|
||||
FIELD_X, FIELD_Y, PEN_ENTRY, MAX_SHEEP,
|
||||
)
|
||||
|
||||
OBS_DIM = 32
|
||||
|
||||
|
||||
def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
              n_max: int = MAX_SHEEP) -> np.ndarray:
    """Assemble the dog policy's observation vector.

    Parameters
    ----------
    dog_xy : tuple (x, y) of the dog's GPS position (m)
    dog_heading : dog heading in rad
    sheep_xy_list : iterable of (x, y) for ALL known sheep
    sheep_penned_list : parallel iterable of bool — True if sheep is penned
    n_max : maximum supported flock size used for the count normaliser

    Returns
    -------
    np.ndarray
        float32 vector of length ``OBS_DIM``. Only non-penned ("active")
        sheep contribute. When no sheep are active, only the dog pose
        (0..3) and the dog→pen-entry vector (12..14) are filled and every
        other slot stays 0.
    """
    dog_x, dog_y = dog_xy
    obs = np.zeros(OBS_DIM, dtype=np.float32)

    # 0..3: dog pose.
    obs[0] = dog_x / 15.0
    obs[1] = dog_y / 15.0
    obs[2] = math.cos(dog_heading)
    obs[3] = math.sin(dog_heading)

    # The two lists are paired positionally; penned sheep are dropped.
    active = [(x, y) for (x, y), p
              in zip(sheep_xy_list, sheep_penned_list) if not p]
    n = len(active)

    # 12..14: dog→pen-entry. Filled before the early return so it is
    # present even in the terminal observation.
    pdx0, pdy0 = PEN_ENTRY[0] - dog_x, PEN_ENTRY[1] - dog_y
    obs[12] = pdx0 / 30.0
    obs[13] = pdy0 / 30.0
    obs[14] = math.hypot(pdx0, pdy0) / 30.0

    if n == 0:
        # All sheep penned — terminal observation.
        obs[19] = 0.0
        return obs

    # 4..8: centre of mass and dispersion of the active flock.
    arr = np.asarray(active, dtype=np.float32)
    com_x = float(arr[:, 0].mean())
    com_y = float(arr[:, 1].mean())
    rel = arr - np.array([com_x, com_y], dtype=np.float32)
    dists = np.hypot(rel[:, 0], rel[:, 1])
    radius = float(dists.max())
    std_x = float(arr[:, 0].std())
    std_y = float(arr[:, 1].std())

    obs[4] = com_x / 15.0
    obs[5] = com_y / 15.0
    obs[6] = radius / 15.0
    obs[7] = std_x / 15.0
    obs[8] = std_y / 15.0

    # 9..11: dog→CoM.
    cdx, cdy = com_x - dog_x, com_y - dog_y
    obs[9] = cdx / 30.0
    obs[10] = cdy / 30.0
    obs[11] = math.hypot(cdx, cdy) / 30.0

    # 15..16: offset of the sheep furthest from the CoM, measured from the
    # CoM (sheep − CoM).
    far_idx = int(np.argmax(dists))
    obs[15] = float(rel[far_idx, 0]) / 15.0
    obs[16] = float(rel[far_idx, 1]) / 15.0

    # 17..18: smallest distance to any of the four field walls.
    min_sheep_wall = min(
        float(np.min(arr[:, 0] - FIELD_X[0])),
        float(np.min(FIELD_X[1] - arr[:, 0])),
        float(np.min(arr[:, 1] - FIELD_Y[0])),
        float(np.min(FIELD_Y[1] - arr[:, 1])),
    )
    min_dog_wall = min(
        dog_x - FIELD_X[0], FIELD_X[1] - dog_x,
        dog_y - FIELD_Y[0], FIELD_Y[1] - dog_y,
    )
    obs[17] = min_sheep_wall / 15.0
    obs[18] = float(min_dog_wall) / 15.0

    # 19: active-sheep fraction.
    obs[19] = n / n_max

    # 20..27: 8-bin polar histogram in the dog's body frame.
    rel_dx = arr[:, 0] - dog_x
    rel_dy = arr[:, 1] - dog_y
    angles = np.arctan2(rel_dy, rel_dx) - dog_heading
    # Wrap back into [-pi, pi] after subtracting the heading.
    angles = np.arctan2(np.sin(angles), np.cos(angles))
    bins = np.floor((angles + math.pi) / (2 * math.pi) * 8).astype(int)
    # An angle of exactly +pi maps to bin 8; fold it into the last bin.
    bins = np.clip(bins, 0, 7)
    hist = np.bincount(bins, minlength=8).astype(np.float32)
    hist /= n  # n >= 1 here, so the bins sum to 1
    obs[20:28] = hist

    # Closest-to-pen sheep (the sequential teacher's target) and rearmost
    # (furthest-from-pen, the natural "next target" once the closest is
    # penned). Both expressed as offset from dog. These two channels make
    # BC tractable — without them the obs doesn't uniquely identify which
    # sheep the teacher is steering toward.
    pen_dists = np.hypot(arr[:, 0] - PEN_ENTRY[0], arr[:, 1] - PEN_ENTRY[1])
    closest_idx = int(np.argmin(pen_dists))
    rearmost_idx = int(np.argmax(pen_dists))
    obs[28] = (float(arr[closest_idx, 0]) - dog_x) / 15.0
    obs[29] = (float(arr[closest_idx, 1]) - dog_y) / 15.0
    obs[30] = (float(arr[rearmost_idx, 0]) - dog_x) / 15.0
    obs[31] = (float(arr[rearmost_idx, 1]) - dog_y) / 15.0

    return obs
|
||||
@@ -0,0 +1,98 @@
|
||||
"""Sequential single-target shepherd dog algorithm.
|
||||
|
||||
Strömbom drives the flock's centre of mass; with N sheep and a narrow
|
||||
3 m gate, this fails because the flock is wider than the gate and CoM
|
||||
driving abandons stragglers. Real sheepdogs solve this differently:
|
||||
they pick *one* sheep at a time, drive it through, return for the next.
|
||||
|
||||
This module implements that "pin-and-push" approach.
|
||||
|
||||
Algorithm (one step):
|
||||
1. Active sheep = those still in the field (not yet penned).
|
||||
2. Target = the active sheep currently closest to the pen entry.
|
||||
3. Drive position = ``target + Δ · unit(target − pen_entry)`` —
|
||||
directly behind the target relative to the goal.
|
||||
4. Output unit vector pointing the dog at the drive position.
|
||||
|
||||
Once the target crosses the gate it latches as penned and is removed
|
||||
from the active set; the next-closest unpenned sheep becomes the
|
||||
target. The algorithm naturally "queues" sheep through the gate.
|
||||
|
||||
Empirically (with our flocking dynamics) this scales linearly with
|
||||
flock size and works up to at least n=10 within a 15 000-step budget.
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
from herding.geometry import GATE_Y, PEN_ENTRY, in_pen
|
||||
|
||||
|
||||
DELTA_DRIVE = 1.5 # standoff behind the target sheep
|
||||
APPROACH_GAIN = 1.0 # action magnitude scale (1 = full speed)
|
||||
|
||||
|
||||
def _unit(x, y):
    """Return (x, y) scaled to unit length.

    Vectors shorter than 1e-6 yield (0.0, 0.0) instead of dividing by a
    near-zero norm.
    """
    norm = math.hypot(x, y)
    if norm < 1e-6:
        return 0.0, 0.0
    return x / norm, y / norm
|
||||
|
||||
|
||||
def _is_active(x, y) -> bool:
    """True if a sheep at (x, y) still needs herding.

    A sheep is active when it is not inside the pen rectangle and is
    strictly north of the gate line.
    """
    return (not in_pen(x, y)) and y > GATE_Y
|
||||
|
||||
|
||||
def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Return ``(vx, vy, mode)`` where mode encodes the current target.

    Compatible with the Strömbom call signature so it can be drop-in
    swapped in the dog controller and the env's imitation reward.

    ``sheep_positions`` is a ``{name: (x, y)}`` mapping. ``mode`` is
    ``"idle"`` (with a zero action) when no sheep is active, otherwise
    ``"drive:<name>"`` for the targeted sheep.
    """
    active = [(name, x, y) for name, (x, y) in sheep_positions.items()
              if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle"

    # Pick target = sheep closest to pen entry. Stable choice: as one
    # sheep approaches and crosses the gate it stays the target until
    # latched; then the next-closest takes over.
    name, sx, sy = min(
        active,
        key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
    )

    # Drive position behind the target along the (target → pen) line.
    # If the target sits on pen_target, _unit returns zero and the drive
    # position collapses onto the sheep itself.
    ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
    tx = sx + DELTA_DRIVE * ux
    ty = sy + DELTA_DRIVE * uy

    # Unit heading from the dog to the drive position, scaled by the gain.
    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}"
|
||||
|
||||
|
||||
def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Debug variant returning ``(vx, vy, mode, debug_dict)``.

    The action and mode are computed exactly as in ``compute_action``.
    ``debug_dict`` carries ``n_active``, ``target_name``, the targeted
    sheep's position (``target_x``/``target_y``) and the drive position
    the dog is steering to (``drive_x``/``drive_y``). When idle, the target
    fields are blank/zero and the drive position is the dog's own position.
    """
    active = [(name, x, y) for name, (x, y) in sheep_positions.items()
              if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle", {
            "n_active": 0, "target_name": "",
            "target_x": 0.0, "target_y": 0.0,
            "drive_x": dog_xy[0], "drive_y": dog_xy[1],
        }

    # Target = active sheep closest to the pen entry.
    name, sx, sy = min(
        active,
        key=lambda s: math.hypot(s[1] - pen_target[0], s[2] - pen_target[1]),
    )

    # Drive position: DELTA_DRIVE behind the target, away from the pen.
    ux, uy = _unit(sx - pen_target[0], sy - pen_target[1])
    tx = sx + DELTA_DRIVE * ux
    ty = sy + DELTA_DRIVE * uy
    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])

    return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}", {
        "n_active": len(active), "target_name": name,
        "target_x": sx, "target_y": sy,
        "drive_x": tx, "drive_y": ty,
    }
|
||||
@@ -0,0 +1,114 @@
|
||||
"""Strömbom collect/drive heuristic for the shepherd dog.
|
||||
|
||||
Adapted from the original ``controllers/shepherd_dog/strombom.py`` and
|
||||
updated for the external pen layout. Used as a baseline controller and
|
||||
as the fallback when the RL policy isn't available.
|
||||
|
||||
Reference: Strömbom et al. 2014, "Solving the shepherding problem".
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
from herding.geometry import PEN_ENTRY, GATE_Y, in_pen
|
||||
|
||||
# Algorithm parameters. DELTA_DRIVE / DELTA_COLLECT were tightened from
|
||||
# the original (4.0 / 2.5) because the new external pen sits ~26 m from
|
||||
# typical sheep spawn locations — at the old 4 m standoff, the flee force
|
||||
# (quadratic ramp, 3.7 at 4 m vs ~10 at 2 m) couldn't move sheep through
|
||||
# the path inside the 3000-step episode budget.
|
||||
#
|
||||
# F_FACTOR was 2.0 in the original Strömbom paper; raised to 4.0 here so
|
||||
# the dog stays in *drive* mode much longer. With our tighter cohesion
|
||||
# (flocking_sim.py), partially-collected flocks consolidate naturally
|
||||
# during a drive, and we don't waste 80% of the time budget on a slow
|
||||
# "collect" pre-phase.
|
||||
F_FACTOR = 4.0
|
||||
DELTA_COLLECT = 1.5
|
||||
DELTA_DRIVE = 2.0
|
||||
|
||||
|
||||
def _unit(x, y):
    """Return (x, y) scaled to unit length.

    Vectors shorter than 1e-6 yield (0.0, 0.0) instead of dividing by a
    near-zero norm.
    """
    norm = math.hypot(x, y)
    if norm < 1e-6:
        return 0.0, 0.0
    return x / norm, y / norm
|
||||
|
||||
|
||||
def _is_active(x, y) -> bool:
    """A sheep is "active" if it's still in the field — not in or below
    the gate plane (we treat anything south of the gate as committed to
    the pen and stop trying to herd it)."""
    return (not in_pen(x, y)) and y > GATE_Y
|
||||
|
||||
|
||||
def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}.

    ``sheep_positions`` is a ``{name: (x, y)}`` mapping (matches the
    Webots controller's representation). ``(vx, vy)`` is a unit vector
    from the dog toward its steering point, or zero when idle or already
    on that point.
    """
    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle"

    n = len(active)
    com_x = sum(p[0] for p in active) / n
    com_y = sum(p[1] for p in active) / n
    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
    radius = max(dists)

    if radius > F_FACTOR * math.sqrt(n):
        # Collect: aim at a point behind the furthest sheep, opposite the CoM.
        # index() returns the first sheep at the maximum distance.
        sx, sy = active[dists.index(radius)]
        ux, uy = _unit(sx - com_x, sy - com_y)
        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
        mode = "collect"
    else:
        # Drive: aim at a point behind the flock CoM relative to the goal.
        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
        mode = "drive"

    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    return ax, ay, mode
|
||||
|
||||
|
||||
def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Variant of compute_action that also returns a small debug dict.

    Kept for parity with the legacy controller's CSV logger. The dict
    holds ``n_active``, the flock ``radius`` and the collect/drive
    ``threshold`` it was compared against, the CoM (``com_x``/``com_y``)
    and the dog's steering point (``target_x``/``target_y``). When idle,
    the numeric fields are zero and the steering point is the dog's own
    position.
    """
    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle", {
            "n_active": 0, "radius": 0.0, "threshold": 0.0,
            "com_x": 0.0, "com_y": 0.0,
            "target_x": dog_xy[0], "target_y": dog_xy[1],
        }

    n = len(active)
    com_x = sum(p[0] for p in active) / n
    com_y = sum(p[1] for p in active) / n
    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
    radius = max(dists)
    threshold = F_FACTOR * math.sqrt(n)

    if radius > threshold:
        # Collect: stand behind the furthest sheep, opposite the CoM.
        sx, sy = active[dists.index(radius)]
        ux, uy = _unit(sx - com_x, sy - com_y)
        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
        mode = "collect"
    else:
        # Drive: stand behind the CoM relative to the goal.
        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
        mode = "drive"

    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    dbg = {
        "n_active": n, "radius": radius, "threshold": threshold,
        "com_x": com_x, "com_y": com_y,
        "target_x": tx, "target_y": ty,
    }
    return ax, ay, mode, dbg
|
||||
@@ -0,0 +1,458 @@
|
||||
# RL-Driven Shepherd Herding — Implementation Plan
|
||||
|
||||
This plan turns the existing Strömbom-only Webots project into a dual-mode
|
||||
shepherd controller (RL primary, Strömbom fallback), with a fast Gymnasium
|
||||
training environment that mirrors the Webots dynamics tightly enough for
|
||||
sim-to-sim transfer. Stable-Baselines3 PPO is the learner.
|
||||
|
||||
---
|
||||
|
||||
## 1. Current state (audit)
|
||||
|
||||
### World geometry — `worlds/field.wbt`
|
||||
- Field bounded by stone walls at **x,y ∈ [−15, +15]**. Inside-usable area is
|
||||
~[−14.5, 14.5] (`X_MIN/MAX` in `flocking.py`).
|
||||
- **Pen is *inside* the field**: x ∈ [10, 13], y ∈ [−15, −8], with the
|
||||
opening on its **north** side at y = −8 (post-and-rail fence W/E; open N).
|
||||
- South stone wall has a **gate at x ∈ [10, 13], y = −15** (split wall +
|
||||
gate posts at x=10 and x=13). So sheep that get penned end up between the
|
||||
fence (N side at y=−8) and the south stone wall (with the wooden gate at
|
||||
y=−15 currently slightly ajar). The pen is effectively an L-shape inside
|
||||
the field, not external.
|
||||
- Spawns: dog at origin (0, 0), 3 sheep around (3, ±2) and (4, 0). Two more
|
||||
sheep are commented out.
|
||||
|
||||
### Robots — protos
|
||||
- **Sheep** (`protos/Sheep.proto`): differential drive, wheel radius 0.031 m,
|
||||
axle half-width 0.10 m → wheel base 0.20 m. `maxVelocity = 25 rad/s` →
|
||||
max linear ≈ **0.78 m/s**. Sensors: GPS, Compass, Emitter+Receiver on
|
||||
channel 1. `supervisor = TRUE` (used to repaint wool pink on pen entry).
|
||||
- **ShepherdDog** (`protos/ShepherdDog.proto`): differential drive, wheel
|
||||
radius 0.038 m, axle half-width 0.14 m → wheel base 0.28 m.
|
||||
`maxVelocity = 70 rad/s` → max linear ≈ **2.66 m/s**. Sensors: GPS,
|
||||
Compass, Gyro, Accelerometer, **Lidar** (front-only, FOV 2.44 rad ≈ 140°,
|
||||
180 rays, range 0.10–12 m, noise 0.005), Emitter+Receiver on channel 1,
|
||||
cosmetic ear/tail motors.
|
||||
|
||||
### Sheep controller — `controllers/sheep/{sheep.py,flocking.py}`
|
||||
- Reynolds-style boid stack: flee (quadratic ramp inside FLEE_DIST=7 m),
|
||||
cohesion (within 8 m), separation (within 2.5 m), wall soft repulsion
|
||||
(margin 5 m), wall hard escape (margin 1 m, gain 50), wander.
|
||||
- Pen-aware: sheep below the gate line but outside the gate corridor get a
|
||||
northward "deadzone" assist; on first entry into the pen rectangle,
|
||||
sheep latches `penned=True`, repaints pink, and switches to in-pen
|
||||
containment + jitter.
|
||||
- Driver: heading-error PD on diff-drive (k=4), forward velocity scaled by
|
||||
`cos(err)`, MAX_SPEED=22 (motor units, capped by proto's 25 rad/s).
|
||||
- Stuck detector: if displacement < 0.05 m for 20 steps, drives toward
|
||||
field origin to escape wall-pin (a known differential-drive failure mode).
|
||||
|
||||
### Dog controller — `controllers/shepherd_dog/{shepherd_dog.py,strombom.py}`
|
||||
- Strömbom collect/drive heuristic. CoM-radius gating
|
||||
`radius > F·√n` with F=2 selects collect (push furthest sheep inward) vs
|
||||
drive (push CoM toward the pen entry point at (11.5, −8.0)).
|
||||
- Deadzone rescue: when a sheep is below the gate line and outside the
|
||||
pen's x-corridor, the dog repositions to a "behind the sheep, opposite
|
||||
the pen" stand-off so the sheep's flee vector points back through the
|
||||
gate. Variants 0/1 alternate lateral offset to break corner cycles.
|
||||
- Stuck-rescue, EMA action smoothing, target-deadband, RESCUE_SPEED_CAP,
|
||||
cooldown — all empirical fixes for diff-drive oscillation.
|
||||
- Logs full per-step debug to `dog_behavior_log.csv` (currently 7 MB —
|
||||
add to `.gitignore`).
|
||||
|
||||
### Deleted training scaffolding (per `git status`)
|
||||
- `controllers/shepherd_dog_rl/{shepherd_dog_rl.py, final_model.zip, vecnorm.pkl, plot_debug.py}`
|
||||
- `training/{config.json, herding_env.py, parity_test.py, requirements.txt, train.py, train_at.py, viz.py, runs/.gitkeep}`
|
||||
|
||||
A previous attempt existed; we'll redesign rather than resurrect, keeping
|
||||
only the lessons (parity-tested env, VecNormalize wrapper, eval cadence).
|
||||
|
||||
---
|
||||
|
||||
## 2. Design decisions
|
||||
|
||||
### 2.1 Pen location — keep inside-field with N gate
|
||||
The user offered moving the pen *external* (through a wall hole). Tradeoffs:
|
||||
|
||||
| Option | Pros | Cons |
|
||||
|---|---|---|
|
||||
| **(A) Keep inside-field** (current) | World already built; Strömbom logic already tuned; gate corridor is short | Dog must navigate around three pen walls; adds geometric clutter |
|
||||
| (B) External pen via wall hole | Cleaner field — dog only sees sheep + outer walls; pen as goal region beyond a 3 m hole at y=−15 | Requires editing `field.wbt` (split south wall, add external pen walls beyond y<−15); existing rescue/deadzone logic must be retuned; outside-field flocking constants don't currently apply |
|
||||
|
||||
**Recommendation: keep (A)** for parity with the working Strömbom controller,
|
||||
but add a **simplification**: widen the pen entrance from 3 m (x ∈ [10, 13])
|
||||
to 4 m (x ∈ [9.5, 13.5]) and raise the entrance line from y=−8 to y=−7.5
|
||||
to give the dog more turning room. Optional later: option (B) as a curriculum
|
||||
extension (Section 7).
|
||||
|
||||
### 2.2 Where to train
|
||||
|
||||
PPO on Webots directly is too slow (real-time stepping, single env, slow
|
||||
reset). The previous training scaffolding used a Python 2D sim — that is
|
||||
the right approach. Constraints for sim-to-sim transfer:
|
||||
|
||||
1. **Use the exact same flocking math**: import `controllers/sheep/flocking.py`
|
||||
from the env, do not reimplement.
|
||||
2. **Use the same world constants**: import `controllers/shepherd_dog/strombom.py`
|
||||
for pen geometry and Strömbom baseline.
|
||||
3. **Model differential drive faithfully**: match wheel-radius, base, and
|
||||
max wheel-velocity from the proto files. Heading update from
|
||||
`(ω_R − ω_L)·r / b`, position from `(ω_R + ω_L)·r / 2`.
|
||||
4. **Match Webots step**: `basicTimeStep = 16 ms`. The sheep controller runs
|
||||
at every basic step; the env will use the same `dt = 0.016 s`.
|
||||
5. **Lidar deferred**: dog policy will use a *symbolic* observation
|
||||
(positions of dog + sheep, plus pen geometry) — not raw lidar — for the
|
||||
  first iteration. Learning from raw lidar scans is a much harder problem
|
||||
and isn't required for the herding task. (See Section 7 for an
|
||||
optional later upgrade.)
|
||||
|
||||
### 2.3 Action space for the dog
|
||||
|
||||
Two viable choices:
|
||||
|
||||
- **(a) High-level velocity vector** `(vx, vy) ∈ [−1, 1]²`. The same
|
||||
representation Strömbom emits today; the existing
|
||||
`drive_action(vx, vy, ...)` function in `shepherd_dog.py` converts this
|
||||
to wheel speeds. Decouples the policy from low-level diff-drive
|
||||
oscillations and enables direct A/B against Strömbom.
|
||||
- (b) Direct wheel speeds `(ω_L, ω_R) ∈ [−1, 1]²`. More expressive but the
|
||||
policy must learn diff-drive control from scratch — which is exactly
|
||||
the source of the wall-stuck and oscillation pain we're trying to
|
||||
avoid.
|
||||
|
||||
**Recommendation: (a)** — high-level `(vx, vy)`. Reuses the well-tuned
|
||||
`drive_action` controller, which already handles `cos(err)` clamping and
|
||||
turn gain. RL focuses on *strategy*, not actuation.
|
||||
|
||||
### 2.4 Observation space for the dog
|
||||
|
||||
Symbolic, fixed-size, normalized to [−1, 1]:
|
||||
|
||||
| Field | Dim | Notes |
|
||||
|---|---|---|
|
||||
| Dog (x, y, cos h, sin h) | 4 | Position normalized by 15 |
|
||||
| Sheep CoM (x, y) | 2 | Of *active* (not-penned) sheep |
|
||||
| Sheep dispersion (radius, std-x, std-y) | 3 | Strömbom collect-vs-drive features |
|
||||
| Vector dog→CoM (dx, dy, dist) | 3 | Helps the value function |
|
||||
| Vector dog→pen-entry (dx, dy, dist) | 3 | |
|
||||
| Vector furthest-sheep→CoM (dx, dy) | 2 | Strömbom collect target hint |
|
||||
| Min sheep-to-wall distance + min dog-to-wall | 2 | Safety signal |
|
||||
| Active sheep count / N_max | 1 | |
|
||||
| 8-bin polar histogram of sheep around dog | 8 | Order-invariant flock shape |
|
||||
|
||||
Total: **28 features**. Order-invariant by construction (histogram + summary
|
||||
stats), so the policy generalizes across flock sizes 1..N_max.
|
||||
|
||||
### 2.5 Reward
|
||||
|
||||
Sparse-only is too hard at flock scale; we shape conservatively.
|
||||
|
||||
```
|
||||
r_t = w_pen · ΔN_penned # +1 per newly penned sheep
|
||||
+ w_progress· (d_CoM_pen[t-1] − d_CoM_pen[t]) # closer-to-pen progress
|
||||
+ w_compact· (R[t-1] − R[t]) # tighter flock progress
|
||||
− w_time · 1 # constant time penalty
|
||||
− w_wall · I(min_wall_dist < 1.0 m) # dog too close to wall
|
||||
− w_collide· I(dog within 0.3 m of any sheep) # avoid contact
|
||||
+ w_done · I(all sheep penned) # terminal bonus
|
||||
```
|
||||
|
||||
Initial weights: `w_pen=2.0, w_progress=0.5, w_compact=0.2, w_time=0.005,
|
||||
w_wall=0.01, w_collide=0.05, w_done=10.0`. Tune via 1-sheep curriculum
|
||||
first — if the dog learns 1-sheep cleanly, the weights are sane.
|
||||
|
||||
### 2.6 Episode
|
||||
|
||||
- Max steps: 3000 (≈ 48 s at dt=16 ms — generous).
|
||||
- Termination: all sheep penned (success), dog/sheep stuck > 600 steps with
|
||||
no progress (failure), step limit (timeout).
|
||||
- Reset: domain-randomized — sheep count ∈ {1..N_max}, sheep positions
|
||||
uniform in field minus pen+gate corridor, dog at origin ± U(−2, 2).
|
||||
|
||||
### 2.7 Curriculum
|
||||
|
||||
| Stage | N_sheep | Duration (steps) | Pass criterion |
|
||||
|---|---|---|---|
|
||||
| 0 | 1 | 0.5 M | success ≥ 90 % |
|
||||
| 1 | 2 | 1.0 M | success ≥ 80 % |
|
||||
| 2 | 3 | 1.5 M | success ≥ 70 % |
|
||||
| 3 | 1..3 mixed | 2.0 M | mean reward stable |
|
||||
| 4 (optional) | 5 | 2.0 M | success ≥ 60 % |
|
||||
|
||||
Implemented by changing only `n_sheep` in the env reset.
|
||||
|
||||
---
|
||||
|
||||
## 3. Repository layout (new)
|
||||
|
||||
```
|
||||
project/
|
||||
├── controllers/
|
||||
│ ├── sheep/ # unchanged
|
||||
│ ├── shepherd_dog/ # Strömbom controller (renamed entry)
|
||||
│ │ ├── shepherd_dog.py # mode-switch wrapper: RL | strombom
|
||||
│ │ ├── strombom.py # unchanged (canonical Strömbom)
|
||||
│ │ └── policy_loader.py # NEW: loads SB3 zip + VecNormalize
|
||||
│ └── ...
|
||||
├── herding/ # NEW: Python package, importable from env + controller
|
||||
│ ├── __init__.py
|
||||
│ ├── geometry.py # field/pen constants, in_pen(), wall helpers (single source of truth)
|
||||
│ ├── flocking_sim.py # vectorised numpy port of flocking.py for fast batched sheep
|
||||
│ ├── diffdrive.py # diff-drive integrator matching the proto specs
|
||||
│ └── obs.py # observation builder shared by env and Webots controller
|
||||
├── training/ # NEW
|
||||
│ ├── herding_env.py # gymnasium.Env, single-agent (the dog)
|
||||
│ ├── parity_test.py # asserts env trajectory ≈ Webots trajectory for fixed seeds
|
||||
│ ├── train_ppo.py # SB3 PPO entry point
|
||||
│ ├── eval.py # rollout + metrics (success rate, time-to-pen)
|
||||
│ ├── configs/
|
||||
│ │ ├── ppo_default.yaml
|
||||
│ │ └── curriculum.yaml
|
||||
│ ├── runs/ # tensorboard + checkpoints (.gitignored)
|
||||
│ └── requirements.txt
|
||||
├── docs/
|
||||
│ └── project.md # unchanged
|
||||
├── plan.md # this file
|
||||
└── ...
|
||||
```
|
||||
|
||||
`herding/` becomes the **single source of truth** for geometry and dynamics.
|
||||
The Webots controllers and the training env both import from it, so when a
|
||||
constant changes in one place it changes everywhere — eliminating the
|
||||
sim/Webots-drift class of bugs.
|
||||
|
||||
This means the existing `controllers/sheep/flocking.py` and
|
||||
`controllers/shepherd_dog/strombom.py` become thin shims that re-export
|
||||
from `herding/`. Webots controllers can import `herding/` because Webots
|
||||
adds the project root to `sys.path` at controller startup; we'll verify.
|
||||
|
||||
---
|
||||
|
||||
## 4. The Gymnasium environment — `training/herding_env.py`
|
||||
|
||||
```python
|
||||
class HerdingEnv(gymnasium.Env):
|
||||
metadata = {"render_modes": ["rgb_array", "human"]}
|
||||
|
||||
def __init__(self, n_sheep=3, max_steps=3000, dt=0.016, seed=None):
|
||||
self.action_space = Box(low=-1, high=1, shape=(2,), dtype=np.float32)
|
||||
self.observation_space = Box(low=-1, high=1, shape=(28,), dtype=np.float32)
|
||||
...
|
||||
|
||||
def reset(self, *, seed=None, options=None):
|
||||
# Random sheep positions in field \ pen corridor, dog near origin.
|
||||
# Optional curriculum: options["n_sheep"] overrides.
|
||||
...
|
||||
|
||||
def step(self, action):
|
||||
vx, vy = action # high-level velocity intent
|
||||
# Convert to wheel speeds via the same drive_action inverse used in Webots
|
||||
wL, wR = self._diffdrive_inverse(vx, vy, self.dog_state)
|
||||
self.dog_state = self._integrate_diffdrive(self.dog_state, wL, wR, self.dt)
|
||||
# Step every sheep one boid step (vectorized in flocking_sim.py)
|
||||
self.sheep_state = self._step_sheep(self.sheep_state, self.dog_state)
|
||||
# Update penned set, compute reward, observation, done flags
|
||||
...
|
||||
```
|
||||
|
||||
Key points:
|
||||
- **Vectorised sheep update**: re-implements `flocking.py` in numpy so 100
|
||||
parallel envs with 5 sheep each take ms, not seconds. Numerical parity
|
||||
with the scalar version is asserted in `parity_test.py`.
|
||||
- **Same diff-drive integrator** for the dog as Webots will see at
|
||||
inference. Wall + pen-fence collisions clamp position (a Webots-realistic
|
||||
no-pass-through approximation).
|
||||
- **Domain randomization** in reset: sheep count, spawn positions, sheep
|
||||
flock-parameter jitter (±10 % on FLEE_DIST, COHESION_DIST, etc.) for
|
||||
robustness.
|
||||
|
||||
---
|
||||
|
||||
## 5. Training pipeline — `training/train_ppo.py`
|
||||
|
||||
- **Algorithm**: SB3 `PPO` with `MlpPolicy`, `n_steps=2048`, `batch_size=256`,
|
||||
`n_epochs=10`, `gamma=0.995`, `gae_lambda=0.95`, `clip_range=0.2`,
|
||||
`ent_coef=0.005`, `vf_coef=0.5`, `learning_rate=3e-4`.
|
||||
- **Vec envs**: `SubprocVecEnv` × 16 parallel envs (the env is pure numpy
|
||||
so subprocs are CPU-cheap).
|
||||
- **Normalization**: `VecNormalize(norm_obs=True, norm_reward=True,
|
||||
clip_obs=10.0)`. Pickled alongside the policy zip — both required at
|
||||
inference.
|
||||
- **Callbacks**:
|
||||
- `CheckpointCallback` every 100 k steps.
|
||||
- `EvalCallback` on a separate eval env (no normalization-update) every
|
||||
50 k steps; logs success rate and time-to-pen to TensorBoard.
|
||||
- Custom `CurriculumCallback`: bumps `n_sheep` when eval success rate
|
||||
crosses the stage threshold for 3 consecutive evals.
|
||||
- **Determinism for debugging**: seed-pinned eval env so regressions are
|
||||
catchable.
|
||||
|
||||
---
|
||||
|
||||
## 6. Webots integration — RL inference path
|
||||
|
||||
`controllers/shepherd_dog/shepherd_dog.py` becomes a thin wrapper:
|
||||
|
||||
```python
|
||||
MODE = os.environ.get("HERDING_MODE", "rl") # "rl" | "strombom"
|
||||
|
||||
if MODE == "rl":
|
||||
policy = policy_loader.load("training/runs/best/policy.zip",
|
||||
"training/runs/best/vecnormalize.pkl")
|
||||
obs_fn = build_obs # from herding/obs.py
|
||||
else:
|
||||
obs_fn = None # strombom path uses sheep_positions directly
|
||||
|
||||
while robot.step(timestep) != -1:
|
||||
receive_messages()
|
||||
if MODE == "rl":
|
||||
obs = obs_fn(dog_xy, dog_heading, sheep_positions, ...)
|
||||
action, _ = policy.predict(obs, deterministic=True)
|
||||
vx, vy = action.tolist()
|
||||
else:
|
||||
vx, vy, mode, dbg = compute_action_debug(dog_xy, sheep_positions, PEN_ENTRY)
|
||||
# plus existing rescue/cooldown/EMA layer
|
||||
drive_action(vx, vy, ...)
|
||||
```
|
||||
|
||||
A **safety supervisor** wraps the RL output: if `obs` indicates the dog is
|
||||
< 0.6 m from a wall, override with the existing wall-escape behavior
|
||||
(reverse + turn). This is a hard guarantee diff-drive needs because PPO
|
||||
may not discover wall-escape reliably from on-policy data.
|
||||
|
||||
`policy_loader.py` handles the SB3 import lazily so the controller still
|
||||
works with `MODE=strombom` even if SB3 is not installed in the Webots
|
||||
Python environment.
|
||||
|
||||
---
|
||||
|
||||
## 7. Optional extensions (post-baseline)
|
||||
|
||||
- **External pen** (Section 2.1 option B): edit `field.wbt` to extend the
|
||||
south wall hole into an external L-shaped pen with its own walls; update
|
||||
`herding/geometry.py`; retrain stage 3 only.
|
||||
- **Lidar observation**: replace symbolic obs with 36-bin downsampled
|
||||
lidar + ego state; train end-to-end. Useful as the "extra merit"
|
||||
dimension in the project doc.
|
||||
- **Two-dog mode**: make env multi-agent, train with `MAPPO`-style shared
|
||||
critic or independent PPO. The proto already supports multiple dog
|
||||
instances; world only needs a second `ShepherdDog` node.
|
||||
- **Mecanum comparison**: swap the dog proto for a mecanum variant; same
|
||||
policy, different `_integrate_diffdrive` (becomes holonomic).
|
||||
- **Sheep flock size scaling**: 5, 10, 20 — the obs is order-invariant so
|
||||
the same policy generalises; just extend the curriculum further.
|
||||
|
||||
---
|
||||
|
||||
## 8. Risks & mitigations
|
||||
|
||||
| Risk | Mitigation |
|
||||
|---|---|
|
||||
| Sim-to-Webots gap (sheep dynamics, wall friction) | `parity_test.py` asserts trajectory match within tolerance for fixed seeds; if it fails, fix the env, not the policy |
|
||||
| Dog learns to wall-pin sheep against fence | Add `w_collide` penalty + min-sheep-to-wall term in obs; curriculum from 1 sheep first |
|
||||
| PPO oscillation collapses into spinning | Action smoothing in env step (EMA on `(vx, vy)`, mirroring `ACTION_SMOOTH=0.35` from Strömbom controller); add a small `‖a_t − a_{t-1}‖` penalty to the reward |
|
||||
| Pen approach failures (sheep refuse gate) | Reuse the existing `deadzone_rescue` as a *scripted fallback* triggered when a sheep has been deadzoned > 200 steps — RL handles the common case, scripted handles the corner |
|
||||
| Gym version mismatch (gymnasium vs gym) | Lock to `gymnasium>=0.29`, `stable-baselines3>=2.3` in requirements |
|
||||
|
||||
---
|
||||
|
||||
## 9. Milestones (suggested order of implementation)
|
||||
|
||||
1. **M0 — Refactor** (no behavior change): create `herding/` package, move
|
||||
constants out of `flocking.py`/`strombom.py`, leave shims; verify
|
||||
Webots still runs Strömbom unchanged. Add `dog_behavior_log.csv` to
|
||||
`.gitignore`.
|
||||
2. **M1 — Env & parity**: `herding_env.py`, `parity_test.py`. Asserts
|
||||
sheep + dog trajectories match Webots within tolerance for 5 fixed
|
||||
seeds. *Done when parity test green.*
|
||||
3. **M2 — PPO baseline**: train Stage 0 (1 sheep) for 0.5 M steps; eval
|
||||
in env at ≥ 90 % success.
|
||||
4. **M3 — Webots inference**: load Stage 0 policy in `shepherd_dog.py`
|
||||
with `HERDING_MODE=rl`; verify the dog herds 1 sheep into the pen in
|
||||
the actual Webots world. *This is the sim-to-sim transfer gate.*
|
||||
5. **M4 — Curriculum**: stages 1–3, ~5 M steps total, with checkpoints
|
||||
and eval logs.
|
||||
6. **M5 — Strömbom comparison**: run both controllers on a fixed eval
|
||||
suite (same seeds, 1/2/3 sheep), log success rate and time-to-pen.
|
||||
This is a deliverable for the project's "quantitative evaluation"
|
||||
goal.
|
||||
7. **M6 — Documentation**: a short README in `training/` showing how to
|
||||
train, evaluate, and switch modes in Webots.
|
||||
|
||||
Each milestone is independently demoable. M0–M3 is the critical path to
|
||||
"RL works in Webots"; M4–M6 polishes it for the project deliverable.
|
||||
|
||||
---
|
||||
|
||||
## 10. Decisions (locked in by implementation)
|
||||
|
||||
- **Pen layout**: option B (external pen). The pen sits south of the
|
||||
field at x ∈ [10, 13], y ∈ [-22, -15] and is reached through the
|
||||
existing 3 m gap in the south stone wall. The old in-field
|
||||
quarantine fence is gone and the wooden gate is modeled as
|
||||
swung-open and parked on the west gate post so the corridor is
|
||||
unobstructed. This kills the deadzone class entirely.
|
||||
- **Flock size**: 1..10 sheep, sampled uniformly each reset. The order-
|
||||
invariant observation (CoM, dispersion, polar histogram) lets a
|
||||
single policy generalise across the whole range. A curriculum widens
|
||||
``max_n_sheep`` from 1 to 10 over training to keep early exploration
|
||||
tractable.
|
||||
- **Single-sheep mode**: handled by the same policy (n_sheep=1 is the
|
||||
first stage of the curriculum and stays in the training distribution
|
||||
throughout). No separate model.
|
||||
- **Hardware**: GPU for training. SubprocVecEnv × 16 on CPU feeds an
|
||||
MlpPolicy on GPU; ~2–3 h for the full curriculum.
|
||||
|
||||
## 11. What was built
|
||||
|
||||
```
|
||||
herding/ # single source of truth, importable from both
|
||||
geometry.py # field/pen constants, latch helpers, robot specs
|
||||
flocking_sim.py # Reynolds boid step (matches Webots controller)
|
||||
diffdrive.py # diff-drive kinematics + velocity↔wheels
|
||||
obs.py # 28-D order-invariant observation builder
|
||||
strombom.py # collect/drive heuristic (baseline + fallback)
|
||||
|
||||
worlds/field.wbt # external pen south of field, 10 sheep slots,
|
||||
# gate parked open, in-field fence removed
|
||||
|
||||
controllers/sheep/sheep.py # imports from herding/, latches on
|
||||
# is_penned_position
|
||||
controllers/shepherd_dog/
|
||||
shepherd_dog.py # mode switch (HERDING_MODE=rl|strombom),
|
||||
# safety supervisor for DOG_SOUTH_LIMIT
|
||||
policy_loader.py # lazy SB3 zip + VecNormalize loader
|
||||
strombom.py # shim re-exporting herding.strombom
|
||||
|
||||
training/
|
||||
herding_env.py # gymnasium.Env, action smoothing, reward shaping
|
||||
train_ppo.py # SB3 PPO with VecNormalize, eval, checkpoints,
|
||||
# curriculum callback
|
||||
eval.py # success-rate / time-to-pen across n_sheep
|
||||
parity_test.py # shape, determinism, baseline-rollout smoke test
|
||||
configs/ppo_default.yaml
|
||||
requirements.txt
|
||||
README.md # how to train, evaluate, switch modes in Webots
|
||||
```
|
||||
|
||||
## 12. To run
|
||||
|
||||
```bash
|
||||
# 1. Install deps (CUDA-enabled torch wheel for GPU)
|
||||
pip install -r training/requirements.txt
|
||||
|
||||
# 2. Smoke test
|
||||
python -m training.parity_test
|
||||
|
||||
# 3. Train (5 M steps, ~2–3 h on a single GPU)
|
||||
python -m training.train_ppo --out-dir training/runs/baseline
|
||||
|
||||
# 4. Evaluate vs Strömbom
|
||||
python -m training.eval --policy training/runs/baseline/best
|
||||
python -m training.eval --policy strombom
|
||||
|
||||
# 5. Run in Webots
|
||||
export HERDING_MODE=rl
|
||||
export HERDING_POLICY_DIR=$PWD/training/runs/baseline/best
|
||||
webots worlds/field.wbt
|
||||
```
|
||||
@@ -0,0 +1,117 @@
|
||||
"""Collect (obs, action) demonstrations from the sequential teacher.
|
||||
|
||||
Runs the sequential algorithm across a grid of (n_sheep, seed) combos
|
||||
at full difficulty, logs the (observation, action) pair every Nth step,
|
||||
and saves successful trajectories to a numpy ``.npz`` for behavior
|
||||
cloning. Failed trajectories are dropped by default — we only want to
|
||||
teach the policy from good examples.
|
||||
|
||||
Usage::
|
||||
|
||||
python -m tools.collect_demos --out training/demos.npz
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, ".."))
|
||||
if _PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, _PROJECT_ROOT)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from herding.geometry import PEN_ENTRY
|
||||
from herding.sequential import compute_action
|
||||
from training.herding_env import HerdingEnv
|
||||
|
||||
|
||||
def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int):
    """Roll out the sequential teacher once and record (obs, action) pairs.

    A fresh ``HerdingEnv`` is built at ``difficulty=1.0`` and driven by
    ``herding.sequential.compute_action`` until every sheep is penned, the
    env reports termination/truncation, or ``max_steps`` is exhausted.

    Args:
        n_sheep: Flock size passed to the env.
        seed: Seed passed both to the env constructor and to ``reset``.
        max_steps: Upper bound on teacher steps (also the env's ``max_steps``).
        subsample: Keep every ``subsample``-th (obs, action) pair; must be >= 1.

    Returns:
        ``(obs, actions, success, steps)`` where ``obs`` and ``actions`` are
        float32 arrays of the logged pairs, ``success`` is True when all sheep
        are flagged as penned, and ``steps`` is the env's ``steps`` attribute
        at the end of the rollout.

    Raises:
        ValueError: If ``subsample`` is smaller than 1.
    """
    # Fail early with a readable message: ``step % 0`` below would otherwise
    # surface as a ZeroDivisionError in the middle of the rollout.
    if subsample < 1:
        raise ValueError(f"subsample must be >= 1, got {subsample}")

    env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
                     difficulty=1.0, seed=seed)
    obs, _ = env.reset(seed=seed)
    obs_list, action_list = [], []
    for step in range(max_steps):
        # The teacher only sees sheep that are not penned yet.
        positions = {f"s{i}": (float(env.sheep_x[i]), float(env.sheep_y[i]))
                     for i in range(env.n_sheep) if not env.sheep_penned[i]}
        if not positions:
            break
        vx, vy, _mode = compute_action(
            (env.dog_x, env.dog_y), positions, PEN_ENTRY,
        )
        action = np.array([vx, vy], dtype=np.float32)
        if step % subsample == 0:
            # Log the observation the action was computed for (pre-step).
            obs_list.append(obs.copy())
            action_list.append(action.copy())
        obs, _r, term, trunc, _info = env.step(action)
        if term or trunc:
            break
    success = bool(env.sheep_penned.all())
    return (
        np.asarray(obs_list, dtype=np.float32),
        np.asarray(action_list, dtype=np.float32),
        success,
        env.steps,
    )
|
||||
|
||||
|
||||
def main():
    """Collect teacher demonstrations over an (n_sheep, seed) grid and save them.

    Parses the CLI, runs ``collect_one`` for every flock size in
    ``--n-sheep-list`` and every seed in ``range(--seeds-per-n)``, keeps the
    successful trajectories (or all of them with ``--keep-failures``) and
    writes ``obs``, ``actions`` and ``meta`` arrays to a ``.npz`` file.

    Each ``meta`` row is ``(n_sheep, seed, n_logged, success, total_steps)``.

    Raises:
        RuntimeError: If no trajectory was kept.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", default="training/demos.npz")
    parser.add_argument("--n-sheep-list", default="1,2,3,5,8,10")
    parser.add_argument("--seeds-per-n", type=int, default=15)
    parser.add_argument("--max-steps", type=int, default=30000)
    parser.add_argument("--subsample", type=int, default=5,
                        help="Keep every Nth (obs, action) pair.")
    parser.add_argument("--keep-failures", action="store_true",
                        help="Include partial-success trajectories. Default off.")
    args = parser.parse_args()

    # A zero subsample would crash the rollout with a modulo-by-zero.
    if args.subsample < 1:
        parser.error("--subsample must be >= 1")

    # Skip blank tokens so a trailing comma ("1,2,") does not crash int("").
    n_sheep_list = [int(tok) for tok in args.n_sheep_list.split(",") if tok.strip()]
    print(f"[demos] grid: n_sheep={n_sheep_list}, seeds={args.seeds_per_n}, "
          f"max_steps={args.max_steps}, subsample={args.subsample}")

    all_obs, all_actions, all_meta = [], [], []
    t_start = time.time()
    n_success = 0
    n_total = 0

    for n in n_sheep_list:
        for seed in range(args.seeds_per_n):
            obs, actions, success, total_steps = collect_one(
                n, seed, args.max_steps, args.subsample,
            )
            n_total += 1
            if success:
                n_success += 1
            keep = success or args.keep_failures
            if keep and len(obs) > 0:
                all_obs.append(obs)
                all_actions.append(actions)
                all_meta.append((n, seed, len(obs), int(success), total_steps))
            tag = "✓" if success else "✗"
            print(f"  [{tag}] n={n:>2d} seed={seed:>2d} steps={total_steps:>6d} "
                  f"logged={len(obs):>5d}")

    if not all_obs:
        raise RuntimeError("No trajectories kept — try --keep-failures.")

    obs = np.concatenate(all_obs, axis=0)
    actions = np.concatenate(all_actions, axis=0)
    meta = np.array(all_meta, dtype=np.int32)

    # np.savez appends ".npz" when the name lacks it; resolve the final path
    # ourselves so the directory we create and the path we report match the
    # file that is actually written.
    out_path = Path(args.out)
    if not out_path.name.endswith(".npz"):
        out_path = out_path.with_name(out_path.name + ".npz")
    out_path.parent.mkdir(parents=True, exist_ok=True)
    np.savez(out_path, obs=obs, actions=actions, meta=meta)

    elapsed = time.time() - t_start
    print(f"\n=== {n_success}/{n_total} trajectories successful ({100*n_success/n_total:.0f}%) ===")
    print(f"=== {len(obs)} transitions saved to {out_path} ===")
    print(f"=== obs={obs.shape}, actions={actions.shape}, elapsed={elapsed:.0f}s ===")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Executable
+63
@@ -0,0 +1,63 @@
|
||||
#!/bin/bash
# Launch Webots with N sheep enabled and the chosen controller mode.
# Generates a temporary world file in worlds/field_test.wbt with sheep
# beyond N commented out, sets the env vars the dog controller reads,
# then execs Webots on it.
#
# Usage:
#   tools/run_webots.sh [N] [MODE]
#     N    : number of active sheep (1..10), default 10
#     MODE : "rl" | "strombom" | "sequential", default "rl"
#
# Examples:
#   tools/run_webots.sh 10 rl          # BC-trained RL policy, 10 sheep
#   tools/run_webots.sh 5 sequential   # the analytic teacher, 5 sheep
#   tools/run_webots.sh 3 strombom     # canonical baseline, 3 sheep
#
# Notes:
#   * The RL mode loads training/runs/bc_pretrained/policy.zip by default.
#     Override via HERDING_POLICY_DIR=/path/to/run env var.
#   * Conda env "tir" must be active (provides stable-baselines3 + torch).

set -e
N=${1:-10}
MODE=${2:-rl}

# Require a plain decimal integer before doing arithmetic on it: inside
# (( )) a non-numeric string would be evaluated as an expression.
if ! [[ "$N" =~ ^[0-9]+$ ]]; then
  echo "N must be 1..10, got $N" >&2; exit 1
fi
N=$((10#$N))  # force base 10 so "08" is not parsed as invalid octal
if (( N < 1 || N > 10 )); then
  echo "N must be 1..10, got $N" >&2; exit 1
fi
case "$MODE" in
  rl|strombom|sequential) ;;
  *) echo "MODE must be rl|strombom|sequential, got '$MODE'" >&2; exit 1 ;;
esac

ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
SRC="$ROOT/worlds/field.wbt"
DST="$ROOT/worlds/field_test.wbt"

cp "$SRC" "$DST"
# Comment out sheep N+1..10 by prefixing the matching Sheep { ... } line.
# A C-style loop is used instead of `seq`: BSD/macOS `seq 11 10` counts
# DOWN (11, 10), which would wrongly disable sheep10 when N=10.
# `-i.bak` is the in-place form accepted by both GNU and BSD sed.
for (( i = N + 1; i <= 10; i++ )); do
  sed -i.bak "s|^Sheep .* \"sheep${i}\".*|# &|" "$DST"
done
rm -f "$DST.bak"

# grep -c exits 1 when the count is 0; don't let `set -e` abort on that.
active=$(grep -c '^Sheep' "$DST" || true)
echo "------------------------------------------------------------"
echo "World      : $DST"
echo "Mode       : $MODE"
echo "Sheep      : $active active"
echo "Policy dir : ${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_pretrained}"
echo "------------------------------------------------------------"

# Webots strips HERDING_* env vars from controller subprocesses in some
# setups, so we also write a runtime config file the controller reads.
RESOLVED_POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_pretrained}"
cat > "$ROOT/herding_runtime.cfg" <<EOF
HERDING_MODE=$MODE
HERDING_POLICY_DIR=$RESOLVED_POLICY_DIR
EOF

export HERDING_MODE="$MODE"
export HERDING_POLICY_DIR="$RESOLVED_POLICY_DIR"

exec webots "$DST"
|
||||
@@ -0,0 +1,115 @@
|
||||
# Shepherd Herding — Training & Inference
|
||||
|
||||
This directory holds the Gymnasium environment, PPO training script, and
|
||||
evaluation harness for the RL shepherd-dog policy. The Webots controller
|
||||
in `controllers/shepherd_dog/` loads the resulting policy at inference
|
||||
time when launched with `HERDING_MODE=rl`.
|
||||
|
||||
## Layout
|
||||
|
||||
```
|
||||
training/
|
||||
├── herding_env.py # gymnasium.Env — the dog is the agent
|
||||
├── train_ppo.py # SB3 PPO entry point (vec envs, eval, curriculum)
|
||||
├── eval.py # rollout success-rate / time-to-pen across flock sizes
|
||||
├── parity_test.py # smoke test: shapes, determinism, baseline rollout
|
||||
├── configs/ppo_default.yaml
|
||||
├── runs/ # tensorboard + checkpoints (gitignored)
|
||||
└── requirements.txt
|
||||
```
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
python -m venv .venv && source .venv/bin/activate
|
||||
pip install -r training/requirements.txt
|
||||
```
|
||||
|
||||
CPU is the default and also the recommended device — SB3's PPO with an
|
||||
MLP policy of this size runs faster on CPU than on GPU because the
|
||||
bottleneck is rollout collection, not gradient compute. The 16 SubprocVecEnv
|
||||
workers saturate ~16 CPU cores. To force CUDA anyway, pass `--device cuda`.
|
||||
|
||||
## Train
|
||||
|
||||
```bash
|
||||
# Full curriculum (1 → 10 sheep), ~5M steps, ~2–3h on a single GPU.
|
||||
python -m training.train_ppo \
|
||||
--config training/configs/ppo_default.yaml \
|
||||
--out-dir training/runs/baseline
|
||||
```
|
||||
|
||||
Outputs:
|
||||
- `training/runs/baseline/best/best_model.zip` — best eval checkpoint
|
||||
- `training/runs/baseline/best/vecnormalize.pkl` — observation stats
|
||||
- `training/runs/baseline/checkpoints/ppo_*.zip` — periodic checkpoints
|
||||
- `training/runs/baseline/tb/` — TensorBoard logs (`tensorboard --logdir`)
|
||||
|
||||
To resume:
|
||||
|
||||
```bash
|
||||
python -m training.train_ppo --resume training/runs/baseline/checkpoints/ppo_500000_steps.zip
|
||||
```
|
||||
|
||||
## Evaluate
|
||||
|
||||
```bash
|
||||
# RL policy
|
||||
python -m training.eval --policy training/runs/baseline/best
|
||||
|
||||
# Strömbom baseline
|
||||
python -m training.eval --policy strombom
|
||||
```
|
||||
|
||||
Prints success rate, mean steps, and mean penned-count per flock size.
|
||||
Use the same `--n-seeds` for both to get a fair RL-vs-Strömbom A/B.
|
||||
|
||||
## Parity / smoke test
|
||||
|
||||
```bash
|
||||
python -m training.parity_test
|
||||
```
|
||||
|
||||
Checks observation/action shapes, deterministic seeding, the curriculum
|
||||
sampler, and a 400-step Strömbom rollout. Run this before every long
|
||||
training job — catches the boring class of bugs in seconds.
|
||||
|
||||
## Run the policy in Webots
|
||||
|
||||
1. Train (above) — produces `training/runs/<name>/best/`.
|
||||
2. In Webots, set the dog controller's environment variables:
|
||||
|
||||
```bash
|
||||
export HERDING_MODE=rl
|
||||
export HERDING_POLICY_DIR=$(pwd)/training/runs/baseline/best
|
||||
webots worlds/field.wbt
|
||||
```
|
||||
|
||||
Or set them via Webots' controller args / a `.wbproj` if you prefer.
|
||||
|
||||
3. To force the Strömbom baseline (same world, same controller):
|
||||
|
||||
```bash
|
||||
export HERDING_MODE=strombom
|
||||
webots worlds/field.wbt
|
||||
```
|
||||
|
||||
If `HERDING_MODE=rl` but the policy can't be loaded (SB3 not installed,
|
||||
zip missing, etc.), the controller logs the error and falls back to
|
||||
Strömbom automatically.
|
||||
|
||||
## Curriculum knobs
|
||||
|
||||
The default schedule in `configs/ppo_default.yaml` widens
|
||||
`max_n_sheep` over training. Each reset samples `n_sheep ~ U[1,
|
||||
max_n_sheep]`, so the final policy has seen every flock size from 1 to
|
||||
10 in proportion. To pin a specific size, instantiate the env with
|
||||
`HerdingEnv(n_sheep=N)` (see `eval.py`).
|
||||
|
||||
## Reward shaping
|
||||
|
||||
Weights live in class attributes on `HerdingEnv`. Tune from the 1-sheep
|
||||
curriculum first — if the dog can't herd a single sheep cleanly, raising
|
||||
`W_PROGRESS` or lowering `W_TIME` is usually the fix. For multi-sheep
|
||||
collapse modes (dog spins between sheep), increase `W_COMPACT` so
|
||||
tightening the flock pays.
|
||||
@@ -0,0 +1,218 @@
|
||||
"""Behavior cloning of the sequential teacher into an SB3-compatible policy.
|
||||
|
||||
Trains the policy network (mean-action head) of an SB3 ``MlpPolicy`` to
|
||||
mimic the demonstrations collected by ``tools.collect_demos``. The
|
||||
saved zip is loadable via ``PPO.load(...)`` and can be passed to
|
||||
``train_ppo.py --resume`` for fine-tuning.
|
||||
|
||||
Why this works: the teacher (sequential single-target driving) solves
|
||||
n=10 at 80%+ in our env. BC gives the RL a competent starting policy,
|
||||
so PPO doesn't have to discover behavior from scratch — it only has to
|
||||
*refine* the teacher's strategy via the sparse pen reward.
|
||||
|
||||
Usage::
|
||||
|
||||
python -m training.bc_pretrain \\
|
||||
--demos training/demos.npz \\
|
||||
--out training/runs/bc_pretrained
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, ".."))
|
||||
if _PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, _PROJECT_ROOT)
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from torch.utils.data import DataLoader, TensorDataset
|
||||
|
||||
from stable_baselines3 import PPO
|
||||
from stable_baselines3.common.vec_env import DummyVecEnv
|
||||
|
||||
from training.herding_env import HerdingEnv
|
||||
|
||||
|
||||
def build_model(net_arch_pi, net_arch_vf, log_std_init: float):
    """Create an untrained SB3 PPO model to host the cloned policy weights.

    The model wraps a single default-constructed ``HerdingEnv`` in a
    ``DummyVecEnv``; only its policy network is needed for behavior
    cloning, PPO's own training loop is never run here.

    Args:
        net_arch_pi: Hidden layer widths for the actor network.
        net_arch_vf: Hidden layer widths for the value network.
        log_std_init: Initial log standard deviation of the action distribution.

    Returns:
        ``(model, env)``: the PPO instance and the vec env it was built on.
    """
    def _make_env():
        return HerdingEnv()

    vec_env = DummyVecEnv([_make_env])
    policy_kwargs = {
        "net_arch": {"pi": net_arch_pi, "vf": net_arch_vf},
        "log_std_init": log_std_init,
    }
    ppo = PPO("MlpPolicy", vec_env, policy_kwargs=policy_kwargs, verbose=0)
    return ppo, vec_env
|
||||
|
||||
|
||||
def policy_forward_mean(policy, obs_batch):
    """Compute the policy's mean (deterministic) action for a batch of observations.

    Mirrors the actor forward path by hand, because the policy only exposes
    actions through its distribution wrapper:
    ``extract_features`` → ``mlp_extractor`` → ``action_net``.

    Args:
        policy: Object exposing ``extract_features``, ``mlp_extractor`` and
            ``action_net`` (an SB3 actor-critic policy in this file).
        obs_batch: Batch of observations accepted by ``extract_features``.

    Returns:
        The output of ``action_net`` applied to the actor latent.
    """
    extracted = policy.extract_features(obs_batch)
    # Some SB3 configurations return (pi_features, vf_features); only the
    # actor half is relevant here.
    actor_features = extracted[0] if isinstance(extracted, tuple) else extracted
    actor_latent, _ = policy.mlp_extractor(actor_features)
    mean_action = policy.action_net(actor_latent)
    return mean_action
|
||||
|
||||
|
||||
def main():
    """Behavior-clone the teacher demonstrations into an SB3 PPO policy.

    Loads ``obs`` / ``actions`` / ``meta`` from the ``--demos`` npz file,
    splits them into train and validation sets, fits the policy's mean-action
    output with ``MSE + cos_weight * (1 - cosine similarity)``, and saves the
    model as ``<out>/policy.zip``.

    The weights saved are those of the epoch with the lowest validation MSE
    (the value reported as ``best_val_mse``). When the validation split is
    empty there is nothing to select on, so the final-epoch weights are saved
    and ``best_val_mse`` is reported as ``inf``.

    Raises:
        RuntimeError: If the demo file contains no observations.
    """
    import copy  # local: only needed to snapshot the best weights

    parser = argparse.ArgumentParser()
    parser.add_argument("--demos", default="training/demos.npz")
    parser.add_argument("--out", default="training/runs/bc_pretrained")
    parser.add_argument("--epochs", type=int, default=60)
    parser.add_argument("--batch-size", type=int, default=256)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--val-split", type=float, default=0.1)
    parser.add_argument("--net-arch", default="256,256",
                        help="Comma-separated hidden layer widths.")
    parser.add_argument("--log-std-init", type=float, default=0.5)
    parser.add_argument("--cos-weight", type=float, default=1.0,
                        help="Weight on (1 - cosine similarity) loss term. "
                             "MSE alone shrinks policy output toward zero "
                             "(zero-magnitude action minimises mean squared "
                             "error against ±1 targets); cos loss keeps "
                             "the action pointed correctly even at small "
                             "magnitudes.")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--device", default="cpu")
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # --- Load demos ---
    print(f"[bc] loading demos from {args.demos}")
    # NpzFile holds an open file handle; the context manager closes it once
    # the arrays have been materialised.
    with np.load(args.demos) as data:
        obs = data["obs"].astype(np.float32)
        actions = data["actions"].astype(np.float32)
        meta = data["meta"]
    print(f"[bc] obs={obs.shape} actions={actions.shape} trajectories={len(meta)}")
    if obs.size == 0:
        raise RuntimeError("Empty demo file.")

    # Action sanity check — sequential outputs unit vectors.
    a_norms = np.linalg.norm(actions, axis=1)
    print(f"[bc] action L2 norm: mean={a_norms.mean():.3f} "
          f"min={a_norms.min():.3f} max={a_norms.max():.3f}")

    # --- Train/val split ---
    n = len(obs)
    perm = np.random.permutation(n)
    n_val = int(n * args.val_split)
    val_idx, train_idx = perm[:n_val], perm[n_val:]
    print(f"[bc] train={len(train_idx)} val={len(val_idx)}")

    obs_t = torch.from_numpy(obs)
    act_t = torch.from_numpy(actions)
    train_loader = DataLoader(
        TensorDataset(obs_t[train_idx], act_t[train_idx]),
        batch_size=args.batch_size, shuffle=True,
    )
    val_loader = DataLoader(
        TensorDataset(obs_t[val_idx], act_t[val_idx]),
        batch_size=args.batch_size, shuffle=False,
    )

    # --- Build model ---
    net_arch_pi = [int(x) for x in args.net_arch.split(",")]
    net_arch_vf = net_arch_pi[:]
    model, env = build_model(net_arch_pi, net_arch_vf, args.log_std_init)
    policy = model.policy.to(args.device)
    optimizer = optim.Adam(policy.parameters(), lr=args.lr)

    # --- Train ---
    print(f"[bc] training: epochs={args.epochs} batch={args.batch_size} "
          f"lr={args.lr} device={args.device}")
    t_start = time.time()
    best_val = float("inf")
    best_state = None  # snapshot of the weights at the best validation MSE

    def combined_loss(pred, target):
        """Return (total loss tensor, MSE as float, mean cosine similarity as float)."""
        mse = nn.functional.mse_loss(pred, target)
        # Clamp norms so a zero-length vector cannot divide by zero.
        p_norm = pred.norm(dim=1).clamp_min(1e-6)
        t_norm = target.norm(dim=1).clamp_min(1e-6)
        cos_sim = (pred * target).sum(dim=1) / (p_norm * t_norm)
        cos_loss = (1.0 - cos_sim).mean()
        return mse + args.cos_weight * cos_loss, mse.item(), cos_sim.mean().item()

    for epoch in range(args.epochs):
        policy.train()
        train_mse_total, train_cos_total, train_count = 0.0, 0.0, 0
        for ob_batch, act_batch in train_loader:
            ob_batch = ob_batch.to(args.device)
            act_batch = act_batch.to(args.device)
            optimizer.zero_grad()
            mean_action = policy_forward_mean(policy, ob_batch)
            loss, mse_val, cos_val = combined_loss(mean_action, act_batch)
            loss.backward()
            optimizer.step()
            bs = ob_batch.size(0)
            # Weight per-batch means by batch size so the epoch average is
            # a true per-sample average even with a short final batch.
            train_mse_total += mse_val * bs
            train_cos_total += cos_val * bs
            train_count += bs
        train_mse = train_mse_total / max(1, train_count)
        train_cos = train_cos_total / max(1, train_count)

        policy.eval()
        val_total, val_count = 0.0, 0
        cos_sim_total = 0.0
        with torch.no_grad():
            for ob_batch, act_batch in val_loader:
                ob_batch = ob_batch.to(args.device)
                act_batch = act_batch.to(args.device)
                mean_action = policy_forward_mean(policy, ob_batch)
                bs = ob_batch.size(0)
                val_total += nn.functional.mse_loss(
                    mean_action, act_batch, reduction="sum",
                ).item()
                # Cosine similarity in action space — useful sanity for
                # "is the policy pointing the same way as the teacher?".
                m_norm = mean_action.norm(dim=1).clamp_min(1e-6)
                a_norm = act_batch.norm(dim=1).clamp_min(1e-6)
                cos = (mean_action * act_batch).sum(dim=1) / (m_norm * a_norm)
                cos_sim_total += cos.sum().item()
                val_count += bs
        # Summed squared error -> mean per sample and per action dimension.
        val_mse = val_total / max(1, val_count) / actions.shape[1]
        cos_sim = cos_sim_total / max(1, val_count)
        print(f"  epoch {epoch+1:>2d}/{args.epochs}  "
              f"train_mse={train_mse:.4f}  train_cos={train_cos:+.3f}  "
              f"val_mse={val_mse:.4f}  val_cos={cos_sim:+.3f}")
        # Only select on validation when there is validation data; with an
        # empty split val_mse is a meaningless 0 and would pin epoch 1.
        if val_count > 0 and val_mse < best_val:
            best_val = val_mse
            best_state = copy.deepcopy(policy.state_dict())

    elapsed = time.time() - t_start
    print(f"[bc] done in {elapsed:.0f}s  best_val_mse={best_val:.4f}")

    # Restore the best-validation weights so the saved policy matches the
    # reported best_val_mse rather than whatever the last epoch produced.
    if best_state is not None:
        policy.load_state_dict(best_state)

    # --- Save ---
    out_dir = Path(args.out)
    out_dir.mkdir(parents=True, exist_ok=True)
    model.save(out_dir / "policy.zip")
    print(f"[bc] saved policy to {out_dir / 'policy.zip'}")
    print(f"\n[bc] verify with: "
          f"python -m training.eval --policy {out_dir}")
    env.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,14 +0,0 @@
|
||||
{
|
||||
"W_PER_SHEEP": 2.0,
|
||||
"W_ALIGN": 0.05,
|
||||
"W_PEN_BONUS": 10.0,
|
||||
"W_COMPLETE": 100.0,
|
||||
"W_STEP_COST": 0.02,
|
||||
"W_COMPACT": 0.0,
|
||||
"W_WALL_TOUCH": 0.0,
|
||||
"WALL_TOUCH_BUFFER": 0.4,
|
||||
"ALIGN_SHAPE": "standoff",
|
||||
"ALIGN_GATED": true,
|
||||
"ENTRY_AWARE": true,
|
||||
"ent_coef": 0.02
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
# PPO hyperparameters for the herding env. Tuned for a 28-D obs / 2-D
|
||||
# continuous action space with 16 parallel envs on GPU. These are SB3
|
||||
# defaults nudged toward longer credit assignment (gamma=0.995) and a
|
||||
# slightly higher entropy bonus to keep exploration alive while curriculum
|
||||
# expands the flock size.
|
||||
|
||||
# --- PPO ---
|
||||
learning_rate: 3.0e-4
|
||||
n_steps: 2048 # rollout length per env before each update
|
||||
batch_size: 256
|
||||
n_epochs: 10
|
||||
gamma: 0.995
|
||||
gae_lambda: 0.95
|
||||
clip_range: 0.2
|
||||
ent_coef: 0.05 # was 0.01 — earlier runs collapsed to ~0 actions
|
||||
vf_coef: 0.5
|
||||
max_grad_norm: 0.5
|
||||
target_kl: null # disable early-stop on KL
|
||||
|
||||
# --- Network ---
|
||||
policy: MlpPolicy
|
||||
net_arch_pi: [128, 128]
|
||||
net_arch_vf: [128, 128]
|
||||
log_std_init: 0.5     # std = exp(0.5) ≈ 1.65 instead of the default 1.0 (log_std_init = 0) — more exploration
|
||||
|
||||
# --- Training schedule ---
|
||||
total_timesteps: 10_000_000
|
||||
n_envs: 16
|
||||
checkpoint_freq: 500_000 # in env steps
|
||||
eval_freq: 100_000 # in env steps
|
||||
n_eval_episodes: 20
|
||||
|
||||
# --- Curriculum (max-n_sheep schedule, in env steps) ---
|
||||
# Each entry: at step s, raise the env's max_n_sheep to k. The env samples
|
||||
# uniformly from [1, max_n_sheep] each reset, so this widens the
|
||||
# distribution gradually rather than swapping fixed sizes.
|
||||
#
|
||||
# State-space curriculum: difficulty controls sheep spawn area
|
||||
# (0 = sheep spawn just north of gate, 1 = sheep spawn anywhere in field).
|
||||
# Plus the existing flock-size curriculum.
|
||||
#
|
||||
# The two together let the policy first learn "what penning looks like"
|
||||
# in a regime where random exploration reliably triggers it, then
|
||||
# gradually generalise to the deployment distribution.
|
||||
curriculum:
|
||||
- { step: 0, max_n_sheep: 1, difficulty: 0.0 }
|
||||
- { step: 1_000_000, max_n_sheep: 1, difficulty: 0.3 }
|
||||
- { step: 2_000_000, max_n_sheep: 2, difficulty: 0.5 }
|
||||
- { step: 4_000_000, max_n_sheep: 3, difficulty: 0.8 }
|
||||
- { step: 6_000_000, max_n_sheep: 5, difficulty: 1.0 }
|
||||
- { step: 8_000_000, max_n_sheep: 8, difficulty: 1.0 }
|
||||
- { step: 9_000_000, max_n_sheep: 10, difficulty: 1.0 }
|
||||
Binary file not shown.
@@ -0,0 +1,136 @@
|
||||
"""Evaluate a trained PPO policy (or the Strömbom baseline) on the env.
|
||||
|
||||
Reports success rate and time-to-pen across a fixed seed grid for each
|
||||
flock size 1..MAX_SHEEP. Used to produce the M5 quantitative comparison
|
||||
table mentioned in plan.md.
|
||||
|
||||
Usage::
|
||||
|
||||
python -m training.eval --policy training/runs/latest/best
|
||||
    python -m training.eval --policy strombom
    python -m training.eval --policy sequential
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from statistics import mean, stdev
|
||||
|
||||
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, ".."))
|
||||
if _PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, _PROJECT_ROOT)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from herding.geometry import MAX_SHEEP, PEN_ENTRY
|
||||
from herding.strombom import compute_action as strombom_action
|
||||
from herding.sequential import compute_action as sequential_action
|
||||
from training.herding_env import HerdingEnv
|
||||
|
||||
|
||||
def rollout(env: HerdingEnv, predict_fn, max_steps: int) -> dict:
    """Play one episode and summarise how it ended.

    Args:
        env: Environment to drive; it is reset here before the first step.
        predict_fn: Callable ``(env, obs) -> action`` queried once per step.
        max_steps: Upper bound on the number of ``env.step`` calls.

    Returns:
        Dict with keys ``success``, ``steps`` and ``n_penned``. When the env
        ends the episode itself, the values come from its final ``info``
        dict; if the step budget runs out first, the episode counts as a
        failure and ``n_penned`` is read from ``env.sheep_penned``.
    """
    observation, _ = env.reset()
    steps_taken = 0
    while steps_taken < max_steps:
        action = predict_fn(env, observation)
        observation, _reward, terminated, truncated, info = env.step(action)
        steps_taken += 1
        if not (terminated or truncated):
            continue
        # Episode ended inside the env: trust its own bookkeeping.
        return {
            "success": bool(info.get("is_success", False)),
            "steps": info.get("steps", steps_taken),
            "n_penned": info.get("n_penned", 0),
        }
    # Budget exhausted without the env signalling an end.
    return {
        "success": False,
        "steps": max_steps,
        "n_penned": int(env.sheep_penned.sum()),
    }
|
||||
|
||||
|
||||
def make_analytic_predictor(action_fn):
    """Wrap an analytic controller as a ``(env, obs) -> action`` predictor.

    ``action_fn`` is called as ``action_fn(dog_xy, positions, PEN_ENTRY)``
    and must return ``(vx, vy, mode)``; only the velocity is forwarded to
    the env. The observation is ignored: the controller reads sheep and dog
    positions straight from the env.
    """
    def _predict(env, _obs):
        # Only sheep that are not yet penned are offered to the controller.
        unpenned = {}
        for idx in range(env.n_sheep):
            if env.sheep_penned[idx]:
                continue
            unpenned[f"s{idx}"] = (float(env.sheep_x[idx]),
                                   float(env.sheep_y[idx]))
        dog_xy = (env.dog_x, env.dog_y)
        vx, vy, _mode = action_fn(dog_xy, unpenned, PEN_ENTRY)
        return np.array([vx, vy], dtype=np.float32)

    return _predict
|
||||
|
||||
|
||||
# Backwards-compat alias.
def make_strombom_predictor():
    """Return an analytic predictor bound to the Strömbom controller."""
    predictor = make_analytic_predictor(strombom_action)
    return predictor
|
||||
|
||||
|
||||
def make_policy_predictor(model, vecnorm):
    """Wrap a trained model as a ``(env, obs) -> action`` predictor.

    Args:
        model: Object exposing ``predict(obs_batch, deterministic=True)``
            that returns ``(actions, state)``.
        vecnorm: Optional normaliser exposing ``normalize_obs``; ``None``
            feeds raw observations to the model.

    Returns:
        A function that batches one observation to shape ``(1, -1)`` as
        float32, optionally normalises it, and returns the first action of
        the model's deterministic prediction.
    """
    def _predict(_env, obs):
        batch = np.asarray(obs, dtype=np.float32).reshape(1, -1)
        if vecnorm is not None:
            batch = vecnorm.normalize_obs(batch)
        actions, _state = model.predict(batch, deterministic=True)
        return actions[0]

    return _predict
|
||||
|
||||
|
||||
def main():
    """Evaluate one controller over a flock-size x seed grid and print a table.

    ``--policy`` selects the controller: ``strombom`` and ``sequential`` use
    the analytic baselines; any other value is treated as a path to an SB3
    run directory (searched for ``best_model.zip``, ``policy.zip``,
    ``final.zip``) or directly to a checkpoint zip.

    For every flock size in ``1..--max-flock`` the env is rolled out once per
    seed in ``range(--n-seeds)`` and one row is printed with the success
    rate, mean steps and mean number of penned sheep.

    Raises:
        FileNotFoundError: If a run directory holds none of the known
            checkpoint names.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--policy", required=True,
                        help="Either 'strombom', 'sequential', or path to an "
                             "SB3 run directory or checkpoint zip.")
    parser.add_argument("--n-seeds", type=int, default=10)
    parser.add_argument("--max-steps", type=int, default=5000)
    parser.add_argument("--max-flock", type=int, default=MAX_SHEEP)
    # 1.0 = deployment distribution (sheep anywhere in field).
    # Lower values use the training-curriculum spawn band (sheep near gate).
    parser.add_argument("--difficulty", type=float, default=1.0)
    args = parser.parse_args()

    # statistics.mean() raises on an empty sample, so reject an empty seed
    # grid up front with a readable CLI error instead of a traceback.
    if args.n_seeds < 1:
        parser.error("--n-seeds must be at least 1")

    if args.policy == "strombom":
        predict = make_analytic_predictor(strombom_action)
    elif args.policy == "sequential":
        predict = make_analytic_predictor(sequential_action)
    else:
        # Imported lazily so the analytic baselines run without SB3/torch.
        from stable_baselines3 import PPO
        run = Path(args.policy)
        # Resolve to a zip: directory of checkpoints, or a direct zip path.
        if run.is_file():
            zip_path = run
        else:
            for name in ("best_model.zip", "policy.zip", "final.zip"):
                if (run / name).exists():
                    zip_path = run / name
                    break
            else:
                raise FileNotFoundError(
                    f"No checkpoint found in {run} (tried best_model.zip, "
                    f"policy.zip, final.zip)"
                )
        model = PPO.load(str(zip_path), device="auto")
        vecnorm = None
        vn_path = run / "vecnormalize.pkl"
        # NOTE(review): the fallback inspects run.parent.name, not run.name —
        # confirm this matches the directory layout written by training.
        if not vn_path.exists() and run.parent.name != "best":
            vn_path = run.parent / "vecnormalize.pkl"
        if vn_path.exists():
            # SECURITY: pickle.load executes arbitrary code from the file.
            # Only point --policy at run directories you produced yourself.
            import pickle
            with open(vn_path, "rb") as f:
                vecnorm = pickle.load(f)
            # Freeze the running statistics: evaluation must not update them.
            vecnorm.training = False
            vecnorm.norm_reward = False
        predict = make_policy_predictor(model, vecnorm)

    print(f"{'n_sheep':>8} {'success%':>10} {'mean_steps':>12} {'mean_penned':>12}")
    print("-" * 46)
    for n in range(1, args.max_flock + 1):
        successes, steps, penned = [], [], []
        for seed in range(args.n_seeds):
            env = HerdingEnv(n_sheep=n, max_steps=args.max_steps,
                             difficulty=args.difficulty, seed=seed)
            r = rollout(env, predict, args.max_steps)
            successes.append(int(r["success"]))
            steps.append(r["steps"])
            penned.append(r["n_penned"])
        sr = 100.0 * mean(successes)
        ms = mean(steps)
        mp = mean(penned)
        print(f"{n:>8d} {sr:>9.1f}% {ms:>12.0f} {mp:>12.2f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
+354
-706
File diff suppressed because it is too large
Load Diff
+75
-297
@@ -1,318 +1,96 @@
|
||||
"""
|
||||
Parity test: verify 2D training env matches Webots controller implementations.
|
||||
"""Parity smoke-test for the herding env.
|
||||
|
||||
Tests:
|
||||
1. Observation building: HerdingEnv._obs() vs shepherd_dog_rl.build_obs()
|
||||
2. Dog drive: HerdingEnv._step_dog_substep() vs shepherd_dog_rl.drive() math
|
||||
3. Sheep drive: HerdingEnv._sheep_drive() vs sheep.py drive() math
|
||||
Verifies (a) all imports resolve, (b) the env's reset/step contract is
|
||||
correct, (c) deterministic seeds give deterministic trajectories, and
|
||||
(d) the Strömbom baseline can drive the env without crashing.
|
||||
|
||||
Run::
|
||||
|
||||
python -m training.parity_test
|
||||
"""
|
||||
|
||||
import sys
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import math
|
||||
import sys
|
||||
|
||||
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, ".."))
|
||||
if _PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, _PROJECT_ROOT)
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Make imports work from project root
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "controllers", "shepherd_dog_rl"))
|
||||
|
||||
from herding_env import HerdingEnv
|
||||
|
||||
# Re-implement the Webots functions standalone (no Webots dependency)
|
||||
FIELD = 15.0
|
||||
PEN_CENTER = np.array([11.5, -11.5], dtype=np.float32)
|
||||
PEN_ENTRY = np.array([11.5, -8.0], dtype=np.float32)
|
||||
PEN_X = (10.0, 13.0)
|
||||
PEN_Y = (-15.0, -8.0)
|
||||
ENTRY_AWARE = True
|
||||
from herding.geometry import MAX_SHEEP, PEN_ENTRY
|
||||
from herding.obs import OBS_DIM
|
||||
from herding.strombom import compute_action
|
||||
from training.herding_env import HerdingEnv
|
||||
|
||||
|
||||
def webots_build_obs(dog_pos, sheep_positions, n_sheep, dog_heading):
    """Standalone version of shepherd_dog_rl.py build_obs().

    Builds the 18-element float32 observation from raw positions: dog
    position, flock centre-of-mass relative to the dog, the three sheep
    farthest from the centre, pen reference offsets, flock radius, fraction
    of active sheep, and the dog heading as (cos, sin). Sheep strictly
    inside the pen rectangle (PEN_X x PEN_Y) are excluded from the flock
    statistics.
    """
    span = 2 * FIELD

    def _outside_pen(p):
        inside = PEN_X[0] < p[0] < PEN_X[1] and PEN_Y[0] < p[1] < PEN_Y[1]
        return not inside

    active = np.array([p for p in sheep_positions if _outside_pen(p)],
                      dtype=np.float32)
    n_active = len(active)

    if n_active == 0:
        # No free sheep left: collapse every flock feature onto the pen.
        com = PEN_CENTER.copy()
        radius = 0.0
        far1 = far2 = far3 = PEN_CENTER.copy()
    else:
        com = active.mean(axis=0)
        dist_to_com = np.linalg.norm(active - com, axis=1)
        order = np.argsort(dist_to_com)[::-1]  # farthest first
        radius = float(dist_to_com[order[0]])
        # Missing "n-th farthest" sheep fall back to the centre of mass.
        far1, far2, far3 = (
            active[order[k]] if k < len(order) else com for k in range(3)
        )

    frac_active = n_active / max(n_sheep, 1)
    pen_ref = PEN_ENTRY if ENTRY_AWARE else PEN_CENTER

    features = [
        dog_pos[0] / FIELD, dog_pos[1] / FIELD,
        (com[0] - dog_pos[0]) / span, (com[1] - dog_pos[1]) / span,
        (far1[0] - com[0]) / span, (far1[1] - com[1]) / span,
        (far2[0] - com[0]) / span, (far2[1] - com[1]) / span,
        (far3[0] - com[0]) / span, (far3[1] - com[1]) / span,
        (pen_ref[0] - com[0]) / span, (pen_ref[1] - com[1]) / span,
        (pen_ref[0] - far1[0]) / span, (pen_ref[1] - far1[1]) / span,
        radius / span,
        frac_active,
        math.cos(dog_heading), math.sin(dog_heading),
    ]
    return np.array(features, dtype=np.float32)
|
||||
def test_obs_action_shapes():
    """Check the reset/step contract: shapes, dtype and scalar types."""
    env = HerdingEnv(n_sheep=3, seed=0)

    first_obs, _info = env.reset()
    assert first_obs.shape == (OBS_DIM,), first_obs.shape
    assert first_obs.dtype == np.float32

    action = np.array([0.5, 0.0], dtype=np.float32)
    next_obs, reward, terminated, truncated, _info = env.step(action)
    assert next_obs.shape == (OBS_DIM,)
    assert isinstance(reward, float)
    assert isinstance(terminated, bool) and isinstance(truncated, bool)

    print("[ok] shapes")
|
||||
|
||||
|
||||
def webots_dog_drive(heading, speed_ms, wheel_r=0.038, k_turn=4.0,
|
||||
motor_max=70.0, axle_track=0.28):
|
||||
"""Standalone version of shepherd_dog_rl.py drive() kinematics.
|
||||
def test_reset_determinism():
|
||||
"""Reset with the same seed should give the same initial observation.
|
||||
|
||||
Returns (v_linear, omega, left_w, right_w).
|
||||
We don't require step-determinism — PPO doesn't need it, and chasing
|
||||
bit-exactness through the flocking jitter isn't worth the complexity.
|
||||
"""
|
||||
err = math.atan2(math.sin(heading), math.cos(heading))
|
||||
fwd_ms = speed_ms * max(0.0, math.cos(err))
|
||||
fwd_rad = fwd_ms / wheel_r
|
||||
turn = k_turn * err
|
||||
l = max(-motor_max, min(motor_max, fwd_rad - turn))
|
||||
r = max(-motor_max, min(motor_max, fwd_rad + turn))
|
||||
v = wheel_r * 0.5 * (r + l)
|
||||
w = (wheel_r / axle_track) * (r - l)
|
||||
return v, w, l, r
|
||||
env_a = HerdingEnv(n_sheep=3, seed=42)
|
||||
env_b = HerdingEnv(n_sheep=3, seed=42)
|
||||
obs_a, _ = env_a.reset(seed=42)
|
||||
obs_b, _ = env_b.reset(seed=42)
|
||||
assert np.allclose(obs_a, obs_b), "Reset is non-deterministic for same seed"
|
||||
print("[ok] reset determinism")
|
||||
|
||||
|
||||
def webots_sheep_drive(heading, speed_rad, wheel_r=0.031, k_turn=4.0,
                       motor_max=22.0, axle_track=0.20):
    """Standalone version of sheep.py drive() kinematics.

    Args:
        heading: Heading error in radians; wrapped to [-pi, pi] internally.
        speed_rad: Requested forward wheel speed (rad/s).
        wheel_r: Wheel radius used to convert wheel speed to linear speed.
        k_turn: Proportional gain applied to the heading error.
        motor_max: Symmetric clamp applied to each wheel command.
        axle_track: Distance between the wheels.

    Returns:
        ``(v, w, left, right)``: linear speed, angular speed and the two
        clamped wheel commands.
    """
    err = math.atan2(math.sin(heading), math.cos(heading))
    # Forward drive fades out as the error grows and is zero beyond 90 deg.
    fwd = speed_rad * max(0.0, math.cos(err))
    # Use the caller-supplied gain; it was previously shadowed by a
    # hard-coded 4.0, so passing k_turn had no effect.
    turn = k_turn * err
    left = max(-motor_max, min(motor_max, fwd - turn))
    right = max(-motor_max, min(motor_max, fwd + turn))
    v = wheel_r * 0.5 * (right + left)
    w = (wheel_r / axle_track) * (right - left)
    return v, w, left, right
|
||||
def test_curriculum_n_sheep_varies():
    """Reset 40 times and check the sampled flock sizes.

    The smallest size (1) must appear and no sample may exceed MAX_SHEEP.
    """
    env = HerdingEnv(seed=0)
    sizes = {env.reset()[1]["n_sheep"] for _ in range(40)}
    assert 1 in sizes
    assert max(sizes) <= MAX_SHEEP
    print(f"[ok] curriculum sampling — saw n_sheep in {sorted(sizes)}")
|
||||
|
||||
|
||||
def test_obs_parity():
    """Test that build_obs matches between 2D env and Webots controller.

    Places the dog and three sheep (the third inside the pen) at fixed
    positions, builds the observation both ways and compares element-wise.

    Returns:
        True when the largest absolute difference is below 1e-6.
    """
    print("=== Test 1: Observation Parity ===")
    env = HerdingEnv(n_sheep=3)
    # Set ENTRY_AWARE to match our webots constant
    env.ENTRY_AWARE = ENTRY_AWARE
    env.reset(seed=42)

    # Manually set positions for a controlled test
    env.dog_pos = np.array([5.0, 3.0], dtype=np.float32)
    env.dog_heading = 1.2
    fixture = (
        ((0.0, 0.0), False),
        ((2.0, -1.0), False),
        ((11.5, -11.5), True),  # penned
    )
    for idx, (xy, _is_penned) in enumerate(fixture):
        env.sheep_pos[idx] = np.array(xy, dtype=np.float32)
    for idx, (_xy, is_penned) in enumerate(fixture):
        env.penned[idx] = is_penned

    obs_2d = env._obs()

    # Build equivalent Webots observation
    sheep_positions = [env.sheep_pos[idx].tolist() for idx in range(3)]
    obs_webots = webots_build_obs(
        env.dog_pos, sheep_positions, 3, env.dog_heading
    )

    max_diff = float(np.max(np.abs(obs_2d - obs_webots)))
    matched = max_diff < 1e-6
    print(f" Max element-wise diff: {max_diff:.2e}")
    if matched:
        print(" PASS: Observations match")
        return matched

    print(" FAIL: Observations differ!")
    for i in range(18):
        if abs(obs_2d[i] - obs_webots[i]) > 1e-6:
            print(f" dim {i}: 2d={obs_2d[i]:.6f} webots={obs_webots[i]:.6f}")
    return matched
|
||||
def test_strombom_drives_env():
    """Quick functional check that the analytic baseline can play the env
    without exploding. Not a success-rate test — just no errors / NaNs."""
    env = HerdingEnv(n_sheep=2, max_steps=400, seed=1)
    obs, _ = env.reset()

    t = 0
    while t < 400:
        # Offer only the sheep that are still outside the pen.
        positions = {}
        for i in range(env.n_sheep):
            if not env.sheep_penned[i]:
                positions[f"s{i}"] = (float(env.sheep_x[i]),
                                      float(env.sheep_y[i]))
        if not positions:
            break

        vx, vy, _mode = compute_action((env.dog_x, env.dog_y), positions, PEN_ENTRY)
        action = np.array([vx, vy], dtype=np.float32)
        obs, r, term, trunc, info = env.step(action)

        assert np.isfinite(obs).all(), f"NaN/Inf in obs at step {t}"
        assert np.isfinite(r), f"NaN reward at step {t}"
        if term or trunc:
            break
        t += 1

    print(f"[ok] strombom rollout — final n_penned={int(env.sheep_penned.sum())}/{env.n_sheep} after {env.steps} steps")
|
||||
|
||||
|
||||
def test_dog_drive_parity():
    """Test that dog diff-drive matches Webots controller.

    For each (heading error, speed) case the env's dog sub-step is run from
    heading 0 and its reported ``v``, ``w`` and wheel speeds are compared
    with ``webots_dog_drive``.

    Returns:
        True when every case agrees to within 1e-6.
    """
    print("\n=== Test 2: Dog Drive Parity ===")
    env = HerdingEnv(n_sheep=1)
    env.reset(seed=42)

    all_pass = True
    test_cases = [
        # (heading_error, speed_ms) — target_heading relative to current heading
        (0.0, 2.5),   # aligned, full speed
        (0.5, 2.5),   # ~29deg error
        (1.5, 2.5),   # ~86deg error
        (3.14, 2.5),  # ~180deg error — should spin in place
        (0.0, 0.5),   # aligned, slow
        (0.3, 1.0),   # small error, medium speed
    ]

    for heading_err, speed_ms in test_cases:
        env.dog_heading = 0.0
        target_heading = heading_err
        action = np.array([
            math.cos(target_heading), math.sin(target_heading)
        ], dtype=np.float32) * (speed_ms / env.DOG_SPEED)

        # 2D env step
        dbg = env._step_dog_substep(action, 0.016)
        v_2d = dbg["v"]
        w_2d = dbg["w"]
        l_2d = dbg["left_w"]
        r_2d = dbg["right_w"]

        # Webots equivalent
        v_w, w_w, l_w, r_w = webots_dog_drive(heading_err, speed_ms)

        # label -> (env value, reference value). Keeping the pairs in a dict
        # replaces the old eval(k + '_2d') lookup, which raised NameError
        # for "left"/"right" because the locals are named l_2d / r_2d.
        pairs = {
            "v": (v_2d, v_w),
            "w": (w_2d, w_w),
            "left": (l_2d, l_w),
            "right": (r_2d, r_w),
        }
        diffs = {k: abs(got - ref) for k, (got, ref) in pairs.items()}
        max_diff = max(diffs.values())
        ok = max_diff < 1e-6
        status = "PASS" if ok else "FAIL"
        print(f" err={heading_err:.2f} spd={speed_ms:.1f}: {status} (max_diff={max_diff:.2e})")
        if not ok:
            for k, d in diffs.items():
                if d > 1e-6:
                    got, ref = pairs[k]
                    print(f" {k}: 2d={got:.6f} webots={ref:.6f}")
            all_pass = False

    return all_pass
|
||||
|
||||
|
||||
def test_sheep_drive_parity():
    """Test that sheep diff-drive matches Webots sheep controller.

    Returns:
        True when every case agrees to within 1e-6.

    NOTE(review): the "2D" wheel speeds compared below are re-derived here
    with hard-coded gain 4.0 and clamp 22.0 rather than read back from the
    env, so this check cannot detect drift inside ``env._sheep_drive``.
    Consider exposing the env's wheel commands and comparing those instead.
    """
    print("\n=== Test 3: Sheep Drive Parity ===")
    env = HerdingEnv(n_sheep=1)
    env.reset(seed=42)

    all_pass = True
    test_cases = [
        # (heading_error, speed_rad)
        (0.0, 20.0),   # aligned, flee speed
        (0.0, 3.0),    # aligned, wander speed
        (0.5, 15.0),   # moderate error
        (1.57, 10.0),  # 90deg — should spin in place
        (3.14, 20.0),  # 180deg — should spin in place fast
        (0.2, 8.0),    # small error, medium speed
    ]

    for heading_err, speed_rad in test_cases:
        env.sheep_heading[0] = 0.0
        env.sheep_pos[0] = np.array([0.0, 0.0], dtype=np.float32)
        target_heading = heading_err

        # 2D env: still exercised so a crash in the env path is caught; its
        # result was never used by the comparison, so it is not stored.
        env._sheep_drive(0, target_heading, speed_rad, 0.016)

        # Webots equivalent
        v_w, w_w, l_w, r_w = webots_sheep_drive(heading_err, speed_rad)

        # For 2D, compute the same intermediate values
        err_2d = (target_heading - 0.0 + np.pi) % (2 * np.pi) - np.pi
        fwd_2d = speed_rad * max(0.0, math.cos(err_2d))
        turn_2d = 4.0 * err_2d
        l_2d = max(-22.0, min(22.0, fwd_2d - turn_2d))
        r_2d = max(-22.0, min(22.0, fwd_2d + turn_2d))

        pairs = {"left": (l_2d, l_w), "right": (r_2d, r_w)}
        diffs = {k: abs(got - ref) for k, (got, ref) in pairs.items()}
        max_diff = max(diffs.values())
        ok = max_diff < 1e-6
        status = "PASS" if ok else "FAIL"
        print(f" err={heading_err:.2f} spd={speed_rad:.1f}: {status} (max_diff={max_diff:.2e})")
        if not ok:
            for k, d in diffs.items():
                if d > 1e-6:
                    got, ref = pairs[k]
                    print(f" {k}: 2d={got:.6f} webots={ref:.6f}")
            all_pass = False

    return all_pass
|
||||
|
||||
|
||||
def test_full_trajectory_parity():
    """Test that running identical actions produces matching trajectories.

    Runs 50 dog sub-steps with a fixed action and compares the env's dog
    heading/position at each step against a pure re-implementation of the
    Webots diff-drive kinematics.

    Returns:
        True when the maximum position difference stays below 1e-4 m. The
        heading difference is reported but does not affect the verdict.
    """
    print("\n=== Test 4: Full Trajectory Parity (dog only) ===")
    env = HerdingEnv(n_sheep=1)
    env.reset(seed=42)
    env.dog_pos = np.array([0.0, 0.0], dtype=np.float32)
    env.dog_heading = 0.0
    env.ENTRY_AWARE = ENTRY_AWARE

    action = np.array([0.8, -0.6], dtype=np.float32)  # magnitude 1.0
    dt = 0.016667  # sub_dt

    # Loop-invariant command derived from the fixed action.
    speed_ms = 1.0 * 2.5
    target_heading = math.atan2(-0.6, 0.8)

    # Webots-side tracking
    wb_heading = 0.0
    wb_x, wb_y = 0.0, 0.0

    max_heading_diff = 0.0
    max_pos_diff = 0.0

    for _step in range(50):
        # 2D env sub-step
        env._step_dog_substep(action, dt)

        # Webots-side computation
        err = math.atan2(math.sin(target_heading - wb_heading),
                         math.cos(target_heading - wb_heading))
        fwd_ms = speed_ms * max(0.0, math.cos(err))
        fwd_rad = fwd_ms / 0.038
        turn = 4.0 * err
        l = max(-70.0, min(70.0, fwd_rad - turn))
        r = max(-70.0, min(70.0, fwd_rad + turn))
        v = 0.038 * 0.5 * (r + l)
        w = (0.038 / 0.28) * (r - l)
        wb_heading = math.atan2(math.sin(wb_heading + w * dt),
                                math.cos(wb_heading + w * dt))
        wb_x += math.cos(wb_heading) * v * dt
        wb_y += math.sin(wb_heading) * v * dt

        # Compare headings on the circle: a raw subtraction reports ~2*pi
        # for two equal headings stored on opposite sides of the wrap.
        raw = env.dog_heading - wb_heading
        heading_diff = abs(math.atan2(math.sin(raw), math.cos(raw)))
        pos_diff = math.hypot(env.dog_pos[0] - wb_x, env.dog_pos[1] - wb_y)
        max_heading_diff = max(max_heading_diff, heading_diff)
        max_pos_diff = max(max_pos_diff, pos_diff)

    print(f" Max heading diff over 50 steps: {max_heading_diff:.2e} rad")
    print(f" Max position diff over 50 steps: {max_pos_diff:.2e} m")
    ok = max_pos_diff < 1e-4
    print(f" {'PASS' if ok else 'FAIL'}: Trajectories match")
    return ok
|
||||
def main():
    """Run every smoke check in order; any failing assert aborts the run."""
    checks = (
        test_obs_action_shapes,
        test_reset_determinism,
        test_curriculum_n_sheep_varies,
        test_strombom_drives_env,
    )
    for check in checks:
        check()
    print("\nAll parity checks passed.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
results = []
|
||||
results.append(("Obs parity", test_obs_parity()))
|
||||
results.append(("Dog drive parity", test_dog_drive_parity()))
|
||||
results.append(("Sheep drive parity", test_sheep_drive_parity()))
|
||||
results.append(("Trajectory parity", test_full_trajectory_parity()))
|
||||
|
||||
print("\n" + "=" * 50)
|
||||
print("RESULTS")
|
||||
print("=" * 50)
|
||||
all_pass = True
|
||||
for name, passed in results:
|
||||
print(f" {name}: {'PASS' if passed else 'FAIL'}")
|
||||
if not passed:
|
||||
all_pass = False
|
||||
print(f"\nOverall: {'ALL PASS' if all_pass else 'SOME FAILURES'}")
|
||||
env.close()
|
||||
main()
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
gymnasium>=0.29
|
||||
stable-baselines3>=2.3
|
||||
torch>=2.2
|
||||
numpy>=1.26
|
||||
matplotlib>=3.8
|
||||
tensorboard>=2.16
|
||||
# Pin major versions; SB3 2.x requires gymnasium and torch >= 1.13.
|
||||
gymnasium>=0.29,<2.0
|
||||
stable-baselines3[extra]>=2.3,<3.0
|
||||
torch>=2.1
|
||||
numpy>=1.24
|
||||
pyyaml>=6.0
|
||||
tensorboard>=2.14
|
||||
tqdm>=4.66
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
|
||||
|
||||
@@ -1,392 +0,0 @@
|
||||
"""
|
||||
PPO training for the herding task with curriculum learning.
|
||||
|
||||
Trains from scratch through a 1→max_sheep curriculum, evaluates after each
|
||||
stage, and auto-generates trajectory/timeseries plots plus a summary chart.
|
||||
|
||||
Usage
|
||||
-----
|
||||
python train.py # defaults from config.json
|
||||
python train.py --config my_config.json --max-sheep 5
|
||||
python train.py --max-sheep 3 --steps-per-stage 1000000
|
||||
|
||||
Outputs (in runs/<timestamp>/):
|
||||
config.json resolved config
|
||||
final_model.zip trained PPO model
|
||||
vecnorm.pkl VecNormalize statistics
|
||||
stage_results.json per-stage evaluation metrics
|
||||
success_rate.png summary bar chart
|
||||
eval/ trajectory & timeseries plots per sheep count
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from copy import deepcopy
|
||||
|
||||
import numpy as np
|
||||
from stable_baselines3 import PPO
|
||||
from stable_baselines3.common.callbacks import BaseCallback
|
||||
from stable_baselines3.common.vec_env import (
|
||||
DummyVecEnv,
|
||||
SubprocVecEnv,
|
||||
VecNormalize,
|
||||
)
|
||||
|
||||
from herding_env import HerdingEnv
|
||||
from viz import (
|
||||
run_and_record,
|
||||
plot_trajectory,
|
||||
plot_timeseries,
|
||||
plot_success_rate,
|
||||
save_episode_gif,
|
||||
)
|
||||
|
||||
|
||||
# ── Callbacks ────────────────────────────────────────────────────────────────
|
||||
|
||||
class ProgressCallback(BaseCallback):
    """One-line progress summary every `freq` env steps.

    Tracks per-env running returns from the rewards seen by the callback,
    keeps a sliding window of the most recent finished episodes, and prints
    the windowed mean return, windowed success rate and cumulative success
    rate. An episode counts as a success when its final info reports
    ``n_penned == n_sheep``.
    """

    # Number of most recent episodes kept for the windowed statistics.
    _WINDOW = 50

    def __init__(self, stage_label: str, freq: int = 100_000):
        # Function-scope import keeps this class self-contained.
        from collections import deque

        super().__init__()
        self.stage_label = stage_label
        self.freq = freq
        self._last = 0
        # Bounded deques drop the oldest entry automatically, replacing the
        # manual "append, then pop(0) once over 50" bookkeeping.
        self._ep_returns = deque(maxlen=self._WINDOW)
        self._ep_success = deque(maxlen=self._WINDOW)
        self._total_eps = 0
        self._total_success = 0
        self._cur_ret = None

    def _on_step(self) -> bool:
        rewards = self.locals.get("rewards")
        dones = self.locals.get("dones")
        infos = self.locals.get("infos", [])
        if rewards is None or dones is None:
            return True
        # (Re)allocate the per-env accumulators if the env count changed.
        if self._cur_ret is None or len(self._cur_ret) != len(rewards):
            self._cur_ret = np.zeros(len(rewards), dtype=np.float64)
        self._cur_ret += np.asarray(rewards, dtype=np.float64)
        for i, d in enumerate(dones):
            if not d:
                continue
            self._ep_returns.append(float(self._cur_ret[i]))
            info = infos[i] if i < len(infos) else {}
            # Mismatched defaults (0 vs -1) make a missing key a failure.
            success = int(info.get("n_penned", 0) == info.get("n_sheep", -1))
            self._ep_success.append(success)
            self._total_eps += 1
            self._total_success += success
            self._cur_ret[i] = 0.0
        if self.num_timesteps - self._last >= self.freq:
            self._last = self.num_timesteps
            n = len(self._ep_returns)
            mean_r = float(np.mean(self._ep_returns)) if n else float("nan")
            win_sr = float(np.mean(self._ep_success)) if n else float("nan")
            cum_sr = (self._total_success / self._total_eps
                      if self._total_eps else float("nan"))
            print(f" ... [{self.stage_label} | "
                  f"{self.num_timesteps:>7,} steps | "
                  f"ret(last {n})={mean_r:+.2f} "
                  f"win_sr={win_sr*100:.0f}% cum_sr={cum_sr*100:.0f}%]",
                  flush=True)
        return True
|
||||
|
||||
|
||||
# ── Environment factory ──────────────────────────────────────────────────────
|
||||
|
||||
def make_env(n_sheep, seed, max_steps, reward_cfg=None):
    """Return a zero-argument factory that builds one seeded HerdingEnv.

    The factory constructs the env with the given flock size, step limit
    and reward config, resets it once with ``seed``, and returns it.
    """
    env_kwargs = dict(n_sheep=n_sheep, max_steps=max_steps,
                      reward_cfg=reward_cfg)

    def _factory():
        built = HerdingEnv(**env_kwargs)
        built.reset(seed=seed)
        return built

    return _factory
|
||||
|
||||
|
||||
# ── Failure-mode classification ──────────────────────────────────────────────
|
||||
|
||||
# Flock radius at or below which the flock counts as "compact".
COMPACT_RADIUS = 5.0


def _classify(ep_radii, ep_com_dists, n_penned, n_sheep):
    """Label how an evaluation episode ended.

    Args:
        ep_radii: Per-step flock radius samples (must be non-empty unless
            the episode is a full success).
        ep_com_dists: Per-step distance from flock centre to the pen,
            aligned with ``ep_radii``.
        n_penned: Sheep penned at the end of the episode.
        n_sheep: Flock size.

    Returns:
        ``"SUCCESS"``, ``"NEVER_COMPACT"``, ``"COMPACT_CANT_DRIVE"``,
        ``"DROVE_NO_SHEEP"`` or ``"PARTIAL_<k>of<n>"``.
    """
    if n_penned == n_sheep:
        return "SUCCESS"

    tightest = min(ep_radii)
    if tightest > COMPACT_RADIUS:
        return "NEVER_COMPACT"

    # Index of the first step at which the flock was compact.
    compact_from = next(
        idx for idx, radius in enumerate(ep_radii) if radius <= COMPACT_RADIUS
    )
    closest_after_compact = min(ep_com_dists[compact_from:])
    if closest_after_compact > 3.0:
        return "COMPACT_CANT_DRIVE"

    if n_penned == 0:
        return "DROVE_NO_SHEEP"
    return f"PARTIAL_{n_penned}of{n_sheep}"
|
||||
|
||||
|
||||
# ── Evaluation ───────────────────────────────────────────────────────────────
|
||||
|
||||
def evaluate(model, vn_template, n_sheep, n_episodes, max_steps,
             reward_cfg=None):
    """Evaluate at a given sheep count; returns metrics dict.

    Builds a single-env ``DummyVecEnv`` wrapped in a frozen ``VecNormalize``
    that copies the running statistics from ``vn_template``, then plays
    ``n_episodes`` deterministic episodes.

    Returns:
        Dict with ``sr`` (success rate), ``mean_len``, ``mean_min_pen``
        (mean over episodes of the closest flock-centre-to-pen distance),
        ``mean_act`` (mean action norm), ``failure_modes`` (label -> count)
        and, when the env reports ``rcomps``, ``reward_per_step``.
    """
    raw = DummyVecEnv([make_env(n_sheep, 9999, max_steps, reward_cfg)])
    vn = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
    vn.obs_rms = deepcopy(vn_template.obs_rms)
    vn.ret_rms = deepcopy(vn_template.ret_rms)

    successes = 0
    ep_lens = []
    min_pen_list = []
    action_mags = []
    failure_counts = {}
    rc_sums = {}
    rc_n = 0

    # try/finally so the vec env is closed even if a rollout raises.
    try:
        for _ in range(n_episodes):
            obs = vn.reset()
            done = False
            steps = 0
            min_pen = float("inf")
            mags = []
            ep_radii = []
            ep_com_dists = []
            while not done:
                action, _ = model.predict(obs, deterministic=True)
                obs, _, dones, infos = vn.step(action)
                done = dones[0]
                mags.append(float(np.linalg.norm(action[0])))
                steps += 1
                # SB3 vec envs auto-reset a finished env inside step(), so
                # on the terminal step the inner env already holds the NEXT
                # episode's state. Skip that sample so it cannot leak into
                # this episode's statistics. The one exception is a
                # single-step episode, where it is kept so the series stay
                # non-empty for _classify.
                if not done or not ep_radii:
                    inner = vn.envs[0]
                    com, radius, _ = inner._flock_stats()
                    pen_dist = float(np.linalg.norm(com - inner.PEN_CENTER))
                    min_pen = min(min_pen, pen_dist)
                    ep_radii.append(radius)
                    ep_com_dists.append(pen_dist)
                rc = infos[0].get("rcomps")
                if rc:
                    for k, v in rc.items():
                        rc_sums[k] = rc_sums.get(k, 0.0) + v
                    rc_n += 1
            # infos[0] is the terminal step's info and is unaffected by the
            # auto-reset.
            n_penned = infos[0].get("n_penned", 0)
            success = n_penned == n_sheep
            successes += int(success)
            ep_lens.append(steps)
            min_pen_list.append(min_pen)
            action_mags.extend(mags)
            mode = _classify(ep_radii, ep_com_dists, n_penned, n_sheep)
            failure_counts[mode] = failure_counts.get(mode, 0) + 1
    finally:
        vn.close()

    result = {
        "sr": successes / n_episodes,
        "mean_len": float(np.mean(ep_lens)),
        "mean_min_pen": float(np.mean(min_pen_list)),
        "mean_act": float(np.mean(action_mags)) if action_mags else 0.0,
        "failure_modes": failure_counts,
    }
    if rc_n > 0:
        result["reward_per_step"] = {k: v / rc_n for k, v in rc_sums.items()}
    return result
|
||||
|
||||
|
||||
|
||||
# ── CLI ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
DEFAULT_CONFIG = {
|
||||
"W_PER_SHEEP": 2.0,
|
||||
"W_ALIGN": 0.05,
|
||||
"W_PEN_BONUS": 10.0,
|
||||
"W_COMPLETE": 100.0,
|
||||
"W_STEP_COST": 0.02,
|
||||
"W_SOUTH": 0.01,
|
||||
"W_COMPACT": 0.0,
|
||||
"W_WALL_TOUCH": 0.04,
|
||||
"WALL_TOUCH_BUFFER": 0.3,
|
||||
"ALIGN_SHAPE": "standoff",
|
||||
"ALIGN_GATED": True,
|
||||
"ENTRY_AWARE": True,
|
||||
"ent_coef": 0.02,
|
||||
}
|
||||
|
||||
|
||||
def parse_args():
    """Parse the training CLI and return the populated namespace.

    Options: ``--config``, ``--max-sheep``, ``--steps-per-stage``,
    ``--n-envs``, ``--max-steps``, ``--eval-episodes``, ``--run-dir``,
    ``--no-gif``, ``--gif-fps`` and ``--gif-skip``.
    """
    parser = argparse.ArgumentParser(
        description="PPO training for herding task with curriculum learning")
    parser.add_argument("--config", type=str, default=None,
                        help="JSON config file (reward weights + ent_coef)")

    # Plain integer options, registered in their original order.
    int_options = (
        ("--max-sheep", 10),
        ("--steps-per-stage", 1_500_000),
        ("--n-envs", 8),
        ("--max-steps", 2500),
        ("--eval-episodes", 30),
    )
    for flag, default in int_options:
        parser.add_argument(flag, type=int, default=default)

    parser.add_argument("--run-dir", type=str, default=None)
    parser.add_argument("--no-gif", action="store_true",
                        help="Skip per-stage GIF rendering (PNGs still produced).")
    parser.add_argument("--gif-fps", type=int, default=20)
    parser.add_argument("--gif-skip", type=int, default=3,
                        help="Keep every Nth frame (smaller GIF; default 3).")
    return parser.parse_args()
|
||||
|
||||
|
||||
# ── Main ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main():
    """Run curriculum PPO training for the herding task.

    Merges ``DEFAULT_CONFIG`` with an optional JSON config, creates the run
    directory, builds ``--n-envs`` normalised training envs and a PPO model,
    then for each flock size 1..``--max-sheep`` trains, evaluates and renders
    one recorded episode.

    The model, the VecNormalize statistics and ``stage_results.json`` are
    written after every completed stage, so an interrupted or crashed run
    keeps the artefacts of the last finished stage.
    """
    args = parse_args()

    # Load config: --config overrides, else auto-load config.json if present
    cfg = dict(DEFAULT_CONFIG)
    config_path = args.config
    if config_path is None and os.path.exists("config.json"):
        config_path = "config.json"
    if config_path:
        with open(config_path) as f:
            cfg.update(json.load(f))
        print(f"Config loaded from {config_path}")

    # Only keys that exist as attributes on HerdingEnv are forwarded to it.
    rcfg = {k: v for k, v in cfg.items() if hasattr(HerdingEnv, k)}

    # Run directory
    run_dir = args.run_dir or os.path.join(
        "runs", time.strftime("%Y%m%d_%H%M%S"))
    eval_dir = os.path.join(run_dir, "eval")
    os.makedirs(eval_dir, exist_ok=True)
    with open(os.path.join(run_dir, "config.json"), "w") as f:
        json.dump(cfg, f, indent=2)

    print(f"Config: {cfg}")
    print(f"Run dir: {run_dir}")
    print(f"Curriculum: 1 → {args.max_sheep} sheep, "
          f"{args.steps_per_stage:,} steps/stage\n")

    # Training envs
    train_env = SubprocVecEnv([
        make_env(1, seed=i, max_steps=args.max_steps, reward_cfg=rcfg)
        for i in range(args.n_envs)
    ])
    vn = VecNormalize(train_env, norm_obs=True, norm_reward=True,
                      clip_obs=10.0)

    # Model — force CPU (PPO with MLP runs faster on CPU than GPU; SB3 warns
    # about this otherwise).
    model = PPO(
        "MlpPolicy", vn,
        learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10,
        gamma=0.995, gae_lambda=0.95, clip_range=0.2,
        ent_coef=cfg.get("ent_coef", 0.02), vf_coef=0.5, max_grad_norm=0.5,
        policy_kwargs=dict(net_arch=[256, 256]),
        device="cpu",
        verbose=0,
    )

    # Curriculum training
    stage_results = []
    t0 = time.time()

    try:
        for n in range(1, args.max_sheep + 1):
            if n == 1:
                print(f"\n[Stage n_sheep=1] training {args.steps_per_stage:,} steps")
                model.learn(
                    total_timesteps=args.steps_per_stage,
                    reset_num_timesteps=True,
                    callback=ProgressCallback("1 sheep", freq=100_000),
                )
            else:
                # Mixed transition: half the envs stay at n-1, half advance
                # to n, for the first half of the stage budget. This keeps
                # the new n-sheep task's noisy early gradients from
                # destroying the (n-1)-sheep policy (catastrophic
                # forgetting) before it has a chance to adapt.
                half = max(1, args.n_envs // 2)
                for i in range(half):
                    vn.env_method("set_n_sheep", n - 1, indices=[i])
                for i in range(half, args.n_envs):
                    vn.env_method("set_n_sheep", n, indices=[i])
                mix_steps = args.steps_per_stage // 2
                full_steps = args.steps_per_stage - mix_steps
                print(f"\n[Stage n_sheep={n}] mixed ({n-1}/{n} sheep) "
                      f"{mix_steps:,} steps")
                model.learn(
                    total_timesteps=mix_steps,
                    reset_num_timesteps=False,
                    callback=ProgressCallback(f"{n-1}→{n} mix", freq=100_000),
                )
                # Second half of the budget: every env runs the full n flock.
                vn.env_method("set_n_sheep", n)
                print(f"[Stage n_sheep={n}] full ({n} sheep) {full_steps:,} steps")
                model.learn(
                    total_timesteps=full_steps,
                    reset_num_timesteps=False,
                    callback=ProgressCallback(f"{n} sheep", freq=100_000),
                )

            # Evaluate
            print(f"[Stage n_sheep={n}] evaluating {args.eval_episodes} eps")
            r = evaluate(model, vn, n, args.eval_episodes, args.max_steps, rcfg)
            print(f"[Stage n_sheep={n}] sr={r['sr']*100:.0f}% "
                  f"mean_len={r['mean_len']:.0f} "
                  f"mean_min_pen={r['mean_min_pen']:.1f}m "
                  f"mean_act={r['mean_act']:.2f}")

            # Failure-mode breakdown, most frequent first
            if r["failure_modes"]:
                modes = " ".join(
                    f"{k}={v}" for k, v in sorted(
                        r["failure_modes"].items(), key=lambda x: -x[1]))
                print(f" failure modes: {modes}")

            # Reward breakdown
            if "reward_per_step" in r:
                rps = r["reward_per_step"]
                print(" reward/step: " + " ".join(
                    f"{k}={v:+.4f}" for k, v in rps.items()))

            # Episode visualisation: trajectory + timeseries + animated GIF
            hist = run_and_record(model, vn, n, args.max_steps, rcfg,
                                  seed=1000 + n)
            tag = "success" if hist["success"] else "fail"
            plot_trajectory(
                hist,
                os.path.join(eval_dir, f"traj_{n}s_{tag}.png"))
            plot_timeseries(
                hist,
                os.path.join(eval_dir, f"ts_{n}s_{tag}.png"))
            if not args.no_gif:
                save_episode_gif(
                    hist,
                    os.path.join(eval_dir, f"ep_{n}s_{tag}.gif"),
                    fps=args.gif_fps, skip=args.gif_skip)

            r["n_sheep"] = n
            stage_results.append(r)

            # Save artefacts after every stage (each save overwrites the
            # previous one) so a failure in a later stage cannot lose the
            # training done so far.
            model.save(os.path.join(run_dir, "final_model"))
            vn.save(os.path.join(run_dir, "vecnorm.pkl"))
            with open(os.path.join(run_dir, "stage_results.json"), "w") as f:
                json.dump(stage_results, f, indent=2)

    finally:
        # Best-effort shutdown of the worker processes; a failure here must
        # not mask an exception raised by training itself.
        try:
            vn.close()
        except Exception:
            pass

    # Summary
    elapsed = (time.time() - t0) / 60
    print("\n" + "=" * 70)
    print(" TRAINING SUMMARY")
    print("=" * 70)
    for r in stage_results:
        print(f" n_sheep={r['n_sheep']} sr={r['sr']*100:>3.0f}% "
              f"len={r['mean_len']:>5.0f} min_pen={r['mean_min_pen']:>5.1f}m "
              f"act={r['mean_act']:.2f}")
    print(f"\n Total time: {elapsed:.1f} min")
    print(f" Artefacts: {run_dir}/")

    plot_success_rate(stage_results, os.path.join(run_dir, "success_rate.png"))
    print(f" Plots: {run_dir}/success_rate.png, {eval_dir}/")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,412 +0,0 @@
|
||||
"""
|
||||
PPO training with attention-based policy (train_at.py).
|
||||
|
||||
Key difference from train.py
|
||||
-----------------------------
|
||||
- Observation exposes ALL sheep as individual per-sheep tokens rather than
|
||||
only the top-3 farthest. The policy therefore has complete flock visibility
|
||||
at any sheep count — no hidden sheep even at n=10.
|
||||
- A ShepherdAttentionExtractor (transformer encoder) processes the sheep tokens with multi-head
|
||||
self-attention (permutation-invariant), then mean-pools over valid tokens
|
||||
and concatenates the result with global dog/pen features.
|
||||
- Curriculum transition uses the same mixed-env approach as train.py: half
|
||||
the envs stay at n-1 for the first half of each new stage to suppress
|
||||
catastrophic forgetting.
|
||||
|
||||
Observation layout (7 + MAX_SHEEP*6 = 67 dims, fixed)
|
||||
-------------------------------------------------------
|
||||
Global (7):
|
||||
dog_x / FIELD, dog_y / FIELD,
|
||||
cos(heading), sin(heading),
|
||||
(pen_x - dog_x) / D, (pen_y - dog_y) / D,
|
||||
n_active / n_sheep
|
||||
|
||||
Per sheep i (6):
|
||||
(sheep_x - dog_x) / D, (sheep_y - dog_y) / D, ← pos rel to dog
|
||||
(pen_x - sheep_x) / D, (pen_y - sheep_y) / D, ← sheep-to-pen
|
||||
is_active 1.0 if not penned, else 0.0
|
||||
is_valid 1.0 if i < n_sheep, else 0.0 (padding sentinel)
|
||||
|
||||
After VecNormalize, is_valid for real sheep normalises > 0 and for
|
||||
padding tokens < 0 (because mean ∈ (0,1)), so a threshold of 0 cleanly
|
||||
separates real from padded without any extra bookkeeping.
|
||||
|
||||
Usage
|
||||
-----
|
||||
python train_at.py # defaults from config.json
|
||||
python train_at.py --max-sheep 10 --steps-per-stage 2000000
|
||||
python train_at.py --embed-dim 128 --n-heads 4 --n-layers 3
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from copy import deepcopy
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from gymnasium import spaces
|
||||
from stable_baselines3 import PPO
|
||||
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
|
||||
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize
|
||||
|
||||
from herding_env import HerdingEnv
|
||||
from train import ProgressCallback, _classify, COMPACT_RADIUS, DEFAULT_CONFIG
|
||||
from viz import (
|
||||
run_and_record, plot_trajectory, plot_timeseries,
|
||||
plot_success_rate, save_episode_gif,
|
||||
)
|
||||
|
||||
|
||||
# ── Per-sheep token observation environment ───────────────────────────────────
|
||||
|
||||
class HerdingEnvAt(HerdingEnv):
    """HerdingEnv variant whose observation is one token per sheep.

    Only ``observation_space`` and ``_obs`` are overridden here; everything
    else comes from ``HerdingEnv``.  The flat observation is
    ``OBS_GLOBAL`` global features followed by ``MAX_SHEEP`` tokens of
    ``OBS_SHEEP`` features each.
    """

    OBS_GLOBAL = 7   # number of global (dog / pen) features
    OBS_SHEEP = 6    # number of features in each sheep token

    def __init__(self, *args, **kwargs):
        """Build the parent env, then replace its observation space."""
        super().__init__(*args, **kwargs)
        flat_len = self.OBS_GLOBAL + self.MAX_SHEEP * self.OBS_SHEEP
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(flat_len,), dtype=np.float32
        )

    def _obs(self) -> np.ndarray:
        """Return the flat float32 observation (global features + tokens).

        Token rows at index >= ``n_sheep`` stay all-zero, so their last
        component (is_valid) is 0.
        """
        field = self.FIELD
        span = 2.0 * self.FIELD
        target = self.PEN_ENTRY if self.ENTRY_AWARE else self.PEN_CENTER
        dog = self.dog_pos
        still_free = ~self.penned[:self.n_sheep]
        free_count = int(still_free.sum())

        head = np.array(
            [
                dog[0] / field,
                dog[1] / field,
                float(np.cos(self.dog_heading)),
                float(np.sin(self.dog_heading)),
                (target[0] - dog[0]) / span,
                (target[1] - dog[1]) / span,
                free_count / max(self.n_sheep, 1),
            ],
            dtype=np.float32,
        )

        tokens = np.zeros((self.MAX_SHEEP, self.OBS_SHEEP), dtype=np.float32)
        for idx in range(self.n_sheep):
            sx, sy = self.sheep_pos[idx][0], self.sheep_pos[idx][1]
            tokens[idx] = [
                (sx - dog[0]) / span,              # position relative to dog
                (sy - dog[1]) / span,
                (target[0] - sx) / span,           # sheep-to-pen vector
                (target[1] - sy) / span,
                float(not self.penned[idx]),       # is_active
                1.0,                               # is_valid: this sheep exists
            ]

        return np.concatenate([head, tokens.ravel()])
|
||||
|
||||
|
||||
# ── Attention features extractor ──────────────────────────────────────────────
|
||||
|
||||
class ShepherdAttentionExtractor(BaseFeaturesExtractor):
    """Self-attention over per-sheep tokens, pooled and joined with globals.

    The flat observation is split into ``GLOBAL_DIM`` global features and
    ``MAX_SHEEP`` tokens of ``SHEEP_DIM`` features.  Tokens are linearly
    embedded, passed through a transformer encoder, mean-pooled over the
    tokens flagged valid, and concatenated with the global features, giving
    ``GLOBAL_DIM + embed_dim`` output features.

    A token counts as valid when its (normalised) is_valid component is
    strictly positive; everything else is treated as padding.

    NOTE(review): the sign test assumes the observation normaliser maps
    is_valid=1 to a positive value and is_valid=0 to a non-positive one.  A
    slot whose flag has never changed would normalise to roughly 0 — confirm
    this holds for every curriculum stage.
    """

    GLOBAL_DIM = HerdingEnvAt.OBS_GLOBAL   # 7
    SHEEP_DIM = HerdingEnvAt.OBS_SHEEP     # 6
    MAX_SHEEP = HerdingEnv.MAX_SHEEP       # 10
    VALID_IDX = 5                          # index of is_valid within each token

    def __init__(self, observation_space, embed_dim: int = 64,
                 n_heads: int = 4, n_layers: int = 2, ff_dim: int = 128):
        """Create the token embedding and the transformer encoder.

        Args:
            observation_space: flat observation space handed over by SB3.
            embed_dim: width of each embedded token (transformer d_model).
            n_heads: number of attention heads.
            n_layers: number of encoder layers.
            ff_dim: width of each encoder layer's feed-forward block.
        """
        super().__init__(observation_space,
                         features_dim=self.GLOBAL_DIM + embed_dim)
        self.sheep_embed = nn.Linear(self.SHEEP_DIM, embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim, nhead=n_heads, dim_feedforward=ff_dim,
            dropout=0.0, batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer,
                                                 num_layers=n_layers,
                                                 enable_nested_tensor=False)

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        """Map a batch of flat observations to ``(B, GLOBAL_DIM + E)`` features.

        Rows in which no token is flagged valid yield an all-zero pooled
        vector instead of NaN.
        """
        B = obs.shape[0]
        global_feats = obs[:, :self.GLOBAL_DIM]                     # (B, 7)
        # reshape (not view) so a non-contiguous input cannot raise.
        tokens = obs[:, self.GLOBAL_DIM:].reshape(
            B, self.MAX_SHEEP, self.SHEEP_DIM)                      # (B, 10, 6)

        is_valid_norm = tokens[:, :, self.VALID_IDX]                # (B, 10)
        key_padding_mask = is_valid_norm <= 0.0                     # True → ignore

        # If every key of a row is masked the attention softmax has nothing
        # to normalise over and returns NaN, which would then leak through
        # the pooling (NaN * 0 is NaN).  Un-mask the first token of such rows
        # for attention only; the pooling weights below still exclude it.
        fully_padded = key_padding_mask.all(dim=1)                  # (B,)
        key_padding_mask[:, 0] = key_padding_mask[:, 0] & ~fully_padded

        x = self.sheep_embed(tokens)                                # (B, 10, E)
        x = self.transformer(x, src_key_padding_mask=key_padding_mask)

        # Mean over valid tokens; the clamp avoids 0/0 when none is valid.
        valid_w = (is_valid_norm > 0.0).float().unsqueeze(-1)       # (B, 10, 1)
        pooled = (x * valid_w).sum(1) / valid_w.sum(1).clamp(min=1.0)

        return torch.cat([global_feats, pooled], dim=1)             # (B, 7+E)
|
||||
|
||||
|
||||
# ── Environment factory ───────────────────────────────────────────────────────
|
||||
|
||||
def make_env_at(n_sheep, seed, max_steps, reward_cfg=None):
    """Return a zero-argument factory that builds a seeded ``HerdingEnvAt``.

    The environment is constructed and reset (with ``seed``) only when the
    returned callable is invoked, which is the form the vectorised-env
    constructors expect.
    """
    def _build():
        created = HerdingEnvAt(
            n_sheep=n_sheep,
            max_steps=max_steps,
            reward_cfg=reward_cfg,
        )
        created.reset(seed=seed)
        return created

    return _build
|
||||
|
||||
|
||||
# ── Evaluation ────────────────────────────────────────────────────────────────
|
||||
|
||||
def evaluate_at(model, vn_template, n_sheep, n_episodes, max_steps,
                reward_cfg=None):
    """Run deterministic evaluation episodes and aggregate the statistics.

    A single ``HerdingEnvAt`` is wrapped in a frozen ``VecNormalize`` that
    uses copies of the training normalisation statistics.

    Args:
        model: policy exposing ``predict(obs, deterministic=True)``.
        vn_template: training ``VecNormalize`` whose ``obs_rms`` / ``ret_rms``
            are copied.
        n_sheep: flock size to evaluate on.
        n_episodes: number of episodes to run (must be positive).
        max_steps: per-episode step limit passed to the env.
        reward_cfg: optional reward configuration passed to the env.

    Returns:
        dict with ``sr``, ``mean_len``, ``mean_min_pen``, ``mean_act`` and
        ``failure_modes``; plus ``reward_per_step`` when the env reported
        ``rcomps`` in its info dicts.

    Raises:
        ValueError: if ``n_episodes`` is not positive.
    """
    if n_episodes <= 0:
        raise ValueError(f"n_episodes must be positive, got {n_episodes}")

    raw = DummyVecEnv([make_env_at(n_sheep, 9999, max_steps, reward_cfg)])
    vn = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
    vn.obs_rms = deepcopy(vn_template.obs_rms)
    vn.ret_rms = deepcopy(vn_template.ret_rms)

    successes = 0
    ep_lens, min_pen_list, action_mags = [], [], []
    failure_counts, rc_sums = {}, {}
    rc_n = 0

    try:
        inner = vn.envs[0]
        for _ in range(n_episodes):
            obs = vn.reset()
            done = False
            steps, min_pen = 0, float("inf")
            n_penned = 0
            mags, ep_radii, ep_com_dists = [], [], []
            while not done:
                action, _ = model.predict(obs, deterministic=True)

                # Sample the flock state BEFORE stepping.  DummyVecEnv resets
                # the env as soon as an episode ends, so reading it after the
                # terminal step would return the next episode's start state.
                # NOTE(review): assumes _flock_stats() has no side effects.
                com, radius, _ = inner._flock_stats()
                com_dist = float(np.linalg.norm(com - inner.PEN_CENTER))

                obs, _, dones, infos = vn.step(action)
                done = dones[0]

                min_pen = min(min_pen, com_dist)
                mags.append(float(np.linalg.norm(action[0])))
                ep_radii.append(radius)
                ep_com_dists.append(com_dist)
                steps += 1

                # Accumulate per-component rewards when the env reports them.
                rc = infos[0].get("rcomps")
                if rc:
                    for k, v in rc.items():
                        rc_sums[k] = rc_sums.get(k, 0.0) + v
                    rc_n += 1
                n_penned = infos[0].get("n_penned", 0)

            successes += int(n_penned == n_sheep)
            ep_lens.append(steps)
            min_pen_list.append(min_pen)
            action_mags.extend(mags)
            mode = _classify(ep_radii, ep_com_dists, n_penned, n_sheep)
            failure_counts[mode] = failure_counts.get(mode, 0) + 1
    finally:
        # Release the evaluation env even when an episode raises.
        vn.close()

    result = {
        "sr": successes / n_episodes,
        "mean_len": float(np.mean(ep_lens)),
        "mean_min_pen": float(np.mean(min_pen_list)),
        "mean_act": float(np.mean(action_mags)) if action_mags else 0.0,
        "failure_modes": failure_counts,
    }
    if rc_n > 0:
        result["reward_per_step"] = {k: v / rc_n for k, v in rc_sums.items()}
    return result
|
||||
|
||||
|
||||
# ── CLI ───────────────────────────────────────────────────────────────────────
|
||||
|
||||
def parse_args():
    """Parse the command line for the attention-policy trainer.

    Returns:
        argparse.Namespace with the curriculum / run options (``config``,
        ``max_sheep``, ``steps_per_stage``, ``n_envs``, ``max_steps``,
        ``eval_episodes``, ``run_dir``, ``no_gif``, ``gif_fps``,
        ``gif_skip``) and the transformer sizes (``embed_dim``, ``n_heads``,
        ``n_layers``, ``ff_dim``).
    """
    parser = argparse.ArgumentParser(
        description="PPO + attention training for herding task")
    parser.add_argument("--config", type=str, default=None)

    # Curriculum / rollout sizes.
    for flag, default in (
        ("--max-sheep", 10),
        ("--steps-per-stage", 1_500_000),
        ("--n-envs", 8),
        ("--max-steps", 2500),
        ("--eval-episodes", 30),
    ):
        parser.add_argument(flag, type=int, default=default)

    parser.add_argument("--run-dir", type=str, default=None)
    parser.add_argument("--no-gif", action="store_true")
    parser.add_argument("--gif-fps", type=int, default=20)
    parser.add_argument("--gif-skip", type=int, default=3)

    # Attention architecture
    for flag, default, text in (
        ("--embed-dim", 64, "Transformer embedding dimension (default 64)"),
        ("--n-heads", 4, "Number of attention heads (default 4)"),
        ("--n-layers", 2,
         "Number of transformer encoder layers (default 2)"),
        ("--ff-dim", 128, "Transformer feed-forward dim (default 128)"),
    ):
        parser.add_argument(flag, type=int, default=default, help=text)

    return parser.parse_args()
|
||||
|
||||
|
||||
# ── Main ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main():
    """Run curriculum PPO training with the attention features extractor.

    Merges ``DEFAULT_CONFIG`` with an optional JSON config, creates the run
    directory (default ``runs/at_<timestamp>``), builds ``--n-envs``
    normalised ``HerdingEnvAt`` envs and a PPO model that uses
    ``ShepherdAttentionExtractor``, then for each flock size
    1..``--max-sheep`` trains, evaluates and renders one recorded episode.

    The model, the VecNormalize statistics and ``stage_results.json`` are
    written after every completed stage, so an interrupted or crashed run
    keeps the artefacts of the last finished stage.
    """
    args = parse_args()

    # --config wins; otherwise a config.json in the working directory is used.
    cfg = dict(DEFAULT_CONFIG)
    config_path = args.config
    if config_path is None and os.path.exists("config.json"):
        config_path = "config.json"
    if config_path:
        with open(config_path) as f:
            cfg.update(json.load(f))
        print(f"Config loaded from {config_path}")

    # Only keys that exist as attributes on HerdingEnv are forwarded to it.
    rcfg = {k: v for k, v in cfg.items() if hasattr(HerdingEnv, k)}

    run_dir = args.run_dir or os.path.join(
        "runs", "at_" + time.strftime("%Y%m%d_%H%M%S"))
    eval_dir = os.path.join(run_dir, "eval")
    os.makedirs(eval_dir, exist_ok=True)
    with open(os.path.join(run_dir, "config.json"), "w") as f:
        json.dump(cfg, f, indent=2)

    print(f"Config: {cfg}")
    print(f"Run dir: {run_dir}")
    print(f"Curriculum: 1 → {args.max_sheep} sheep, "
          f"{args.steps_per_stage:,} steps/stage")
    print(f"Transformer: embed={args.embed_dim} heads={args.n_heads} "
          f"layers={args.n_layers} ff={args.ff_dim}\n")

    train_env = SubprocVecEnv([
        make_env_at(1, seed=i, max_steps=args.max_steps, reward_cfg=rcfg)
        for i in range(args.n_envs)
    ])
    vn = VecNormalize(train_env, norm_obs=True, norm_reward=True, clip_obs=10.0)

    model = PPO(
        "MlpPolicy", vn,
        learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10,
        gamma=0.995, gae_lambda=0.95, clip_range=0.2,
        ent_coef=cfg.get("ent_coef", 0.02), vf_coef=0.5, max_grad_norm=0.5,
        policy_kwargs=dict(
            features_extractor_class=ShepherdAttentionExtractor,
            features_extractor_kwargs=dict(
                embed_dim=args.embed_dim,
                n_heads=args.n_heads,
                n_layers=args.n_layers,
                ff_dim=args.ff_dim,
            ),
            net_arch=[256, 256],
        ),
        device="cpu",
        verbose=0,
    )

    stage_results = []
    t0 = time.time()

    try:
        for n in range(1, args.max_sheep + 1):
            if n == 1:
                print(f"\n[Stage n_sheep=1] training {args.steps_per_stage:,} steps")
                model.learn(
                    total_timesteps=args.steps_per_stage,
                    reset_num_timesteps=True,
                    callback=ProgressCallback("1 sheep", freq=100_000),
                )
            else:
                # Mixed transition: the first half of the envs stays at n-1
                # sheep while the rest advance to n, for the first half of
                # the stage budget; afterwards every env runs n sheep.
                half = max(1, args.n_envs // 2)
                mix_steps = args.steps_per_stage // 2
                full_steps = args.steps_per_stage - mix_steps

                for i in range(half):
                    vn.env_method("set_n_sheep", n - 1, indices=[i])
                for i in range(half, args.n_envs):
                    vn.env_method("set_n_sheep", n, indices=[i])

                print(f"\n[Stage n_sheep={n}] mixed ({n-1}/{n} sheep) "
                      f"{mix_steps:,} steps")
                model.learn(
                    total_timesteps=mix_steps,
                    reset_num_timesteps=False,
                    callback=ProgressCallback(f"{n-1}→{n} mix", freq=100_000),
                )

                vn.env_method("set_n_sheep", n)
                print(f"[Stage n_sheep={n}] full ({n} sheep) {full_steps:,} steps")
                model.learn(
                    total_timesteps=full_steps,
                    reset_num_timesteps=False,
                    callback=ProgressCallback(f"{n} sheep", freq=100_000),
                )

            print(f"[Stage n_sheep={n}] evaluating {args.eval_episodes} eps")
            r = evaluate_at(model, vn, n, args.eval_episodes,
                            args.max_steps, rcfg)
            print(f"[Stage n_sheep={n}] sr={r['sr']*100:.0f}% "
                  f"mean_len={r['mean_len']:.0f} "
                  f"mean_min_pen={r['mean_min_pen']:.1f}m "
                  f"mean_act={r['mean_act']:.2f}")
            # Failure modes, most frequent first.
            if r["failure_modes"]:
                modes = " ".join(
                    f"{k}={v}" for k, v in sorted(
                        r["failure_modes"].items(), key=lambda x: -x[1]))
                print(f" failure modes: {modes}")
            if "reward_per_step" in r:
                rps = r["reward_per_step"]
                print(" reward/step: " + " ".join(
                    f"{k}={v:+.4f}" for k, v in rps.items()))

            # One recorded episode per stage: trajectory, timeseries, GIF.
            hist = run_and_record(
                model, vn, n, args.max_steps, rcfg,
                seed=1000 + n, make_env_fn=make_env_at,
            )
            tag = "success" if hist["success"] else "fail"
            plot_trajectory(hist, os.path.join(eval_dir, f"traj_{n}s_{tag}.png"))
            plot_timeseries(hist, os.path.join(eval_dir, f"ts_{n}s_{tag}.png"))
            if not args.no_gif:
                save_episode_gif(
                    hist,
                    os.path.join(eval_dir, f"ep_{n}s_{tag}.gif"),
                    fps=args.gif_fps, skip=args.gif_skip)

            r["n_sheep"] = n
            stage_results.append(r)

            # Persist after every stage (each save overwrites the previous
            # one) so a failure in a later stage cannot lose the training
            # done so far.
            model.save(os.path.join(run_dir, "final_model"))
            vn.save(os.path.join(run_dir, "vecnorm.pkl"))
            with open(os.path.join(run_dir, "stage_results.json"), "w") as f:
                json.dump(stage_results, f, indent=2)

    finally:
        # Best-effort shutdown of the worker processes; a failure here must
        # not mask an exception raised by training itself.
        try:
            vn.close()
        except Exception:
            pass

    elapsed = (time.time() - t0) / 60
    print("\n" + "=" * 70)
    print(" TRAINING SUMMARY (attention policy)")
    print("=" * 70)
    for r in stage_results:
        print(f" n_sheep={r['n_sheep']} sr={r['sr']*100:>3.0f}% "
              f"len={r['mean_len']:>5.0f} "
              f"min_pen={r['mean_min_pen']:>5.1f}m "
              f"act={r['mean_act']:.2f}")
    print(f"\n Total time: {elapsed:.1f} min")
    print(f" Artefacts: {run_dir}/")
    plot_success_rate(stage_results, os.path.join(run_dir, "success_rate.png"))
    print(f" Plots: {run_dir}/success_rate.png, {eval_dir}/")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,267 @@
|
||||
"""Train a PPO shepherd-dog policy on ``HerdingEnv`` with curriculum.
|
||||
|
||||
Defaults to 16 parallel ``SubprocVecEnv`` workers feeding a CPU policy (pass ``--device cuda`` to run the policy on a GPU).
|
||||
Saves checkpoints, the best-eval model, and the VecNormalize stats —
|
||||
all three are needed at inference time by the Webots controller.
|
||||
|
||||
Usage::
|
||||
|
||||
python -m training.train_ppo \
|
||||
--config training/configs/ppo_default.yaml \
|
||||
--out-dir training/runs/baseline
|
||||
|
||||
To resume from a checkpoint::
|
||||
|
||||
python -m training.train_ppo --resume training/runs/baseline/checkpoints/ppo_500000_steps.zip
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, ".."))
|
||||
if _PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, _PROJECT_ROOT)
|
||||
|
||||
import numpy as np
|
||||
import torch as th
|
||||
from stable_baselines3 import PPO
|
||||
from stable_baselines3.common.callbacks import (
|
||||
BaseCallback, CheckpointCallback, EvalCallback,
|
||||
)
|
||||
from stable_baselines3.common.monitor import Monitor
|
||||
from stable_baselines3.common.vec_env import (
|
||||
DummyVecEnv, SubprocVecEnv, VecNormalize,
|
||||
)
|
||||
|
||||
from training.herding_env import HerdingEnv
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Env factories
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
def _make_env(rank: int, seed: int = 0):
    """Return a factory for one ``Monitor``-wrapped ``HerdingEnv``.

    Each worker gets its own seed, ``seed + rank``.  The monitor also logs
    the ``is_success``, ``n_sheep`` and ``n_penned`` info keys.
    """
    worker_seed = seed + rank

    def _factory():
        base_env = HerdingEnv(seed=worker_seed)
        return Monitor(
            base_env,
            info_keywords=("is_success", "n_sheep", "n_penned"),
        )

    return _factory
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Curriculum callback
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
class CurriculumCallback(BaseCallback):
    """Apply a step-indexed curriculum of flock size and difficulty.

    ``schedule`` is a list of dicts with keys ``step``, ``max_n_sheep`` and
    optionally ``difficulty`` (treated as 1.0 when absent).  On every step
    the last entry whose ``step`` has been reached is the active one; before
    any entry is reached the earliest entry is used.  Env setters are only
    called when a value differs from the one applied previously.
    """

    def __init__(self, schedule, vec_envs, verbose: int = 0):
        """Store the schedule (sorted by ``step``) and the envs to drive.

        ``vec_envs`` may be a single vec env or a list/tuple of them, so an
        eval env can follow the training difficulty.
        """
        super().__init__(verbose)
        self.schedule = sorted(schedule, key=lambda item: item["step"])
        if isinstance(vec_envs, (list, tuple)):
            self.vec_envs = vec_envs
        else:
            self.vec_envs = [vec_envs]
        # Values most recently pushed to the envs (None = nothing pushed yet).
        self._last_n = None
        self._last_d = None

    def _call(self, method, value):
        """Invoke ``method(value)`` on every env of every registered vec env."""
        for vec_env in self.vec_envs:
            try:
                vec_env.env_method(method, value)
            except AttributeError:
                # Fall back to the wrapped vec env.
                vec_env.venv.env_method(method, value)

    def _on_step(self) -> bool:
        """Push the active schedule entry to the envs; never stops training."""
        now = self.num_timesteps

        # Schedule is sorted, so the last entry already reached wins.
        active = self.schedule[0]
        for entry in self.schedule:
            if now >= entry["step"]:
                active = entry
        flock = active["max_n_sheep"]
        level = active.get("difficulty", 1.0)

        if flock != self._last_n:
            self._call("set_max_n_sheep", flock)
            self._last_n = flock
        if level != self._last_d:
            self._call("set_difficulty", level)
            self._last_d = level
            # NOTE(review): log placement assumed to sit inside the
            # difficulty-change branch — confirm against the original file.
            if self.verbose:
                print(f"[curriculum] t={now} → max_n_sheep={flock} difficulty={level}")
        return True
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Main
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
def main():
    """Train (or resume) a PPO policy on ``HerdingEnv`` from a YAML config.

    Builds the vectorised train/eval envs (optionally wrapped in
    ``VecNormalize``), applies the command-line env overrides, creates or
    loads the PPO model, runs training with checkpoint / eval / curriculum
    callbacks and finally writes ``final.zip`` plus — when normalisation is
    enabled — the VecNormalize statistics next to the final and best models.

    Differences from the naive flow that are deliberate:
      * ``--learning-rate`` is honoured for fresh runs as well as resumes.
      * VecNormalize statistics are only saved when the wrapper exists, so
        ``--no-vecnorm`` runs finish cleanly.
      * Both vec envs are closed when training ends or fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default=os.path.join(_HERE, "configs", "ppo_default.yaml"))
    parser.add_argument("--out-dir", default=os.path.join(_HERE, "runs", "latest"))
    parser.add_argument("--n-envs", type=int, default=None,
                        help="Override config n_envs.")
    parser.add_argument("--total-timesteps", type=int, default=None,
                        help="Override config total_timesteps.")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--resume", type=str, default=None,
                        help="Path to a SB3 zip to resume from.")
    # SB3 recommends CPU for MlpPolicy — GPU helps CNN policies, not MLPs
    # of this size. Override with --device cuda if you really want it.
    parser.add_argument("--device", default="cpu")
    parser.add_argument("--no-vecnorm", action="store_true",
                        help="Disable VecNormalize wrapper. Required when "
                             "resuming from a BC-pretrained policy that "
                             "wasn't trained under it.")
    parser.add_argument("--no-curriculum", action="store_true",
                        help="Skip curriculum callback (resumed policy is "
                             "already competent across the distribution).")
    parser.add_argument("--imitate-weight", type=float, default=None,
                        help="Override env W_IMITATE. Set to 0 to disable "
                             "Strömbom imitation reward.")
    parser.add_argument("--difficulty", type=float, default=None,
                        help="Override env difficulty (0=easy, 1=hard). "
                             "Used in BC fine-tune to skip easy curriculum.")
    parser.add_argument("--log-std", type=float, default=None,
                        help="Override the policy's log_std after load. "
                             "BC trained with std≈1.6 (log_std=0.5) which "
                             "is too noisy for fine-tune. Use -1.5 (std≈0.22) "
                             "to keep PPO close to the BC mean while still "
                             "exploring locally.")
    parser.add_argument("--learning-rate", type=float, default=None,
                        help="Override config learning rate. For BC "
                             "fine-tune, 5e-5 is much safer than the 3e-4 "
                             "default.")
    args = parser.parse_args()

    with open(args.config) as f:
        cfg = yaml.safe_load(f)

    n_envs = args.n_envs or cfg["n_envs"]
    total_timesteps = args.total_timesteps or cfg["total_timesteps"]
    use_vecnorm = not args.no_vecnorm

    out = Path(args.out_dir)
    out.mkdir(parents=True, exist_ok=True)
    (out / "checkpoints").mkdir(exist_ok=True)
    (out / "best").mkdir(exist_ok=True)
    (out / "evals").mkdir(exist_ok=True)

    print(f"[train] out={out} n_envs={n_envs} total={total_timesteps} device={args.device}")

    # --- Train env (vectorised, optionally normalised) ---
    env_fns = [_make_env(i, seed=args.seed) for i in range(n_envs)]
    venv = SubprocVecEnv(env_fns) if n_envs > 1 else DummyVecEnv(env_fns)
    eval_venv = DummyVecEnv([_make_env(99, seed=args.seed + 999)])

    try:
        if use_vecnorm:
            venv = VecNormalize(venv, norm_obs=True, norm_reward=False, clip_obs=10.0)
            eval_venv = VecNormalize(eval_venv, norm_obs=True, norm_reward=False,
                                     clip_obs=10.0, training=False)
            # Share the statistics object so evaluation always normalises
            # with the current training statistics.
            eval_venv.obs_rms = venv.obs_rms
        else:
            print("[train] VecNormalize disabled (resumed policy was trained without it).")

        # Apply env-level overrides (used by BC fine-tune to disable Strömbom
        # imitation and start at full deployment difficulty).
        def _env_call(method, value):
            for v in (venv, eval_venv):
                try:
                    v.env_method(method, value)
                except AttributeError:
                    v.venv.env_method(method, value)

        if args.imitate_weight is not None:
            _env_call("set_imitate_weight", args.imitate_weight)
            print(f"[train] W_IMITATE overridden to {args.imitate_weight}")
        if args.difficulty is not None:
            _env_call("set_difficulty", args.difficulty)
            print(f"[train] difficulty pinned to {args.difficulty}")

        # --- Model ---
        if args.resume:
            print(f"[train] resuming from {args.resume}")
            custom_objects = {}
            if args.learning_rate is not None:
                custom_objects["learning_rate"] = args.learning_rate
            model = PPO.load(args.resume, env=venv, device=args.device,
                             tensorboard_log=str(out / "tb"),
                             custom_objects=custom_objects or None)
            if args.log_std is not None:
                with th.no_grad():
                    model.policy.log_std.fill_(args.log_std)
                print(f"[train] log_std overridden to {args.log_std} "
                      f"(std≈{np.exp(args.log_std):.2f})")
        else:
            policy_kwargs = dict(
                net_arch=dict(pi=cfg["net_arch_pi"], vf=cfg["net_arch_vf"]),
                log_std_init=cfg.get("log_std_init", 0.0),
            )
            # The command-line override takes precedence over the config
            # for fresh runs too.
            learning_rate = (args.learning_rate
                             if args.learning_rate is not None
                             else cfg["learning_rate"])
            model = PPO(
                cfg["policy"], venv,
                learning_rate=learning_rate,
                n_steps=cfg["n_steps"],
                batch_size=cfg["batch_size"],
                n_epochs=cfg["n_epochs"],
                gamma=cfg["gamma"],
                gae_lambda=cfg["gae_lambda"],
                clip_range=cfg["clip_range"],
                ent_coef=cfg["ent_coef"],
                vf_coef=cfg["vf_coef"],
                max_grad_norm=cfg["max_grad_norm"],
                target_kl=cfg.get("target_kl"),
                policy_kwargs=policy_kwargs,
                tensorboard_log=str(out / "tb"),
                seed=args.seed,
                device=args.device,
                verbose=1,
            )
        if args.learning_rate is not None:
            print(f"[train] learning_rate overridden to {args.learning_rate}")

        # --- Callbacks ---
        # The config frequencies are divided by n_envs before being handed
        # to the callbacks.
        ckpt_cb = CheckpointCallback(
            save_freq=max(1, cfg["checkpoint_freq"] // n_envs),
            save_path=str(out / "checkpoints"), name_prefix="ppo",
            save_vecnormalize=True,
        )
        eval_cb = EvalCallback(
            eval_venv,
            best_model_save_path=str(out / "best"),
            log_path=str(out / "evals"),
            eval_freq=max(1, cfg["eval_freq"] // n_envs),
            n_eval_episodes=cfg["n_eval_episodes"],
            deterministic=True,
        )
        callbacks = [ckpt_cb, eval_cb]
        if not args.no_curriculum and cfg.get("curriculum"):
            callbacks.append(CurriculumCallback(
                cfg["curriculum"], [venv, eval_venv], verbose=1,
            ))
        elif args.no_curriculum:
            print("[train] curriculum disabled — env knobs left at their current values.")

        # --- Train ---
        model.learn(total_timesteps=total_timesteps, callback=callbacks,
                    progress_bar=True)

        # --- Save final model + VecNormalize stats ---
        model.save(out / "final.zip")
        if use_vecnorm:
            # Only the VecNormalize wrapper has save(); with --no-vecnorm
            # there are no statistics to write.
            venv.save(str(out / "vecnormalize.pkl"))
            # The EvalCallback already wrote best_model.zip into out/best/ —
            # drop the VecNormalize stats next to it for the controller to
            # pick up.
            venv.save(str(out / "best" / "vecnormalize.pkl"))
        print(f"[train] done. saved to {out}")
    finally:
        # Always shut the env workers down, including on failure.
        for v in (venv, eval_venv):
            try:
                v.close()
            except Exception as exc:
                # Best-effort cleanup: report, but never mask a training error.
                print(f"[train] warning: failed to close env: {exc}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
-342
@@ -1,342 +0,0 @@
|
||||
"""
|
||||
All visualization for the herding policy: trajectory plots, timeseries plots,
|
||||
success-rate bar chart, and animated GIFs.
|
||||
|
||||
Used both by train.py (auto-rendered after each curriculum stage) and as a CLI
|
||||
to render a fresh episode against a saved model.
|
||||
|
||||
CLI usage:
|
||||
python viz.py --run-dir runs/v1 --n-sheep 5
|
||||
python viz.py --run-dir runs/v1 --n-sheep 10 --no-gif
|
||||
python viz.py --model runs/v1/final_model.zip --vecnorm runs/v1/vecnorm.pkl \\
|
||||
--n-sheep 3 --out-dir vis_v1_3sheep
|
||||
"""
|
||||
import argparse
|
||||
import os
|
||||
import json
|
||||
from copy import deepcopy
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.patches as mpatches
|
||||
import matplotlib.animation as animation
|
||||
from matplotlib.collections import LineCollection
|
||||
import numpy as np
|
||||
from stable_baselines3 import PPO
|
||||
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
|
||||
|
||||
from herding_env import HerdingEnv
|
||||
|
||||
|
||||
# ── Palette ──────────────────────────────────────────────────────────────────
|
||||
|
||||
SHEEP_COLORS = [
|
||||
"#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00",
|
||||
"#a65628", "#f781bf", "#999999", "#66c2a5", "#fc8d62",
|
||||
]
|
||||
DOG_COLOR = "#4e342e"
|
||||
|
||||
|
||||
# ── Common drawing primitives ────────────────────────────────────────────────
|
||||
|
||||
def draw_field(ax):
    """Draw the static arena backdrop onto ``ax``.

    Fixes the view to [-16, 16] on both axes with an equal aspect ratio and
    a pale green background, then adds an unfilled 30 x 30 outline anchored
    at (-15, -15), a filled 3 x 7 rectangle anchored at (10, -15), and a
    centred "pen" label inside that rectangle.
    """
    outline = "#795548"

    # View window and background.
    ax.set_xlim(-16, 16)
    ax.set_ylim(-16, 16)
    ax.set_aspect("equal")
    ax.set_facecolor("#dcedc8")

    # Outer boundary first, pen rectangle second (same stacking as before).
    boundary = mpatches.Rectangle(
        (-15, -15), 30, 30, fill=False, edgecolor=outline, lw=2)
    pen_area = mpatches.Rectangle(
        (10, -15), 3, 7, facecolor="#ffe082", edgecolor=outline, lw=2)
    ax.add_patch(boundary)
    ax.add_patch(pen_area)

    ax.text(11.5, -11.5, "pen",
            ha="center", va="center", fontsize=8, color=outline)
|
||||
|
||||
|
||||
def faded_path(ax, xs, ys, color, lw=1.5, label=None):
    """Draw the polyline (xs, ys) on ``ax`` with opacity rising along its length.

    Each consecutive pair of points becomes one line segment; segment alpha
    is spaced linearly from 0.15 (first segment) to 1.0 (last segment).
    Nothing is drawn when fewer than two points are given. When ``label`` is
    truthy an empty line with the same colour and width is added so the path
    shows up in the legend.
    """
    if len(xs) < 2:
        return

    # (n, 2) array of vertices, then (n - 1, 2, 2) array of [start, end] pairs.
    vertices = np.array([xs, ys]).T
    segments = np.stack((vertices[:-1], vertices[1:]), axis=1)

    base_rgb = matplotlib.colors.to_rgb(color)
    rgba = [(*base_rgb, alpha)
            for alpha in np.linspace(0.15, 1.0, len(segments))]

    ax.add_collection(LineCollection(segments, colors=rgba, linewidth=lw))

    if label:
        # Empty proxy artist: a LineCollection carries no legend label here.
        ax.plot([], [], color=color, lw=lw, label=label)
|
||||
|
||||
|
||||
# ── Episode rollout ──────────────────────────────────────────────────────────
|
||||
|
||||
def make_eval_env(n_sheep, seed, max_steps, reward_cfg=None):
    """Return a zero-argument factory that builds a seeded ``HerdingEnv``.

    The returned callable constructs the environment with the given sheep
    count, step limit and reward configuration, calls ``reset(seed=seed)``
    on it once, and returns the environment.
    """
    def _build():
        herding_env = HerdingEnv(
            n_sheep=n_sheep,
            max_steps=max_steps,
            reward_cfg=reward_cfg,
        )
        herding_env.reset(seed=seed)
        return herding_env

    return _build
|
||||
|
||||
|
||||
def run_and_record(model, vn_template, n_sheep, max_steps,
                   reward_cfg=None, seed=42, make_env_fn=None):
    """Roll out one deterministic episode and return its recorded history.

    A fresh single-environment ``DummyVecEnv`` is built (via ``make_env_fn``
    when given, otherwise ``make_eval_env``) and wrapped in a non-training
    ``VecNormalize`` whose ``obs_rms`` / ``ret_rms`` are deep copies of those
    on ``vn_template``. The policy is stepped until the environment reports
    done; after every step the dog position, each sheep's position, penned
    flag and distance to ``PEN_CENTER``, the flock radius, the reward and the
    action norm are appended to per-step lists.

    Returns a dict with the per-step lists plus ``penned_at`` (for each
    sheep, the 1-based step count at which it was first seen penned, or
    ``None``), ``n_penned`` (taken from the last ``info`` dict), ``n_sheep``,
    ``success`` and ``steps``.
    """
    env_factory = make_env_fn if make_env_fn else make_eval_env
    base_env = DummyVecEnv(
        [env_factory(n_sheep, seed, max_steps, reward_cfg)])
    norm_env = VecNormalize(
        base_env, norm_obs=True, norm_reward=False, training=False)
    norm_env.obs_rms = deepcopy(vn_template.obs_rms)
    norm_env.ret_rms = deepcopy(vn_template.ret_rms)

    obs = norm_env.reset()
    sim = norm_env.envs[0]
    sheep_ids = range(n_sheep)

    dog_xs, dog_ys = [], []
    sheep_xs = [[] for _ in sheep_ids]
    sheep_ys = [[] for _ in sheep_ids]
    sheep_penned = [[] for _ in sheep_ids]
    radii = []
    pen_dists = [[] for _ in sheep_ids]
    action_mags = []
    rewards = []
    penned_at = [None] * n_sheep
    step = 0

    # NOTE(review): stable-baselines3 VecEnvs reset a sub-environment as soon
    # as it reports done, so the attributes read from `sim` on the final
    # iteration may describe the freshly reset episode — confirm.
    while True:
        action, _state = model.predict(obs, deterministic=True)
        obs, reward, dones, infos = norm_env.step(action)
        step += 1

        dog_xs.append(float(sim.dog_pos[0]))
        dog_ys.append(float(sim.dog_pos[1]))
        _com, radius, _unused = sim._flock_stats()
        radii.append(radius)
        rewards.append(float(reward[0]))
        action_mags.append(float(np.linalg.norm(action[0])))

        for idx in sheep_ids:
            position = sim.sheep_pos[idx]
            is_penned = sim.penned[idx]
            sheep_xs[idx].append(float(position[0]))
            sheep_ys[idx].append(float(position[1]))
            sheep_penned[idx].append(bool(is_penned))
            pen_dists[idx].append(
                float(np.linalg.norm(position - sim.PEN_CENTER)))
            # Latch the first step at which this sheep shows up as penned.
            if is_penned and penned_at[idx] is None:
                penned_at[idx] = step

        if dones[0]:
            break

    n_penned = infos[0].get("n_penned", 0)
    norm_env.close()

    return {
        "dog_xs": dog_xs,
        "dog_ys": dog_ys,
        "sheep_xs": sheep_xs,
        "sheep_ys": sheep_ys,
        "sheep_penned": sheep_penned,
        "radii": radii,
        "pen_dists": pen_dists,
        "action_mags": action_mags,
        "rewards": rewards,
        "penned_at": penned_at,
        "n_penned": n_penned,
        "n_sheep": n_sheep,
        "success": n_penned == n_sheep,
        "steps": step,
    }
|
||||
|
||||
|
||||
# ── Static plots ─────────────────────────────────────────────────────────────
|
||||
|
||||
def plot_trajectory(hist, out_path):
    """Save a top-down trajectory plot of one recorded episode to ``out_path``.

    ``hist`` is the dict returned by ``run_and_record``. Each sheep gets a
    faded path, a circle at its first recorded position and a star at the
    position recorded on the step it was penned (or at its last recorded
    position when it was never penned). The dog gets a faded path, a square
    at its start and a diamond at its end. The title reports success or the
    penned count together with the episode length.
    """
    fig, ax = plt.subplots(figsize=(7, 7))
    draw_field(ax)

    for i in range(hist["n_sheep"]):
        c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
        xs, ys = hist["sheep_xs"][i], hist["sheep_ys"][i]
        faded_path(ax, xs, ys, c, lw=1.2, label=f"sheep {i+1}")
        ax.plot(xs[0], ys[0], "o", color=c, ms=7, zorder=4)

        penned_step = hist["penned_at"][i]
        if penned_step is None:
            end = -1
        else:
            # `penned_at` is a 1-based step count, while the position lists
            # are 0-based (the sample for step k sits at index k - 1). Using
            # the raw count pointed one sample late and ran off the end of
            # the list when the sheep was penned on the final step.
            end = min(max(penned_step - 1, 0), len(xs) - 1)
        ax.plot(xs[end], ys[end], "*", color=c, ms=11, zorder=5)

    faded_path(ax, hist["dog_xs"], hist["dog_ys"], DOG_COLOR, lw=2.0,
               label="dog")
    ax.plot(hist["dog_xs"][0], hist["dog_ys"][0], "s", color=DOG_COLOR,
            ms=10, zorder=5)
    ax.plot(hist["dog_xs"][-1], hist["dog_ys"][-1], "D", color=DOG_COLOR,
            ms=10, zorder=5)

    result = ("SUCCESS" if hist["success"]
              else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})")
    ax.set_title(f"n={hist['n_sheep']} {result} {hist['steps']} steps",
                 fontsize=12)
    ax.legend(loc="upper left", fontsize=8)
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
|
||||
|
||||
|
||||
def plot_timeseries(hist, out_path):
    """Save a four-panel per-step time-series figure to ``out_path``.

    ``hist`` is the dict returned by ``run_and_record``. The panels share an
    x axis and show, top to bottom: flock radius (with a 5.0 reference
    line), each sheep's distance to the pen (with a dotted vertical line at
    its ``penned_at`` value, when set), the dog's action magnitude (with a
    1.0 reference line) and the per-step reward.
    """
    steps_axis = np.arange(hist["steps"])
    n_sheep = hist["n_sheep"]
    fig, panels = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
    radius_ax, pen_ax, action_ax, reward_ax = panels

    # Panel 1: flock radius.
    radius_ax.plot(steps_axis, hist["radii"], color="steelblue")
    radius_ax.axhline(5.0, color="orange", ls="--", lw=1,
                      label="compact (5m)")
    radius_ax.set_ylabel("flock radius (m)")
    radius_ax.legend(fontsize=8)
    radius_ax.set_title("Flock radius")

    # Panel 2: one distance-to-pen curve per sheep.
    for idx in range(n_sheep):
        shade = SHEEP_COLORS[idx % len(SHEEP_COLORS)]
        pen_ax.plot(steps_axis, hist["pen_dists"][idx], color=shade, lw=1,
                    label=f"sheep {idx+1}")
        penned_step = hist["penned_at"][idx]
        if penned_step is not None:
            pen_ax.axvline(penned_step, color=shade, ls=":", lw=1)
    pen_ax.set_ylabel("dist to pen (m)")
    pen_ax.legend(fontsize=7, ncol=min(n_sheep, 5))
    pen_ax.set_title("Per-sheep distance to pen")

    # Panel 3: dog action magnitude.
    action_ax.plot(steps_axis, hist["action_mags"], color="tomato", lw=1)
    action_ax.axhline(1.0, color="gray", ls="--", lw=1, label="max")
    action_ax.set_ylabel("action ||(vx,vy)||")
    action_ax.set_ylim(0, 1.5)
    action_ax.set_title("Dog action magnitude")
    action_ax.legend(fontsize=8)

    # Panel 4: reward.
    reward_ax.plot(steps_axis, hist["rewards"], color="purple", lw=1,
                   alpha=0.7)
    reward_ax.axhline(0, color="black", lw=0.5)
    reward_ax.set_ylabel("reward")
    reward_ax.set_xlabel("step")
    reward_ax.set_title("Reward per step")

    if hist["success"]:
        outcome = "SUCCESS"
    else:
        outcome = f"FAIL ({hist['n_penned']}/{hist['n_sheep']})"
    fig.suptitle(f"n_sheep={hist['n_sheep']} {outcome} {hist['steps']} steps",
                 fontsize=13)
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
|
||||
|
||||
|
||||
def plot_success_rate(stage_results, out_path):
    """Save a bar chart of success rate per sheep count to ``out_path``.

    ``stage_results`` is an iterable of dicts, each read for its
    ``"n_sheep"`` (bar position) and ``"sr"`` (success fraction, shown as a
    percentage). Every bar is annotated with its rounded percentage and a
    dashed reference line marks 90%.
    """
    sheep_counts = []
    percentages = []
    for entry in stage_results:
        sheep_counts.append(entry["n_sheep"])
        percentages.append(entry["sr"] * 100)

    fig, ax = plt.subplots(figsize=(8, 4))
    rects = ax.bar(sheep_counts, percentages,
                   color="steelblue", edgecolor="white")
    ax.set_xlabel("Sheep count")
    ax.set_ylabel("Success rate (%)")
    ax.set_ylim(0, 105)
    ax.axhline(90, color="orange", ls="--", lw=1, label="90% target")

    # Percentage label centred just above each bar.
    for rect, pct in zip(rects, percentages):
        label_x = rect.get_x() + rect.get_width() / 2
        label_y = rect.get_height() + 1
        ax.text(label_x, label_y, f"{pct:.0f}%", ha="center", fontsize=9)

    ax.legend()
    ax.set_title("Evaluation success rate per sheep count")
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
|
||||
|
||||
|
||||
# ── Animated GIF ─────────────────────────────────────────────────────────────
|
||||
|
||||
def save_episode_gif(hist, out_path, fps=20, skip=3):
    """Write the recorded episode ``hist`` to ``out_path`` as an animated GIF.

    Every ``skip``-th recorded step becomes a frame (a stride below 1 is
    treated as 1) and the final step is always included. Each frame shows
    the dog marker with its trail so far, one marker per sheep (recoloured
    "deeppink" while that sheep is flagged as penned) and a title with the
    step number and penned count. ``fps`` sets both the animation interval
    and the writer frame rate.
    """
    n_sheep = hist["n_sheep"]
    total_steps = hist["steps"]
    final_index = total_steps - 1

    frame_ids = list(range(0, total_steps, max(1, skip)))
    if frame_ids[-1] != final_index:
        frame_ids.append(final_index)

    fig, ax = plt.subplots(figsize=(6, 6))
    draw_field(ax)
    title = ax.text(0, 16.5, "", ha="center", fontsize=11)
    dog_marker = ax.plot([], [], "s", color=DOG_COLOR, ms=12,
                         markeredgecolor="black", markeredgewidth=1.5,
                         zorder=5)[0]
    sheep_markers = [
        ax.plot([], [], "o", color=SHEEP_COLORS[i % len(SHEEP_COLORS)],
                ms=10, markeredgecolor="#333", markeredgewidth=1,
                zorder=4)[0]
        for i in range(n_sheep)
    ]
    dog_trail = ax.plot([], [], color=DOG_COLOR, lw=1.0, alpha=0.5)[0]

    def update(k):
        # Redraw every artist for recorded step index k.
        penned_now = sum(hist['sheep_penned'][i][k] for i in range(n_sheep))
        title.set_text(
            f"n={n_sheep} step {k+1}/{hist['steps']} "
            f"penned {penned_now}/{n_sheep}")
        dog_marker.set_data([hist["dog_xs"][k]], [hist["dog_ys"][k]])
        dog_trail.set_data(hist["dog_xs"][:k+1], hist["dog_ys"][:k+1])
        for i, marker in enumerate(sheep_markers):
            marker.set_data([hist["sheep_xs"][i][k]],
                            [hist["sheep_ys"][i][k]])
            if hist["sheep_penned"][i][k]:
                marker.set_color("deeppink")
            else:
                marker.set_color(SHEEP_COLORS[i % len(SHEEP_COLORS)])
        return [title, dog_marker, dog_trail, *sheep_markers]

    anim = animation.FuncAnimation(
        fig, update, frames=frame_ids, interval=1000 / fps, blit=False)
    anim.save(out_path, writer=animation.PillowWriter(fps=fps), dpi=80)
    plt.close(fig)
|
||||
|
||||
|
||||
# ── CLI ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _resolve_paths(args):
    """Return ``(model_path, vecnorm_path, config_path)`` for the parsed CLI args.

    When ``args.run_dir`` is truthy the three paths are the fixed file names
    ``final_model.zip``, ``vecnorm.pkl`` and ``config.json`` inside that
    directory; otherwise ``args.model``, ``args.vecnorm`` and ``args.config``
    are returned unchanged (any of them may be ``None``).
    """
    if not args.run_dir:
        return args.model, args.vecnorm, args.config

    run_files = ("final_model.zip", "vecnorm.pkl", "config.json")
    return tuple(os.path.join(args.run_dir, fname) for fname in run_files)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: load a saved policy, roll out one episode, render it.

    Parses the command line, resolves the model / VecNormalize / config
    paths, optionally loads the JSON config (keeping only keys that are also
    attributes of ``HerdingEnv``), runs one recorded episode and writes
    ``trajectory.png`` and ``timeseries.png`` — plus ``episode.gif`` unless
    ``--no-gif`` is given — into the output directory. That directory
    defaults to ``vis_<n_sheep>s`` next to the model file.
    """
    parser = argparse.ArgumentParser(
        description="Render trajectory + timeseries + GIF for a saved policy.")
    parser.add_argument(
        "--run-dir", type=str, default=None,
        help="Run directory containing final_model.zip + vecnorm.pkl + config.json")
    for path_flag in ("--model", "--vecnorm", "--config"):
        parser.add_argument(path_flag, type=str, default=None)
    parser.add_argument("--n-sheep", type=int, default=3)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--max-steps", type=int, default=2500)
    parser.add_argument("--out-dir", type=str, default=None)
    parser.add_argument("--no-gif", action="store_true",
                        help="Skip the animated GIF (PNG-only is faster).")
    parser.add_argument("--gif-fps", type=int, default=20)
    parser.add_argument("--gif-skip", type=int, default=3)
    args = parser.parse_args()

    model_path, vn_path, cfg_path = _resolve_paths(args)
    if not model_path or not vn_path:
        parser.error("either --run-dir or both --model and --vecnorm are required")

    # Keep only the config entries that name an attribute of HerdingEnv.
    rcfg = None
    if cfg_path and os.path.exists(cfg_path):
        with open(cfg_path) as f:
            cfg = json.load(f)
        rcfg = {}
        for key, value in cfg.items():
            if hasattr(HerdingEnv, key):
                rcfg[key] = value

    out_dir = args.out_dir
    if not out_dir:
        model_dir = os.path.dirname(os.path.abspath(model_path))
        out_dir = os.path.join(model_dir, f"vis_{args.n_sheep}s")
    os.makedirs(out_dir, exist_ok=True)

    print(f"Loading model: {model_path}")
    print(f"Loading vecnorm: {vn_path}")
    model = PPO.load(model_path, device="cpu")

    # This environment exists only so the saved normalisation statistics can
    # be loaded; run_and_record builds its own environment for the rollout.
    raw = DummyVecEnv(
        [make_eval_env(args.n_sheep, args.seed, args.max_steps, rcfg)])
    vn = VecNormalize.load(vn_path, raw)

    print(f"Rolling out n_sheep={args.n_sheep} (seed={args.seed})...")
    hist = run_and_record(model, vn, args.n_sheep, args.max_steps,
                          reward_cfg=rcfg, seed=args.seed)
    if hist["success"]:
        result = "SUCCESS"
    else:
        result = f"FAIL ({hist['n_penned']}/{hist['n_sheep']})"
    print(f" {result} in {hist['steps']} steps")

    plot_trajectory(hist, os.path.join(out_dir, "trajectory.png"))
    plot_timeseries(hist, os.path.join(out_dir, "timeseries.png"))
    print(f" saved trajectory.png + timeseries.png to {out_dir}/")

    if args.no_gif:
        return
    gif_path = os.path.join(out_dir, "episode.gif")
    print(f" rendering GIF (fps={args.gif_fps}, skip={args.gif_skip})...")
    save_episode_gif(hist, gif_path, fps=args.gif_fps, skip=args.gif_skip)
    print(f" saved {gif_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,5 +1,5 @@
|
||||
Webots Project File version R2025a
|
||||
perspectives: 000000ff00000000fd00000002000000010000011c00000298fc0200000001fb0000001400540065007800740045006400690074006f00720100000000000002980000003f00ffffff000000030000084300000238fc0100000001fb0000001a0043006f006e0073006f006c00650041006c006c0041006c006c0100000000000008430000006900ffffff000007250000029800000001000000020000000100000008fc00000000
|
||||
perspectives: 000000ff00000000fd00000002000000010000011c000001bcfc0200000001fb0000001400540065007800740045006400690074006f00720100000000000001bc0000003f00ffffff00000003000005c600000220fc0100000001fb0000001a0043006f006e0073006f006c00650041006c006c0041006c006c0100000000000005c60000006900ffffff000004a8000001bc00000001000000020000000100000008fc00000000
|
||||
simulationViewPerspectives: 000000ff000000010000000200000100000006250100000002010000000100
|
||||
sceneTreePerspectives: 000000ff00000001000000030000001f000000c0000000000100000002010000000200
|
||||
maximizedDockId: -1
|
||||
|
||||
+69
-63
@@ -10,7 +10,7 @@ EXTERNPROTO "../protos/Sheep.proto"
|
||||
# World
|
||||
WorldInfo {
|
||||
info [
|
||||
"RL-Based Autonomous Shepherd Robot"
|
||||
"Autonomous Shepherd Robot (Strömbom)"
|
||||
"Group G25"
|
||||
]
|
||||
title "Shepherd Herding"
|
||||
@@ -106,19 +106,26 @@ Solid { translation -2.5 -15 0.84 children [ Shape { appearance USE CAP geometry
|
||||
Solid { translation 14 -15 0.40 children [ Shape { appearance USE STONE_A geometry Box { size 2.0 0.16 0.80 } } ] boundingObject Box { size 2.0 0.16 0.80 } }
|
||||
Solid { translation 14 -15 0.84 children [ Shape { appearance USE CAP geometry Box { size 2.1 0.26 0.07 } } ] boundingObject Box { size 2.1 0.26 0.07 } }
|
||||
# Gate posts
|
||||
Solid { translation 10 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
|
||||
Solid { translation 13 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
|
||||
# Outer gate (wooden, slightly ajar, Z-brace)
|
||||
Solid { translation 11.5 -15.08 0.55 rotation 0 0 1 0.25 children [
|
||||
Solid { translation 10 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||
Solid { translation 13 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||
# Outer gate — fully open, hinged on the west gate post. Modeled as a swung-back
|
||||
# wooden gate parallel to the south wall, on the west side, so the 3m corridor
|
||||
# between gate posts (x=10..13, y=-15) is unobstructed.
|
||||
Solid { translation 8.6 -15.05 0.55 rotation 0 0 1 0 children [
|
||||
Shape { appearance USE WOOD geometry Box { size 2.80 0.05 1.00 } }
|
||||
Transform { translation 0 0.02 0 rotation 0 1 0 0.34 children [ Shape { appearance DEF FPOST PBRAppearance { baseColor 0.35 0.22 0.10 roughness 0.90 } geometry Box { size 2.97 0.04 0.06 } } ] }
|
||||
# FPOST appearance DEF lives here so the external pen below can USE it.
|
||||
Transform { translation 0 0.02 0 rotation 0 1 0 0.34 children [
|
||||
Shape { appearance DEF FPOST PBRAppearance { baseColor 0.35 0.22 0.10 roughness 0.90 } geometry Box { size 2.97 0.04 0.06 } }
|
||||
] }
|
||||
] boundingObject Box { size 2.80 0.08 1.00 } }
|
||||
|
||||
# ==================== QUARANTINE PEN (wooden post-and-rail fence, inside field) ====================
|
||||
# Flow: main field → inner gate → quarantine area → outer gate → outside
|
||||
# ==================== EXTERNAL PEN (south of field, accessed through south-wall gate) ====================
|
||||
# Flow: main field → south-wall gate (x ∈ [10, 13], y = -15) → external pen
|
||||
# The pen is a wooden post-and-rail rectangle south of the field, x ∈ [10, 13],
|
||||
# y ∈ [-22, -15], open on the north side (the gate hole is the entrance).
|
||||
|
||||
# West wall (x=10, ~7m along Y)
|
||||
Solid { translation 10 -11.46 0.55 children [
|
||||
# Pen west wall (x=10, y from -22 to -15, length 7m)
|
||||
Solid { translation 10 -18.5 0.55 children [
|
||||
Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||
Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||
Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||
@@ -130,8 +137,8 @@ Solid { translation 10 -11.46 0.55 children [
|
||||
Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
|
||||
] boundingObject Box { size 0.14 6.92 1.10 } }
|
||||
|
||||
# East wall (x=13)
|
||||
Solid { translation 13 -11.46 0.55 children [
|
||||
# Pen east wall (x=13, y from -22 to -15, length 7m)
|
||||
Solid { translation 13 -18.5 0.55 children [
|
||||
Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||
Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||
Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||
@@ -143,39 +150,50 @@ Solid { translation 13 -11.46 0.55 children [
|
||||
Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
|
||||
] boundingObject Box { size 0.14 6.92 1.10 } }
|
||||
|
||||
# North wall - open entrance (no wall, just corner posts)
|
||||
Solid { translation 10 -8 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] boundingObject Box { size 0.12 0.12 1.10 } }
|
||||
Solid { translation 13 -8 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] boundingObject Box { size 0.12 0.12 1.10 } }
|
||||
# Pen south wall (y=-22, x from 10 to 13, length 3m, closes the back of the pen)
|
||||
Solid { translation 11.5 -22 0.55 children [
|
||||
Transform { translation -1.5 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||
Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||
Transform { translation 1.5 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||
Transform { translation 0 0 -0.38 children [ Shape { appearance USE WOOD geometry Box { size 2.92 0.06 0.08 } } ] }
|
||||
Transform { translation 0 0 -0.05 children [ Shape { appearance USE WOOD geometry Box { size 2.92 0.06 0.08 } } ] }
|
||||
Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 2.92 0.06 0.08 } } ] }
|
||||
Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 2.92 0.14 0.04 } } ] }
|
||||
] boundingObject Box { size 2.92 0.14 1.10 } }
|
||||
|
||||
# Pen north corner posts at the gate opening (no wall — sheep enter here from the field)
|
||||
Solid { translation 10 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||
Solid { translation 13 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||
|
||||
# Corner pillars
|
||||
Solid { translation 15 15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
|
||||
Solid { translation 15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
|
||||
Solid { translation -15 15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
|
||||
Solid { translation -15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
|
||||
Solid { translation 15 15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||
Solid { translation 15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||
Solid { translation -15 15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||
Solid { translation -15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||
|
||||
# Mid-pillars every 5 m — East
|
||||
Solid { translation 15 10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation 15 5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation 15 0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation 15 -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation 15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation 15 10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation 15 5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation 15 0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation 15 -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation 15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
# West
|
||||
Solid { translation -15 10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation -15 5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation -15 0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation -15 -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation -15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation -15 10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation -15 5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation -15 0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation -15 -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation -15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
# North
|
||||
Solid { translation 10 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation 5 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation 0 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation -5 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation -10 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation 10 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation 5 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation 0 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation -5 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation -10 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
# South
|
||||
Solid { translation 5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation 0 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation -5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation -10 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
||||
Solid { translation 5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation 0 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation -5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
Solid { translation -10 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||
|
||||
# ==================== BARN 1 — Gambrel/Dutch style (NE, outside fence) ====================
|
||||
# Body 10×7×4, weathered gray-brown wood, gambrel roof, large double doors
|
||||
@@ -503,28 +521,16 @@ ShepherdDog {
|
||||
}
|
||||
|
||||
# ==================== SHEEP ====================
|
||||
Sheep {
|
||||
translation 3 2 0.5
|
||||
name "sheep1"
|
||||
controller "sheep"
|
||||
}
|
||||
Sheep {
|
||||
translation 3 -2 0.5
|
||||
name "sheep2"
|
||||
controller "sheep"
|
||||
}
|
||||
Sheep {
|
||||
translation 4 0 0.5
|
||||
name "sheep3"
|
||||
controller "sheep"
|
||||
}
|
||||
# Sheep {
|
||||
# translation 3.5 1 0.5
|
||||
# name "sheep4"
|
||||
# controller "sheep"
|
||||
# }
|
||||
# Sheep {
|
||||
# translation 3.5 -1 0.5
|
||||
# name "sheep5"
|
||||
# controller "sheep"
|
||||
# }
|
||||
# Up to 10 sheep, scattered through the field's central/north zone. Comment
|
||||
# out trailing slots to test smaller flock sizes; the dog policy is trained
|
||||
# to handle 1..10 sheep so any prefix works.
|
||||
Sheep { translation 3.0 2.0 0.5 name "sheep1" controller "sheep" }
|
||||
Sheep { translation 3.0 -2.0 0.5 name "sheep2" controller "sheep" }
|
||||
Sheep { translation 4.0 0.0 0.5 name "sheep3" controller "sheep" }
|
||||
Sheep { translation -3.0 4.0 0.5 name "sheep4" controller "sheep" }
|
||||
Sheep { translation -5.0 -2.0 0.5 name "sheep5" controller "sheep" }
|
||||
Sheep { translation 6.0 5.0 0.5 name "sheep6" controller "sheep" }
|
||||
Sheep { translation -6.0 6.0 0.5 name "sheep7" controller "sheep" }
|
||||
Sheep { translation 0.0 8.0 0.5 name "sheep8" controller "sheep" }
|
||||
Sheep { translation -8.0 0.0 0.5 name "sheep9" controller "sheep" }
|
||||
Sheep { translation 7.0 -4.0 0.5 name "sheep10" controller "sheep" }
|
||||
|
||||
Reference in New Issue
Block a user