Checkpoint 2
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
"""Shared core for the shepherd herding project.
|
||||
|
||||
This package is the single source of truth for world geometry, sheep
|
||||
flocking dynamics, differential-drive kinematics, observation building,
|
||||
and the Strömbom heuristic. It is imported both by the Webots
|
||||
controllers (for inference) and by the Gymnasium training environment
|
||||
(for fast PPO rollouts), so the two paths cannot drift apart.
|
||||
"""
|
||||
@@ -0,0 +1,70 @@
|
||||
"""Differential-drive kinematics matching the Webots robot specs.
|
||||
|
||||
The Webots controllers and the training env both use these helpers so the
|
||||
sim and the real (Webots) physics agree to first order. They do not model
|
||||
slip, wheel acceleration limits, or contact forces — Webots does that for
|
||||
us at inference time. The training env has to be close enough that a
|
||||
policy trained against this kinematic model still works when handed off
|
||||
to ODE physics.
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
|
||||
def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt):
    """Advance a differential-drive pose by one explicit Euler step.

    Inputs
    ------
    x, y                     : current position (m)
    h                        : current heading (rad), 0 = +x axis
    w_left, w_right          : wheel angular velocities (rad/s)
    wheel_radius, wheel_base : robot dimensions (m)
    dt                       : timestep (s)

    Returns ``(new_x, new_y, new_h)``. The translation uses the heading at
    the start of the step; the returned heading is wrapped through
    ``atan2(sin, cos)``.
    """
    # Body-frame twist from the two wheel rates.
    linear = (w_right + w_left) * wheel_radius * 0.5
    yaw_rate = (w_right - w_left) * wheel_radius / wheel_base

    # Integrate heading, then wrap it back into atan2's range.
    unwrapped = h + yaw_rate * dt
    wrapped = math.atan2(math.sin(unwrapped), math.cos(unwrapped))

    return (
        x + linear * math.cos(h) * dt,
        y + linear * math.sin(h) * dt,
        wrapped,
    )
|
||||
|
||||
|
||||
def velocity_to_wheels(vx, vy, h, max_linear, wheel_radius, max_wheel_omega,
                       k_turn=4.0):
    """Map a desired ``(vx, vy)`` intent in [-1, 1]^2 to ``(left, right)`` wheel rates.

    Mirrors ``drive_action`` in controllers/shepherd_dog/shepherd_dog.py:
    the forward component is scaled by ``cos(err)`` with the error clamped
    to ±90°, and a proportional term on the (unclamped) heading error
    supplies the left/right differential. Each wheel rate is saturated to
    ``±max_wheel_omega``. Intents whose scaled speed is below 1e-3 m/s
    yield ``(0.0, 0.0)``.
    """
    commanded_ms = math.hypot(vx, vy) * max_linear
    if commanded_ms < 1e-3:
        return 0.0, 0.0

    # Signed shortest-path heading error toward the intent direction.
    delta = math.atan2(vy, vx) - h
    err = math.atan2(math.sin(delta), math.cos(delta))

    # Clamping to ±90° keeps cos() non-negative, so the robot never backs up.
    quarter_turn = math.pi / 2
    limited_err = max(-quarter_turn, min(quarter_turn, err))
    forward_rate = commanded_ms * math.cos(limited_err) / wheel_radius

    steer = k_turn * err

    def saturate(rate):
        return max(-max_wheel_omega, min(max_wheel_omega, rate))

    return saturate(forward_rate - steer), saturate(forward_rate + steer)
|
||||
|
||||
|
||||
def heading_speed_to_wheels(heading, speed_motor, h, max_wheel_omega,
                            k_turn=4.0):
    """Sheep variant: the speed is already in motor (wheel rad/s) units.

    Matches the existing sheep controller (``controllers/sheep/sheep.py``)
    where ``speed = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))`` and
    those constants are wheel angular velocities, not linear m/s.

    ``heading`` is the target heading and ``h`` the current one (rad).
    Returns ``(left, right)`` wheel rates, each saturated to
    ``±max_wheel_omega``.
    """
    delta = heading - h
    err = math.atan2(math.sin(delta), math.cos(delta))

    # No forward drive while the target is more than 90° off the nose.
    forward = max(0.0, math.cos(err)) * speed_motor
    steer = k_turn * err

    def saturate(rate):
        return max(-max_wheel_omega, min(max_wheel_omega, rate))

    return saturate(forward - steer), saturate(forward + steer)
|
||||
@@ -0,0 +1,178 @@
|
||||
"""Reynolds-style sheep flocking dynamics.
|
||||
|
||||
This is the per-sheep behavioural step used both by the Webots sheep
|
||||
controller (scalar, one sheep at a time) and by the training environment
|
||||
(loop over sheep). The numerics are adapted from the original
|
||||
``controllers/sheep/flocking.py`` and retuned for the new external-pen
|
||||
layout: the south stone wall is intact except in the gate column, so
|
||||
sheep can only reach the pen by walking through that 3-m corridor.
|
||||
|
||||
Force stack each step (summed → heading + speed):
|
||||
flee — quadratic ramp away from dog within FLEE_DIST
|
||||
cohesion — drift toward flock centre, strengthened while fleeing
|
||||
separation — inverse-distance push from peers
|
||||
walls — soft repulsion + hard escape band against field walls,
|
||||
except inside the gate column where the south wall is
|
||||
absent
|
||||
wander — small persistent drift for natural idle motion
|
||||
|
||||
A sheep latches to ``penned`` the first time it crosses the gate plane
|
||||
into the gate column (handled by callers via ``geometry.is_penned_position``);
|
||||
once latched, ``penned=True`` is passed in here and the force stack
|
||||
switches to in-pen containment + jitter.
|
||||
"""
|
||||
|
||||
import math
|
||||
import random
|
||||
|
||||
from herding.geometry import (
|
||||
FIELD_X, FIELD_Y,
|
||||
PEN_X, PEN_Y,
|
||||
GATE_X,
|
||||
)
|
||||
|
||||
# --- Speed and force constants ---
# All speeds here are in wheel rad/s (motor units), matching the existing
# sheep controller. Conversion to m/s = speed * SHEEP_WHEEL_RADIUS.
# NOTE(review): MAX_SPEED is not referenced by compute_heading_speed in this
# module — confirm whether any caller still uses it.
MAX_SPEED = 22.0
FLEE_SPEED = 20.0     # upper bound on the speed returned by compute_heading_speed
WANDER_SPEED = 3.0    # lower bound on the speed returned by compute_heading_speed

# Distances below are compared against world-coordinate distances.
WALL_MARGIN = 5.0        # soft wall repulsion ramps up inside this band
WALL_HARD_MARGIN = 1.0   # hard escape band width next to a field wall
WALL_HARD_GAIN = 50.0    # force floor/ceiling reached at the wall itself

FLEE_DIST = 7.0          # dog closer than this triggers the flee force
SEPARATION_DIST = 2.5    # peers closer than this push the sheep away
COHESION_DIST = 8.0      # peers closer than this count toward the cohesion CoM

PEN_MARGIN = 0.8         # containment band inside each pen wall (penned sheep)
|
||||
|
||||
|
||||
def _peers_iter(peers):
|
||||
"""Accept either a {name: (x, y)} dict or an iterable of (x, y) tuples."""
|
||||
if isinstance(peers, dict):
|
||||
return list(peers.values())
|
||||
return list(peers)
|
||||
|
||||
|
||||
def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
    """Return ``(heading, speed, new_wander_angle)`` for one sheep step.

    Parameters
    ----------
    x, y         : this sheep's world position
    penned       : True once the sheep has latched as penned; switches the
                   force stack to pen containment + jitter
    dog_xy       : ``(x, y)`` of the dog, or ``None`` if unknown
    peers        : ``{name: (x, y)}`` dict or iterable of ``(x, y)`` for the
                   other sheep
    wander_angle : persistent wander direction (rad) carried between steps
    rng          : optional ``random.Random``-compatible object (needs
                   ``random()`` and ``uniform()``) used for the wander
                   jitter in *both* the penned and free-roaming branches.
                   If ``None``, falls back to Python's global ``random``
                   module (matches Webots controller usage). Pass an
                   env-owned RNG to make rollouts deterministic given a seed.

    ``speed`` is in wheel rad/s (motor units), bounded by ``[WANDER_SPEED,
    FLEE_SPEED]``. ``heading`` is the world-frame target heading the sheep
    should aim for (atan2 convention).
    """
    fx, fy = 0.0, 0.0
    peer_list = _peers_iter(peers)
    rnd = rng if rng is not None else random

    if penned:
        # --- Pen containment: bounce off the four pen walls ---
        pm = PEN_MARGIN
        if x < PEN_X[0] + pm:
            fx += ((PEN_X[0] + pm - x) / pm) * 15.0
        if x > PEN_X[1] - pm:
            fx -= ((x - (PEN_X[1] - pm)) / pm) * 15.0
        if y < PEN_Y[0] + pm:
            fy += ((PEN_Y[0] + pm - y) / pm) * 15.0
        if y > PEN_Y[1] - pm:
            fy -= ((y - (PEN_Y[1] - pm)) / pm) * 15.0

        # Mild peer separation — penned sheep crowd the corner otherwise.
        for px, py in peer_list:
            dx, dy = px - x, py - y
            d = math.hypot(dx, dy)
            if 0.05 < d < SEPARATION_DIST:
                push = (SEPARATION_DIST - d) / d
                fx -= (dx / d) * push * 2.5
                fy -= (dy / d) * push * 2.5

        # Occasional jitter of the wander direction; the drift itself is
        # applied every step.
        if rnd.random() < 0.02:
            wander_angle += rnd.uniform(-0.6, 0.6)
        fx += math.cos(wander_angle) * 0.5
        fy += math.sin(wander_angle) * 0.5

    else:
        # --- Free-roaming sheep in the field ---
        fleeing = False
        if dog_xy is not None:
            ddx = dog_xy[0] - x
            ddy = dog_xy[1] - y
            dist = math.hypot(ddx, ddy)
            if 0.01 < dist < FLEE_DIST:
                fleeing = True
                # Quadratic ramp: 0 at FLEE_DIST, 20 at the dog.
                t = 1.0 - dist / FLEE_DIST
                s = t * t * 20.0
                fx -= (ddx / dist) * s
                fy -= (ddy / dist) * s

        # Cohesion — drift toward flock CoM (peers within COHESION_DIST).
        # Cohesion is *stronger* under flee than at rest (the
        # predator-confusion / safety-in-numbers effect — sheep huddle when
        # threatened). This is what makes shepherding work: the flock stays
        # as one unit through the narrow gate instead of fragmenting.
        cx, cy, cn = 0.0, 0.0, 0
        for px, py in peer_list:
            d = math.hypot(px - x, py - y)
            if 0.3 < d < COHESION_DIST:
                cx += px
                cy += py
                cn += 1
        if cn > 0:
            # Cohesion needs to be comparable to flee at close range to keep
            # the flock together through narrow obstacles like the 3m gate.
            # Flee at 2m has magnitude ~10; cohesion at peer-distance 5m
            # with w=1.5 contributes ~7.5 — same order, so the flock
            # translates as a unit instead of fragmenting under pressure.
            w = 1.5 if fleeing else 0.6
            fx += (cx / cn - x) * w
            fy += (cy / cn - y) * w

        # Separation — inverse-distance push from peers.
        for px, py in peer_list:
            ddx, ddy = px - x, py - y
            d = math.hypot(ddx, ddy)
            if 0.05 < d < SEPARATION_DIST:
                push = (SEPARATION_DIST - d) / d
                fx -= (ddx / d) * push * 2.5
                fy -= (ddy / d) * push * 2.5

        # Wall soft repulsion. The south wall is absent inside the gate
        # column so sheep can be driven through it by the dog.
        if x < FIELD_X[0] + WALL_MARGIN:
            fx += ((FIELD_X[0] + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
        if x > FIELD_X[1] - WALL_MARGIN:
            fx -= ((x - (FIELD_X[1] - WALL_MARGIN)) / WALL_MARGIN) * 6.0
        if y > FIELD_Y[1] - WALL_MARGIN:
            fy -= ((y - (FIELD_Y[1] - WALL_MARGIN)) / WALL_MARGIN) * 6.0
        if y < FIELD_Y[0] + WALL_MARGIN and not (GATE_X[0] <= x <= GATE_X[1]):
            fy += ((FIELD_Y[0] + WALL_MARGIN - y) / WALL_MARGIN) * 6.0

        if not fleeing:
            # Use the injected RNG here too (not the global ``random``
            # module); otherwise a seeded env RNG does not make the
            # free-roaming wander reproducible.
            if rnd.random() < 0.02:
                wander_angle += rnd.uniform(-0.6, 0.6)
            fx += math.cos(wander_angle) * 0.5
            fy += math.sin(wander_angle) * 0.5

    # --- Hard escape band — overrides everything when very close to a wall ---
    m, g = WALL_HARD_MARGIN, WALL_HARD_GAIN
    if x - FIELD_X[0] < m:
        fx = max(fx, g * (1.0 - (x - FIELD_X[0]) / m))
    if FIELD_X[1] - x < m:
        fx = min(fx, -g * (1.0 - (FIELD_X[1] - x) / m))
    if FIELD_Y[1] - y < m:
        fy = min(fy, -g * (1.0 - (FIELD_Y[1] - y) / m))
    # South wall hard escape only when not in the gate column and not penned.
    if (not penned) and (y - FIELD_Y[0] < m) and not (GATE_X[0] <= x <= GATE_X[1]):
        fy = max(fy, g * (1.0 - (y - FIELD_Y[0]) / m))

    heading = math.atan2(fy, fx)
    mag = math.hypot(fx, fy)
    speed = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))
    return heading, speed, wander_angle
|
||||
@@ -0,0 +1,99 @@
|
||||
"""World geometry and robot specs.
|
||||
|
||||
All coordinates are in meters. (0, 0) is the centre of the field, +x is
|
||||
east, +y is north. Z is up but unused here. These constants must match
|
||||
``worlds/field.wbt`` and the proto files; if the world changes, change
|
||||
this file and only this file.
|
||||
|
||||
Pen layout (post-refactor)
|
||||
--------------------------
|
||||
The pen is *external* to the field, accessed through a 3 m gate cut into
|
||||
the south stone wall at y = -15. Sheep entering through the gate end up
|
||||
in a fenced rectangle south of the field; the dog stays in the field
|
||||
(soft-limited above DOG_SOUTH_LIMIT during training and inference).
|
||||
|
||||
field +y north
|
||||
+-----------+
|
||||
| |
|
||||
| |
|
||||
| ...... |
|
||||
+---||||----+ y = -15 (south wall, gate at x ∈ [10, 13])
|
||||
||||
|
||||
|pen| y ∈ [-22, -15]
|
||||
+---+
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
# --- Field (square, stone-walled) ---
FIELD_X = (-15.0, 15.0)
FIELD_Y = (-15.0, 15.0)

# Conservative inside bounds — sheep/dog should not graze the wall.
FIELD_INSIDE_MARGIN = 0.5

# --- Pen (external, south of the field) ---
PEN_X = (10.0, 13.0)
PEN_Y = (-22.0, -15.0)
PEN_CENTER = (0.5 * (PEN_X[0] + PEN_X[1]), 0.5 * (PEN_Y[0] + PEN_Y[1]))

# --- Gate (the hole in the south stone wall) ---
# The gate spans the pen's x-range and sits on the pen's northern edge.
# Both are derived from the pen rectangle so that editing PEN_X / PEN_Y
# cannot leave the gate or the pen entry point out of sync.
GATE_X = PEN_X
GATE_Y = PEN_Y[1]

# The point the dog drives the flock toward: the gate centre on the field side.
PEN_ENTRY = (PEN_CENTER[0], GATE_Y)

# --- Robot specs (must match proto files) ---
# Dog (controllers/shepherd_dog/, protos/ShepherdDog.proto)
DOG_WHEEL_RADIUS = 0.038       # m
DOG_WHEEL_BASE = 0.28          # m, axle-to-axle
DOG_MAX_WHEEL_OMEGA = 70.0     # rad/s
DOG_MAX_LINEAR = DOG_WHEEL_RADIUS * DOG_MAX_WHEEL_OMEGA  # ~2.66 m/s

# Sheep (controllers/sheep/, protos/Sheep.proto)
SHEEP_WHEEL_RADIUS = 0.031     # m
SHEEP_WHEEL_BASE = 0.20        # m
SHEEP_MAX_WHEEL_OMEGA = 25.0   # rad/s
SHEEP_MAX_LINEAR = SHEEP_WHEEL_RADIUS * SHEEP_MAX_WHEEL_OMEGA  # ~0.78 m/s

# --- Webots step ---
WEBOTS_DT = 0.016  # seconds, matches WorldInfo.basicTimeStep = 16 in field.wbt

# --- Dog "virtual south wall" (training keeps dog out of the pen) ---
# At inference the controller also clips to this so a slightly miscalibrated
# policy doesn't accidentally drive into the pen and trap the sheep.
DOG_SOUTH_LIMIT = -14.5

# --- Maximum supported flock size ---
MAX_SHEEP = 10
|
||||
|
||||
|
||||
def in_pen(x: float, y: float) -> bool:
    """Return True when (x, y) is strictly inside the external pen rectangle.

    Points exactly on a pen edge (including the gate line) are not inside.
    """
    west, east = PEN_X[0], PEN_X[1]
    south, north = PEN_Y[0], PEN_Y[1]
    return (west < x < east) and (south < y < north)
|
||||
|
||||
|
||||
def in_field(x: float, y: float, margin: float = 0.0) -> bool:
    """Return True when (x, y) lies in the field shrunk by ``margin`` on every side.

    Bounds are inclusive, so with ``margin=0`` a point on the wall line counts
    as in the field.
    """
    if not (FIELD_X[0] + margin <= x <= FIELD_X[1] - margin):
        return False
    return FIELD_Y[0] + margin <= y <= FIELD_Y[1] - margin
|
||||
|
||||
|
||||
def in_gate_corridor(x: float, y: float, margin: float = 0.0) -> bool:
    """Return True when (x, y) lies in the column of the gate.

    The column spans the pen's x-range and runs from the pen's southern
    edge up to the gate line; ``margin`` widens it on every side. Bounds
    are inclusive.
    """
    if not (PEN_X[0] - margin <= x <= PEN_X[1] + margin):
        return False
    return PEN_Y[0] - margin <= y <= GATE_Y + margin
|
||||
|
||||
|
||||
def is_penned_position(x: float, y: float, latch_margin: float = 0.2) -> bool:
    """Decide whether a sheep at (x, y) should latch to "penned".

    True iff x falls inside the gate column widened by ``latch_margin`` on
    each side AND y is at or below the gate line. Callers latch on the
    first True; once latched, the sheep is held by in-pen forces and will
    not exit on its own.
    """
    if not (PEN_X[0] - latch_margin <= x <= PEN_X[1] + latch_margin):
        return False
    return y <= GATE_Y
|
||||
|
||||
|
||||
def distance_to_pen_entry(x: float, y: float) -> float:
    """Return the Euclidean distance from (x, y) to ``PEN_ENTRY``."""
    entry_x, entry_y = PEN_ENTRY[0], PEN_ENTRY[1]
    return math.hypot(x - entry_x, y - entry_y)
|
||||
+137
@@ -0,0 +1,137 @@
|
||||
"""Observation builder for the shepherd dog policy.
|
||||
|
||||
Order-invariant 32-D feature vector — the policy generalises across
|
||||
flock sizes 1..MAX_SHEEP because individual sheep coordinates never
|
||||
appear in the observation by index, only summary statistics, a polar
|
||||
histogram, and two "named" sheep (closest-to-pen and rearmost-from-pen).
|
||||
|
||||
The two named sheep matter for the sequential-driving teacher: it
|
||||
targets the closest-to-pen sheep specifically, so the policy needs
|
||||
that channel to mimic the teacher.
|
||||
|
||||
Layout (all components normalised so values stay roughly in [-1, 1]):
|
||||
|
||||
idx field
|
||||
----- ----------------------------------------------------------
|
||||
0..3 dog pose: x/15, y/15, cos(heading), sin(heading)
|
||||
4..5 active-sheep CoM x/15, y/15
|
||||
6..8 flock dispersion: max-radius/15, std_x/15, std_y/15
|
||||
9..11 vector dog→CoM: dx/30, dy/30, dist/30
|
||||
12..14 vector dog→pen-entry: dx/30, dy/30, dist/30
|
||||
15..16 vector CoM→furthest-from-CoM sheep: dx/15, dy/15
|
||||
17..18 min sheep-to-wall, min dog-to-wall (both /15)
|
||||
19 active-sheep count / MAX_SHEEP
|
||||
20..27 8-bin polar histogram of active sheep around the dog,
|
||||
rotation-aware (binned in dog-relative frame), normalised
|
||||
so the bins sum to 1.
|
||||
28..29 vector dog→closest-to-pen sheep: dx/15, dy/15
|
||||
30..31 vector dog→rearmost (furthest-from-pen) sheep: dx/15, dy/15
|
||||
"""
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
from herding.geometry import (
|
||||
FIELD_X, FIELD_Y, PEN_ENTRY, MAX_SHEEP,
|
||||
)
|
||||
|
||||
# Length of the observation vector allocated and returned by build_obs.
OBS_DIM = 32
|
||||
|
||||
|
||||
def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
              n_max: int = MAX_SHEEP) -> np.ndarray:
    """Build the float32 observation vector fed to the dog policy.

    Parameters
    ----------
    dog_xy            : tuple (x, y) of the dog's GPS position (m)
    dog_heading       : dog heading in rad
    sheep_xy_list     : iterable of (x, y) for ALL known sheep
    sheep_penned_list : parallel iterable of bool — True if sheep is penned
    n_max             : maximum supported flock size used for the count normaliser

    Returns an array of length ``OBS_DIM``. When no sheep is active (all
    penned), only the dog pose (0..3) and the dog→pen-entry vector (12..14)
    are filled; every other slot stays 0.
    """
    dog_x, dog_y = dog_xy
    obs = np.zeros(OBS_DIM, dtype=np.float32)

    # [0..3] dog pose.
    obs[0] = dog_x / 15.0
    obs[1] = dog_y / 15.0
    obs[2] = math.cos(dog_heading)
    obs[3] = math.sin(dog_heading)

    # Only sheep still in play contribute to the flock features.
    active = []
    for (sx, sy), is_penned in zip(sheep_xy_list, sheep_penned_list):
        if not is_penned:
            active.append((sx, sy))
    n = len(active)

    # [12..14] dog → pen entry; available even when the flock is empty.
    to_entry_x = PEN_ENTRY[0] - dog_x
    to_entry_y = PEN_ENTRY[1] - dog_y
    obs[12] = to_entry_x / 30.0
    obs[13] = to_entry_y / 30.0
    obs[14] = math.hypot(to_entry_x, to_entry_y) / 30.0

    if n == 0:
        # All sheep penned — terminal observation.
        obs[19] = 0.0
        return obs

    pts = np.asarray(active, dtype=np.float32)
    xs = pts[:, 0]
    ys = pts[:, 1]

    # [4..8] centre of mass and dispersion of the active flock.
    com_x = float(xs.mean())
    com_y = float(ys.mean())
    offsets = pts - np.array([com_x, com_y], dtype=np.float32)
    spread = np.hypot(offsets[:, 0], offsets[:, 1])
    obs[4] = com_x / 15.0
    obs[5] = com_y / 15.0
    obs[6] = float(spread.max()) / 15.0
    obs[7] = float(xs.std()) / 15.0
    obs[8] = float(ys.std()) / 15.0

    # [9..11] dog → CoM.
    to_com_x = com_x - dog_x
    to_com_y = com_y - dog_y
    obs[9] = to_com_x / 30.0
    obs[10] = to_com_y / 30.0
    obs[11] = math.hypot(to_com_x, to_com_y) / 30.0

    # [15..16] offset of the sheep furthest from the CoM (sheep minus CoM).
    outlier = int(np.argmax(spread))
    obs[15] = float(offsets[outlier, 0]) / 15.0
    obs[16] = float(offsets[outlier, 1]) / 15.0

    # [17..18] smallest clearance to any field wall, for sheep and for dog.
    sheep_clearance = min(
        float(np.min(xs - FIELD_X[0])),
        float(np.min(FIELD_X[1] - xs)),
        float(np.min(ys - FIELD_Y[0])),
        float(np.min(FIELD_Y[1] - ys)),
    )
    dog_clearance = min(
        dog_x - FIELD_X[0], FIELD_X[1] - dog_x,
        dog_y - FIELD_Y[0], FIELD_Y[1] - dog_y,
    )
    obs[17] = sheep_clearance / 15.0
    obs[18] = float(dog_clearance) / 15.0

    # [19] fraction of the maximum flock still active.
    obs[19] = n / n_max

    # [20..27] 8-bin polar histogram in the dog's body frame.
    bearing = np.arctan2(ys - dog_y, xs - dog_x) - dog_heading
    bearing = np.arctan2(np.sin(bearing), np.cos(bearing))  # wrap after the shift
    sector = np.floor((bearing + math.pi) / (2 * math.pi) * 8).astype(int)
    sector = np.clip(sector, 0, 7)  # bearing == +pi would otherwise land in bin 8
    hist = np.bincount(sector, minlength=8).astype(np.float32)
    hist /= max(1, n)
    obs[20:28] = hist

    # [28..31] Closest-to-pen sheep (the sequential teacher's target) and
    # rearmost (furthest-from-pen, the natural "next target" once the closest
    # is penned), both as offsets from the dog. These two channels make BC
    # tractable — without them the obs doesn't uniquely identify which sheep
    # the teacher is steering toward.
    entry_dist = np.hypot(xs - PEN_ENTRY[0], ys - PEN_ENTRY[1])
    lead = int(np.argmin(entry_dist))
    rear = int(np.argmax(entry_dist))
    obs[28] = (float(pts[lead, 0]) - dog_x) / 15.0
    obs[29] = (float(pts[lead, 1]) - dog_y) / 15.0
    obs[30] = (float(pts[rear, 0]) - dog_x) / 15.0
    obs[31] = (float(pts[rear, 1]) - dog_y) / 15.0

    return obs
|
||||
@@ -0,0 +1,98 @@
|
||||
"""Sequential single-target shepherd dog algorithm.
|
||||
|
||||
Strömbom drives the flock's centre of mass; with N sheep and a narrow
|
||||
3 m gate, this fails because the flock is wider than the gate and CoM
|
||||
driving abandons stragglers. Real sheepdogs solve this differently:
|
||||
they pick *one* sheep at a time, drive it through, return for the next.
|
||||
|
||||
This module implements that "pin-and-push" approach.
|
||||
|
||||
Algorithm (one step):
|
||||
1. Active sheep = those still in the field (not yet penned).
|
||||
2. Target = the active sheep currently closest to the pen entry.
|
||||
3. Drive position = ``target + Δ · unit(target − pen_entry)`` —
|
||||
directly behind the target relative to the goal.
|
||||
4. Output unit vector pointing the dog at the drive position.
|
||||
|
||||
Once the target crosses the gate it latches as penned and is removed
|
||||
from the active set; the next-closest unpenned sheep becomes the
|
||||
target. The algorithm naturally "queues" sheep through the gate.
|
||||
|
||||
Empirically (with our flocking dynamics) this scales linearly with
|
||||
flock size and works up to at least n=10 within a 15 000-step budget.
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
from herding.geometry import GATE_Y, PEN_ENTRY, in_pen
|
||||
|
||||
|
||||
# Distance added behind the target sheep, along the pen-entry → sheep
# direction, to obtain the point the dog steers toward.
DELTA_DRIVE = 1.5     # standoff behind the target sheep
# Multiplier applied to the unit steering vector returned by compute_action.
APPROACH_GAIN = 1.0   # action magnitude scale (1 = full speed)
|
||||
|
||||
|
||||
def _unit(x, y):
|
||||
d = math.hypot(x, y)
|
||||
if d < 1e-6:
|
||||
return 0.0, 0.0
|
||||
return x / d, y / d
|
||||
|
||||
|
||||
def _is_active(x, y) -> bool:
    """Return True when a sheep at (x, y) is outside the pen and strictly north of the gate line."""
    if in_pen(x, y):
        return False
    return y > GATE_Y
|
||||
|
||||
|
||||
def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Return ``(vx, vy, mode)`` where mode encodes the current target.

    ``sheep_positions`` is a ``{name: (x, y)}`` mapping. ``mode`` is
    ``"idle"`` (with a zero action) when no sheep is active, otherwise
    ``"drive:<name>"`` for the targeted sheep.

    Compatible with the Strömbom call signature so it can be drop-in
    swapped in the dog controller and the env's imitation reward.
    """
    # Target = active sheep closest to the pen entry; the first one seen wins
    # a tie. As a sheep approaches and crosses the gate it stays the target
    # until it stops being active, then the next-closest takes over.
    target = None
    target_dist = None
    for name, (x, y) in sheep_positions.items():
        if not _is_active(x, y):
            continue
        gap = math.hypot(x - pen_target[0], y - pen_target[1])
        if target is None or gap < target_dist:
            target, target_dist = (name, x, y), gap

    if target is None:
        return 0.0, 0.0, "idle"
    name, sx, sy = target

    # Drive position: DELTA_DRIVE behind the target on the pen → target line.
    away_x, away_y = _unit(sx - pen_target[0], sy - pen_target[1])
    drive_x = sx + DELTA_DRIVE * away_x
    drive_y = sy + DELTA_DRIVE * away_y

    # Steer the dog straight at the drive position.
    ax, ay = _unit(drive_x - dog_xy[0], drive_y - dog_xy[1])
    return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}"
|
||||
|
||||
|
||||
def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Debug variant returning ``(vx, vy, mode, debug_dict)``.

    The first three values follow the same rule as ``compute_action``.
    ``debug_dict`` carries the active count, the target's name and position,
    and the drive position. With no active sheep the drive position is
    reported as the dog's own position and the target name is ``""``.
    """
    candidates = []
    for name, (x, y) in sheep_positions.items():
        if _is_active(x, y):
            candidates.append((name, x, y))

    if not candidates:
        idle_info = {
            "n_active": 0, "target_name": "",
            "target_x": 0.0, "target_y": 0.0,
            "drive_x": dog_xy[0], "drive_y": dog_xy[1],
        }
        return 0.0, 0.0, "idle", idle_info

    def _gap_to_pen(entry):
        return math.hypot(entry[1] - pen_target[0], entry[2] - pen_target[1])

    # Closest-to-pen active sheep; min() keeps the first on ties.
    name, sx, sy = min(candidates, key=_gap_to_pen)

    away_x, away_y = _unit(sx - pen_target[0], sy - pen_target[1])
    drive_x = sx + DELTA_DRIVE * away_x
    drive_y = sy + DELTA_DRIVE * away_y
    ax, ay = _unit(drive_x - dog_xy[0], drive_y - dog_xy[1])

    info = {
        "n_active": len(candidates), "target_name": name,
        "target_x": sx, "target_y": sy,
        "drive_x": drive_x, "drive_y": drive_y,
    }
    return APPROACH_GAIN * ax, APPROACH_GAIN * ay, f"drive:{name}", info
|
||||
@@ -0,0 +1,114 @@
|
||||
"""Strömbom collect/drive heuristic for the shepherd dog.
|
||||
|
||||
Adapted from the original ``controllers/shepherd_dog/strombom.py`` and
|
||||
updated for the external pen layout. Used as a baseline controller and
|
||||
as the fallback when the RL policy isn't available.
|
||||
|
||||
Reference: Strömbom et al. 2014, "Solving the shepherding problem".
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
from herding.geometry import PEN_ENTRY, GATE_Y, in_pen
|
||||
|
||||
# Algorithm parameters. DELTA_DRIVE / DELTA_COLLECT were tightened from
# the original (4.0 / 2.5) because the new external pen sits ~26 m from
# typical sheep spawn locations — at the old 4 m standoff, the flee force
# (quadratic ramp, 3.7 at 4 m vs ~10 at 2 m) couldn't move sheep through
# the path inside the 3000-step episode budget.
#
# F_FACTOR was 2.0 in the original Strömbom paper; raised to 4.0 here so
# the dog stays in *drive* mode much longer. With our tighter cohesion
# (flocking_sim.py), partially-collected flocks consolidate naturally
# during a drive, and we don't waste 80% of the time budget on a slow
# "collect" pre-phase.
#
# How compute_action uses them:
#   F_FACTOR      — collect mode is chosen when the flock radius exceeds
#                   F_FACTOR * sqrt(n_active); otherwise drive mode.
#   DELTA_COLLECT — standoff beyond the furthest sheep, away from the CoM.
#   DELTA_DRIVE   — standoff beyond the CoM, away from the pen target.
F_FACTOR = 4.0
DELTA_COLLECT = 1.5
DELTA_DRIVE = 2.0
|
||||
|
||||
|
||||
def _unit(x, y):
|
||||
d = math.hypot(x, y)
|
||||
if d < 1e-6:
|
||||
return 0.0, 0.0
|
||||
return x / d, y / d
|
||||
|
||||
|
||||
def _is_active(x, y) -> bool:
    """Tell whether a sheep at (x, y) still needs herding.

    A sheep is "active" while it is in the field — anything inside the pen
    or at/below the gate plane is treated as committed to the pen and is no
    longer herded.
    """
    if in_pen(x, y):
        return False
    return y > GATE_Y
|
||||
|
||||
|
||||
def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}.

    ``sheep_positions`` is a ``{name: (x, y)}`` mapping (matches the
    Webots controller's representation). ``(vx, vy)`` is a unit vector from
    the dog toward the chosen steering point, or zero when idle.
    """
    flock = []
    for x, y in sheep_positions.values():
        if _is_active(x, y):
            flock.append((x, y))
    if not flock:
        return 0.0, 0.0, "idle"

    count = len(flock)
    com_x = sum(pt[0] for pt in flock) / count
    com_y = sum(pt[1] for pt in flock) / count
    spread = [math.hypot(px - com_x, py - com_y) for px, py in flock]

    if max(spread) > F_FACTOR * math.sqrt(count):
        # Collect: aim at a point behind the furthest sheep, opposite the CoM.
        # max() over (index, distance) pairs keeps the first index on ties.
        far_idx, _ = max(enumerate(spread), key=lambda pair: pair[1])
        anchor_x, anchor_y = flock[far_idx]
        ux, uy = _unit(anchor_x - com_x, anchor_y - com_y)
        standoff = DELTA_COLLECT
        mode = "collect"
    else:
        # Drive: aim at a point behind the flock CoM relative to the goal.
        anchor_x, anchor_y = com_x, com_y
        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
        standoff = DELTA_DRIVE
        mode = "drive"

    goal_x = anchor_x + standoff * ux
    goal_y = anchor_y + standoff * uy
    ax, ay = _unit(goal_x - dog_xy[0], goal_y - dog_xy[1])
    return ax, ay, mode
|
||||
|
||||
|
||||
def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Variant of compute_action that also returns a small debug dict.

    Returns ``(vx, vy, mode, dbg)`` where ``dbg`` holds the active count,
    flock radius, collect/drive threshold, CoM and steering point. With no
    active sheep the steering point is reported as the dog's own position.

    Kept for parity with the legacy controller's CSV logger.
    """
    flock = []
    for x, y in sheep_positions.values():
        if _is_active(x, y):
            flock.append((x, y))

    if not flock:
        idle_dbg = {
            "n_active": 0, "radius": 0.0, "threshold": 0.0,
            "com_x": 0.0, "com_y": 0.0,
            "target_x": dog_xy[0], "target_y": dog_xy[1],
        }
        return 0.0, 0.0, "idle", idle_dbg

    count = len(flock)
    com_x = sum(pt[0] for pt in flock) / count
    com_y = sum(pt[1] for pt in flock) / count
    spread = [math.hypot(px - com_x, py - com_y) for px, py in flock]
    radius = max(spread)
    threshold = F_FACTOR * math.sqrt(count)

    if radius > threshold:
        # Collect: steer behind the sheep furthest from the CoM (first on ties).
        far_idx, _ = max(enumerate(spread), key=lambda pair: pair[1])
        anchor_x, anchor_y = flock[far_idx]
        ux, uy = _unit(anchor_x - com_x, anchor_y - com_y)
        standoff = DELTA_COLLECT
        mode = "collect"
    else:
        # Drive: steer behind the CoM, on the side away from the pen target.
        anchor_x, anchor_y = com_x, com_y
        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
        standoff = DELTA_DRIVE
        mode = "drive"

    goal_x = anchor_x + standoff * ux
    goal_y = anchor_y + standoff * uy
    ax, ay = _unit(goal_x - dog_xy[0], goal_y - dog_xy[1])

    return ax, ay, mode, {
        "n_active": count, "radius": radius, "threshold": threshold,
        "com_x": com_x, "com_y": com_y,
        "target_x": goal_x, "target_y": goal_y,
    }
|
||||
Reference in New Issue
Block a user