1c197e0ff7
Two changes that together raise diff/round gym success ~52%→88% (BC)
and ~68%→88% (RL) without retraining; diff/field stays at 100%.
* TrackerConfig.consensus_k default 1 → 3 (radius 0.5 m, max_age 15
frames). The same candidate-promotion mechanism that closed the
Webots LiDAR gap also filters gym tracker phantoms — they show up
on the round field where sheep run further between detection
cycles than GATE_M, so each new position spawns a fresh track
while the stale one persists in memory. SheepTracker() called with
no tracker_cfg keeps the legacy pass-through behaviour for
backwards compatibility.
* Strömbom + universal teachers now detect when the natural
"behind the flock" drive target leaves the curved boundary and
fall back to pushing the flock radially inward toward the centre.
Breaks the wall-circling pattern that previously trapped both the
analytical baselines and the trained policies.
A/B numbers (n_sheep ∈ {1,2,3,5,10}, 5 seeds each, max_steps=15000):
diff/field bc: baseline 100% consensus 100%
diff/field rl: baseline 100% consensus 100%
diff/round bc: baseline 52% consensus 88%
diff/round rl: baseline 68% consensus 88%
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
116 lines
4.1 KiB
Python
116 lines
4.1 KiB
Python
"""Strömbom (2014) collect/drive heuristic for the shepherd dog.
|
|
|
|
When the flock is scattered (max radius > F_FACTOR · √n) the dog moves
|
|
to a point behind the furthest sheep and pushes it back toward the
|
|
flock CoM. Otherwise it drives, parking behind the CoM relative to
|
|
the pen target. Returns a unit-vector intent ``(vx, vy, mode)``.
|
|
|
|
Reference: Strömbom et al. 2014, "Solving the shepherding problem."
|
|
"""
|
|
|
|
import math
|
|
|
|
from herding.world.geometry import (
|
|
FIELD_ROUND_R, FIELD_SHAPE,
|
|
PEN_ENTRY, GATE_Y, in_pen,
|
|
)
|
|
|
|
F_FACTOR = 4.0 # collect/drive threshold scaled by √n
|
|
DELTA_COLLECT = 1.5 # collect-position offset behind the furthest sheep
|
|
DELTA_DRIVE = 2.0 # drive-position offset behind the flock CoM
|
|
|
|
|
|
def _unit(x, y):
    """Scale the vector ``(x, y)`` to unit length.

    Vectors whose length is below 1e-6 are treated as having no usable
    direction and yield ``(0.0, 0.0)`` instead of dividing by a
    near-zero norm.
    """
    norm = math.hypot(x, y)
    return (0.0, 0.0) if norm < 1e-6 else (x / norm, y / norm)
|
|
|
|
|
|
def _is_active(x, y) -> bool:
    """Tell whether the sheep at ``(x, y)`` still needs herding.

    A sheep is active when ``in_pen`` does not claim it and its ``y``
    coordinate is strictly greater than ``GATE_Y``.
    """
    if in_pen(x, y):
        return False
    return y > GATE_Y
|
|
|
|
|
|
def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Choose the dog's heading for one collect/drive step.

    Args:
        dog_xy: indexable ``(x, y)`` position of the dog.
        sheep_positions: mapping whose values are sheep ``(x, y)`` pairs;
            only sheep accepted by ``_is_active`` are considered.
        pen_target: indexable ``(x, y)`` point the flock is driven toward.

    Returns:
        ``(vx, vy, mode)``: ``(vx, vy)`` is the unit vector from the dog
        toward its steering target (zero if they coincide or no sheep is
        active) and ``mode`` is ``"idle"``, ``"collect"`` or ``"drive"``.
    """
    flock = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
    if not flock:
        return 0.0, 0.0, "idle"

    count = len(flock)
    com_x = sum(sx for sx, _ in flock) / count
    com_y = sum(sy for _, sy in flock) / count
    spread = [math.hypot(sx - com_x, sy - com_y) for sx, sy in flock]

    if max(spread) > F_FACTOR * math.sqrt(count):
        # Flock is scattered: stand behind the straggler furthest from the
        # centre of mass, on the side facing away from the flock.
        mode = "collect"
        far = max(range(count), key=spread.__getitem__)
        far_x, far_y = flock[far]
        out_x, out_y = _unit(far_x - com_x, far_y - com_y)
        tx = far_x + DELTA_COLLECT * out_x
        ty = far_y + DELTA_COLLECT * out_y
    else:
        # Flock is compact: stand behind the centre of mass, on the side
        # facing away from the pen target.
        mode = "drive"
        away_x, away_y = _unit(com_x - pen_target[0], com_y - pen_target[1])
        tx = com_x + DELTA_DRIVE * away_x
        ty = com_y + DELTA_DRIVE * away_y

        # Round-field wall fallback: a drive target beyond the curved
        # boundary would leave the dog tangent to the wall with the flock
        # circling, so aim radially outside the flock instead (pushing it
        # toward the centre) and keep that point within the boundary margin.
        if FIELD_SHAPE == "field_round":
            limit = FIELD_ROUND_R - 1.0
            if math.hypot(tx, ty) > limit:
                r_com = math.hypot(com_x, com_y)
                if r_com > 1e-3:
                    tx = com_x + DELTA_DRIVE * (com_x / r_com)
                    ty = com_y + DELTA_DRIVE * (com_y / r_com)
                    r_t = math.hypot(tx, ty)
                    if r_t > limit:
                        shrink = limit / r_t
                        tx *= shrink
                        ty *= shrink

    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    return ax, ay, mode
|
|
|
|
|
|
def compute_action_debug(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """``compute_action`` plus a small debug dict (CoM, target, radius).

    Applies the same collect/drive rule and the same round-field wall
    fallback as ``compute_action``, so the returned heading and the
    reported target describe what the dog actually steers toward.

    Args:
        dog_xy: indexable ``(x, y)`` position of the dog.
        sheep_positions: mapping whose values are sheep ``(x, y)`` pairs;
            only sheep accepted by ``_is_active`` are considered.
        pen_target: indexable ``(x, y)`` point the flock is driven toward.

    Returns:
        ``(vx, vy, mode, dbg)``. ``dbg`` has the keys ``n_active``,
        ``radius``, ``threshold``, ``com_x``, ``com_y``, ``target_x`` and
        ``target_y``. With no active sheep the mode is ``"idle"`` and the
        target is reported as the dog's own position.
    """
    active = [(x, y) for (x, y) in sheep_positions.values() if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, "idle", {
            "n_active": 0, "radius": 0.0, "threshold": 0.0,
            "com_x": 0.0, "com_y": 0.0,
            "target_x": dog_xy[0], "target_y": dog_xy[1],
        }

    n = len(active)
    com_x = sum(p[0] for p in active) / n
    com_y = sum(p[1] for p in active) / n
    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
    radius = max(dists)
    threshold = F_FACTOR * math.sqrt(n)

    if radius > threshold:
        # Collect: aim behind the furthest sheep, opposite the CoM.
        idx = max(range(n), key=dists.__getitem__)
        sx, sy = active[idx]
        ux, uy = _unit(sx - com_x, sy - com_y)
        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
        mode = "collect"
    else:
        # Drive: aim behind the CoM, opposite the pen.
        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
        mode = "drive"

    # Round-field wall fallback, kept in step with compute_action: if the
    # drive target lies outside the curved boundary, aim radially outside
    # the flock instead (pushing it toward the centre) and clamp that
    # point to the boundary margin. Without this the debug heading and
    # target would diverge from compute_action near the wall.
    if FIELD_SHAPE == "field_round" and mode == "drive":
        limit = FIELD_ROUND_R - 1.0
        if math.hypot(tx, ty) > limit:
            r_com = math.hypot(com_x, com_y)
            if r_com > 1e-3:
                ux2, uy2 = com_x / r_com, com_y / r_com
                tx = com_x + DELTA_DRIVE * ux2
                ty = com_y + DELTA_DRIVE * uy2
                r_t = math.hypot(tx, ty)
                if r_t > limit:
                    scale = limit / r_t
                    tx *= scale
                    ty *= scale

    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    dbg = {
        "n_active": n, "radius": radius, "threshold": threshold,
        "com_x": com_x, "com_y": com_y,
        "target_x": tx, "target_y": ty,
    }
    return ax, ay, mode, dbg
|