Checkpoint 8
This commit is contained in:
@@ -33,7 +33,11 @@ class ActiveScanTeacher:
|
||||
|
||||
Call signature::
|
||||
|
||||
vx, vy, mode = teacher(dog_xy, dog_heading, sheep_positions, pen_target)
|
||||
vx, vy, omega, mode = teacher(dog_xy, dog_heading, sheep_positions,
|
||||
pen_target, drive_mode="differential")
|
||||
|
||||
``omega`` is the yaw-rate intent (mecanum only); 0.0 for differential
|
||||
drive and during blind exploration phases.
|
||||
"""
|
||||
|
||||
def __init__(self, base_action_fn, initial_scan_steps: int = INITIAL_SCAN_STEPS):
|
||||
@@ -62,7 +66,8 @@ class ActiveScanTeacher:
|
||||
return 0.0, 0.0
|
||||
return EXPLORE_SPEED * dx / d, EXPLORE_SPEED * dy / d
|
||||
|
||||
def __call__(self, dog_xy, dog_heading, sheep_positions, pen_target):
|
||||
def __call__(self, dog_xy, dog_heading, sheep_positions, pen_target,
|
||||
drive_mode="differential"):
|
||||
self.step += 1
|
||||
n_visible = len(sheep_positions)
|
||||
|
||||
@@ -75,7 +80,7 @@ class ActiveScanTeacher:
|
||||
if self.step <= self.initial_scan:
|
||||
vx, vy = self._scan_action(dog_heading)
|
||||
self.last_action = (vx, vy)
|
||||
return vx, vy, "scan_initial"
|
||||
return vx, vy, 0.0, "scan_initial"
|
||||
|
||||
# Phase 2: walk-to-centre after a sustained empty tracker.
|
||||
if self.empty_streak >= EMPTY_DEBOUNCE_STEPS:
|
||||
@@ -87,16 +92,31 @@ class ActiveScanTeacher:
|
||||
vx, vy = ex, ey
|
||||
mode = "explore"
|
||||
self.last_action = (vx, vy)
|
||||
return vx, vy, mode
|
||||
return vx, vy, 0.0, mode
|
||||
|
||||
# Phase 2b: brief tracker blink — hold the previous action.
|
||||
if n_visible == 0:
|
||||
vx, vy = self.last_action
|
||||
return vx, vy, "hold"
|
||||
return vx, vy, 0.0, "hold"
|
||||
|
||||
# Phase 3: hand off to the underlying analytic teacher, then
|
||||
# apply the shared near-sheep speed modulation.
|
||||
vx, vy, mode = self.base(dog_xy, sheep_positions, pen_target)
|
||||
# Handle both old-style (dog_xy, sheep, pen) and new-style
|
||||
# (dog_xy, heading, sheep, pen, drive_mode) teachers.
|
||||
try:
|
||||
result = self.base(dog_xy, dog_heading, sheep_positions,
|
||||
pen_target, drive_mode)
|
||||
except TypeError:
|
||||
try:
|
||||
result = self.base(dog_xy, dog_heading, sheep_positions,
|
||||
pen_target)
|
||||
except TypeError:
|
||||
result = self.base(dog_xy, sheep_positions, pen_target)
|
||||
if len(result) == 4:
|
||||
vx, vy, omega, mode = result
|
||||
else:
|
||||
vx, vy, mode = result
|
||||
omega = 0.0
|
||||
vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
|
||||
self.last_action = (vx, vy)
|
||||
return vx, vy, mode
|
||||
return vx, vy, omega, mode
|
||||
|
||||
@@ -0,0 +1,187 @@
|
||||
"""Universal shepherd teacher — Strömbom core + mecanum omega + straggler recovery.
|
||||
|
||||
The core collect/drive logic is **identical** to :mod:`strombom` (same
|
||||
``F_FACTOR``, ``DELTA_COLLECT``, ``DELTA_DRIVE`` thresholds and target
|
||||
computation) so it inherits the proven ~100 % success rate at n ≤ 8.
|
||||
Two additions make it useful as a universal teacher:
|
||||
|
||||
1. **Omega for mecanum.** When ``drive_mode="mecanum"``, the teacher
|
||||
outputs an ``omega`` channel that turns the dog to **face a
|
||||
phase-specific target**. During collect (and recovery) the dog faces the target
|
||||
sheep; during drive it faces the pen. This gives the BC student a
|
||||
real rotation signal to learn from.
|
||||
|
||||
2. **Last-straggler recovery.** When exactly one sheep remains active
|
||||
and it is near the gate, the dog positions itself behind that
|
||||
straggler (opposite the gate) and pushes it straight through. This
|
||||
handles the edge case where the last sheep circles the gate posts.
|
||||
|
||||
Call signature::
|
||||
|
||||
vx, vy, omega, mode = compute_action(
|
||||
dog_xy, dog_heading, sheep_positions, pen_target,
|
||||
drive_mode="differential",
|
||||
)
|
||||
|
||||
For differential drive ``omega`` is always 0.0 and can be ignored.
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
from herding.world.geometry import (
|
||||
PEN_ENTRY, GATE_X, GATE_Y, in_pen,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tuning constants — match Strömbom exactly for proven success rates.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Collect/drive switch: collect when the flock radius exceeds
# F_FACTOR * sqrt(n), otherwise drive.
F_FACTOR = 4.0

# Stand-off distance behind the furthest sheep while collecting.
DELTA_COLLECT = 1.5

# Stand-off distance behind the flock centre of mass while driving.
DELTA_DRIVE = 2.0

# Gain applied to the normalised heading error to produce the mecanum
# yaw intent (how strongly the dog turns to face its target).
OMEGA_GAIN = 0.6

# Recovery only triggers when the last straggler is closer than this to
# the gate centre.
RECOVERY_GATE_DIST = 6.0

# Stand-off distance behind the straggler, on the side away from the gate.
RECOVERY_PUSH_DIST = 1.2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _unit(x, y):
    """Scale the vector ``(x, y)`` to unit length.

    A vector whose length is below 1e-6 is treated as degenerate and
    mapped to ``(0.0, 0.0)`` instead of being divided by a near-zero norm.
    """
    norm = math.hypot(x, y)
    return (0.0, 0.0) if norm < 1e-6 else (x / norm, y / norm)
|
||||
|
||||
|
||||
def _is_active(x, y) -> bool:
    """Tell whether a sheep at ``(x, y)`` still counts as active.

    A sheep is active when ``in_pen`` does not report it as penned and
    its y coordinate is strictly greater than ``GATE_Y``.
    """
    if in_pen(x, y):
        return False
    return y > GATE_Y
|
||||
|
||||
|
||||
def _angle_diff(a, b):
    """Return the signed, wrapped difference ``a - b`` in radians.

    The raw difference is folded through ``atan2(sin, cos)``, so the
    result always lies in [-π, π].
    """
    delta = a - b
    return math.atan2(math.sin(delta), math.cos(delta))
|
||||
|
||||
|
||||
def _gate_center():
    """Return the ``(x, y)`` midpoint of the gate opening.

    x is the mean of the two ``GATE_X`` entries; y is ``GATE_Y``.
    """
    left, right = GATE_X[0], GATE_X[1]
    mid_x = 0.5 * (left + right)
    return (mid_x, GATE_Y)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core teacher
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _yaw_intent(dog_xy, dog_heading, face_target, drive_mode):
    """Return the yaw intent that turns the dog toward ``face_target``.

    Only ``drive_mode == "mecanum"`` produces a turn command; every other
    drive mode yields 0.0. The wrapped heading error is scaled by
    ``OMEGA_GAIN / π`` and clamped to [-1, 1].
    """
    if drive_mode != "mecanum":
        return 0.0
    desired_heading = math.atan2(
        face_target[1] - dog_xy[1],
        face_target[0] - dog_xy[0],
    )
    err = _angle_diff(desired_heading, dog_heading)
    return max(-1.0, min(1.0, OMEGA_GAIN * err / math.pi))


def compute_action(dog_xy, dog_heading, sheep_positions,
                   pen_target=PEN_ENTRY, drive_mode="differential"):
    """Return ``(vx, vy, omega, mode)`` for the current world state.

    Parameters
    ----------
    dog_xy : (float, float)
        Dog position in world frame.
    dog_heading : float
        Dog heading in world frame (rad), 0 = +x axis.
    sheep_positions : dict[str, (float, float)]
        Visible sheep positions; only the values are used.
    pen_target : (float, float)
        Point the flock is driven toward (defaults to ``PEN_ENTRY``).
    drive_mode : str
        ``"differential"`` or ``"mecanum"``.

    Returns
    -------
    vx, vy : float
        Unit-length direction toward the dog's stand-off point, or
        ``(0.0, 0.0)`` when idle or already on that point.
    omega : float
        Yaw intent in [-1, 1]; 0.0 unless ``drive_mode`` is ``"mecanum"``.
    mode : str
        Phase label: ``"idle"``, ``"collect"``, ``"drive"``, ``"recovery"``.
    """
    active = [(x, y) for (x, y) in sheep_positions.values()
              if _is_active(x, y)]
    if not active:
        return 0.0, 0.0, 0.0, "idle"

    n = len(active)

    # ---- Last-straggler recovery (single sheep circling near gate) ----
    if n == 1:
        sx, sy = active[0]
        gc = _gate_center()
        dx_g = sx - gc[0]
        dy_g = sy - gc[1]
        d_g = math.hypot(dx_g, dy_g)
        if d_g < RECOVERY_GATE_DIST:
            if d_g > 0.3:
                ux, uy = dx_g / d_g, dy_g / d_g
            else:
                # Sheep is practically on the gate centre, so the
                # gate-to-sheep direction is ill-defined; fall back to +y.
                ux, uy = 0.0, 1.0
            # Stand behind the straggler, on the side away from the gate.
            tx = sx + RECOVERY_PUSH_DIST * ux
            ty = sy + RECOVERY_PUSH_DIST * uy
            ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
            omega = _yaw_intent(dog_xy, dog_heading, (sx, sy), drive_mode)
            return ax, ay, omega, "recovery"

    # ---- Standard Strömbom collect/drive ----
    com_x = sum(p[0] for p in active) / n
    com_y = sum(p[1] for p in active) / n
    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
    radius = max(dists)

    if radius > F_FACTOR * math.sqrt(n):
        # Collect: aim behind the furthest sheep, opposite the CoM.
        idx = max(range(n), key=lambda i: dists[i])
        sx, sy = active[idx]
        ux, uy = _unit(sx - com_x, sy - com_y)
        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
        mode = "collect"
        face_target = (sx, sy)
    else:
        # Drive: aim behind the CoM, opposite the pen.
        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
        mode = "drive"
        face_target = pen_target

    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    omega = _yaw_intent(dog_xy, dog_heading, face_target, drive_mode)
    return ax, ay, omega, mode
|
||||
|
||||
|
||||
def compute_action_diff(dog_xy, dog_heading, sheep_positions,
                        pen_target=PEN_ENTRY):
    """Differential-drive adapter around :func:`compute_action`.

    Calls ``compute_action`` with ``drive_mode="differential"`` and drops
    the omega channel, returning ``(vx, vy, mode)`` for callers that do
    not expect a yaw term.
    """
    action = compute_action(
        dog_xy,
        dog_heading,
        sheep_positions,
        pen_target,
        drive_mode="differential",
    )
    vx, vy, _unused_omega, mode = action
    return vx, vy, mode
|
||||
Reference in New Issue
Block a user