Checkpoint 3

This commit is contained in:
Johnny Fernandes
2026-05-10 12:46:14 +01:00
parent 1bb9415414
commit 2a6db038df
16 changed files with 305 additions and 662 deletions
+30 -38
View File
@@ -1,26 +1,24 @@
"""Shepherd Dog controller (Webots).
Runs in one of two modes selected by the ``HERDING_MODE`` environment
variable:
Mode is selected by ``HERDING_MODE``, taken either from the environment
or from the ``herding_runtime.cfg`` file that the launcher writes (needed
because Webots strips env vars on some setups):
HERDING_MODE=rl → load an SB3 PPO policy from
HERDING_POLICY_DIR (default
training/runs/latest/best) and use its
(vx, vy) action each step.
HERDING_MODE=strombom → use the analytic Strömbom collect/drive
heuristic. This is the fallback if the RL
policy can't be loaded (e.g. SB3 not
installed in the Webots Python env, or no
checkpoint yet).
rl → load a BC-trained SB3 policy from HERDING_POLICY_DIR
and use its (vx, vy) action each step.
strombom → canonical Strömbom collect/drive heuristic.
sequential → single-target "pin and push" — drives the sheep
closest to the pen.
Both modes share the same low-level differential-drive controller
(``herding.diffdrive.velocity_to_wheels`` + clamped forward speed), so
switching modes does not retune the actuation layer.
All modes share the same low-level differential-drive controller
(``herding.diffdrive.velocity_to_wheels`` with cos(err)-clamped forward
speed), so switching modes does not retune actuation.
A safety supervisor enforces the "dog stays out of the pen" invariant:
if the action would push the dog past ``DOG_SOUTH_LIMIT`` it is
overridden with a north-driving correction. This is a hard guarantee
the policy cannot escape.
overridden with a north-driving correction. RL fallback: if the policy
zip can't be loaded (SB3 missing, file missing), the controller drops
to strombom mode automatically.
"""
import math
@@ -85,19 +83,21 @@ def _resolve_policy_dir() -> str:
"""Where to look for the trained policy.
Priority:
1. HERDING_POLICY_DIR env var (if set and points to a real dir)
2. training/runs/bc_pretrained/ (BC-only checkpoint)
3. training/runs/bc_ppo/best/ (PPO fine-tuned best)
4. training/runs/latest/best/ (legacy default)
1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points
to a real directory.
2. ``training/runs/bc_flock`` — flock-style BC (current default;
requires the tight-cohesion sheep regime).
3. ``training/runs/bc_solo`` — single-target BC (1-by-1 style;
only works if ``herding/flocking_sim.py`` is reverted to the
loose-cohesion regime).
"""
env_dir = (os.environ.get("HERDING_POLICY_DIR")
or _runtime_cfg.get("HERDING_POLICY_DIR"))
if env_dir and os.path.isdir(env_dir):
return env_dir
candidates = [
os.path.join(_PROJECT_ROOT, "training", "runs", "bc_pretrained"),
os.path.join(_PROJECT_ROOT, "training", "runs", "bc_ppo", "best"),
os.path.join(_PROJECT_ROOT, "training", "runs", "latest", "best"),
os.path.join(_PROJECT_ROOT, "training", "runs", "bc_flock"),
os.path.join(_PROJECT_ROOT, "training", "runs", "bc_solo"),
]
for c in candidates:
if os.path.isdir(c):
@@ -106,30 +106,22 @@ def _resolve_policy_dir() -> str:
return env_dir or candidates[0]
POLICY_DIR = _resolve_policy_dir()
_VALID_MODES = ("rl", "strombom", "sequential")
if MODE not in _VALID_MODES:
print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
MODE = "strombom"
POLICY_DIR = _resolve_policy_dir()
policy_handle = None
if MODE == "rl":
print(f"[dog] HERDING_MODE={MODE} HERDING_POLICY_DIR(env)="
f"{os.environ.get('HERDING_POLICY_DIR', '<unset>')}")
print(f"[dog] resolved POLICY_DIR={POLICY_DIR} exists="
f"{os.path.isdir(POLICY_DIR)}")
if os.path.isdir(POLICY_DIR):
try:
entries = sorted(os.listdir(POLICY_DIR))
except OSError:
entries = []
print(f"[dog] dir contents: {entries}")
print(f"[dog] resolved POLICY_DIR={POLICY_DIR} exists={os.path.isdir(POLICY_DIR)}")
try:
from policy_loader import load as _load_policy
policy_handle = _load_policy(POLICY_DIR)
print(f"[dog] RL policy loaded from {POLICY_DIR}")
except Exception as exc:
print(f"[dog] RL policy load failed ({exc!r}); falling back to Strömbom.")
print(f"[dog] RL policy load failed ({exc!r}); falling back to strombom.")
MODE = "strombom"
if MODE not in ("rl", "strombom", "sequential"):
print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
MODE = "strombom"
print(f"[dog] running in mode={MODE}")