Checkpoint 3
This commit is contained in:
@@ -1,26 +1,24 @@
|
||||
"""Shepherd Dog controller (Webots).
|
||||
|
||||
Runs in one of two modes selected by the ``HERDING_MODE`` environment
|
||||
variable:
|
||||
Mode is selected by ``HERDING_MODE`` (env var, or via the
|
||||
``herding_runtime.cfg`` file the launcher writes since Webots strips
|
||||
env vars on some setups):
|
||||
|
||||
HERDING_MODE=rl → load an SB3 PPO policy from
|
||||
HERDING_POLICY_DIR (default
|
||||
training/runs/latest/best) and use its
|
||||
(vx, vy) action each step.
|
||||
HERDING_MODE=strombom → use the analytic Strömbom collect/drive
|
||||
heuristic. This is the fallback if the RL
|
||||
policy can't be loaded (e.g. SB3 not
|
||||
installed in the Webots Python env, or no
|
||||
checkpoint yet).
|
||||
rl → load a BC-trained SB3 policy from HERDING_POLICY_DIR
|
||||
and use its (vx, vy) action each step.
|
||||
strombom → canonical Strömbom collect/drive heuristic.
|
||||
sequential → single-target "pin and push" — drives the sheep
|
||||
closest to the pen.
|
||||
|
||||
Both modes share the same low-level differential-drive controller
|
||||
(``herding.diffdrive.velocity_to_wheels`` + clamped forward speed), so
|
||||
switching modes does not retune the actuation layer.
|
||||
All modes share the same low-level differential-drive controller
|
||||
(``herding.diffdrive.velocity_to_wheels`` with cos(err)-clamped forward
|
||||
speed), so switching modes does not retune actuation.
|
||||
|
||||
A safety supervisor enforces the "dog stays out of the pen" invariant:
|
||||
if the action would push the dog past ``DOG_SOUTH_LIMIT`` it is
|
||||
overridden with a north-driving correction. This is a hard guarantee
|
||||
the policy cannot escape.
|
||||
overridden with a north-driving correction. RL fallback: if the policy
|
||||
zip can't be loaded (SB3 missing, file missing), the controller drops
|
||||
to strombom mode automatically.
|
||||
"""
|
||||
|
||||
import math
|
||||
@@ -85,19 +83,21 @@ def _resolve_policy_dir() -> str:
|
||||
"""Where to look for the trained policy.
|
||||
|
||||
Priority:
|
||||
1. HERDING_POLICY_DIR env var (if set and points to a real dir)
|
||||
2. training/runs/bc_pretrained/ (BC-only checkpoint)
|
||||
3. training/runs/bc_ppo/best/ (PPO fine-tuned best)
|
||||
4. training/runs/latest/best/ (legacy default)
|
||||
1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points
|
||||
to a real directory.
|
||||
2. ``training/runs/bc_flock`` — flock-style BC (current default;
|
||||
requires the tight-cohesion sheep regime).
|
||||
3. ``training/runs/bc_solo`` — single-target BC (1-by-1 style;
|
||||
only works if ``herding/flocking_sim.py`` is reverted to the
|
||||
loose-cohesion regime).
|
||||
"""
|
||||
env_dir = (os.environ.get("HERDING_POLICY_DIR")
|
||||
or _runtime_cfg.get("HERDING_POLICY_DIR"))
|
||||
if env_dir and os.path.isdir(env_dir):
|
||||
return env_dir
|
||||
candidates = [
|
||||
os.path.join(_PROJECT_ROOT, "training", "runs", "bc_pretrained"),
|
||||
os.path.join(_PROJECT_ROOT, "training", "runs", "bc_ppo", "best"),
|
||||
os.path.join(_PROJECT_ROOT, "training", "runs", "latest", "best"),
|
||||
os.path.join(_PROJECT_ROOT, "training", "runs", "bc_flock"),
|
||||
os.path.join(_PROJECT_ROOT, "training", "runs", "bc_solo"),
|
||||
]
|
||||
for c in candidates:
|
||||
if os.path.isdir(c):
|
||||
@@ -106,30 +106,22 @@ def _resolve_policy_dir() -> str:
|
||||
return env_dir or candidates[0]
|
||||
|
||||
|
||||
POLICY_DIR = _resolve_policy_dir()
|
||||
_VALID_MODES = ("rl", "strombom", "sequential")
|
||||
if MODE not in _VALID_MODES:
|
||||
print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
|
||||
MODE = "strombom"
|
||||
|
||||
POLICY_DIR = _resolve_policy_dir()
|
||||
policy_handle = None
|
||||
if MODE == "rl":
|
||||
print(f"[dog] HERDING_MODE={MODE} HERDING_POLICY_DIR(env)="
|
||||
f"{os.environ.get('HERDING_POLICY_DIR', '<unset>')}")
|
||||
print(f"[dog] resolved POLICY_DIR={POLICY_DIR} exists="
|
||||
f"{os.path.isdir(POLICY_DIR)}")
|
||||
if os.path.isdir(POLICY_DIR):
|
||||
try:
|
||||
entries = sorted(os.listdir(POLICY_DIR))
|
||||
except OSError:
|
||||
entries = []
|
||||
print(f"[dog] dir contents: {entries}")
|
||||
print(f"[dog] resolved POLICY_DIR={POLICY_DIR} exists={os.path.isdir(POLICY_DIR)}")
|
||||
try:
|
||||
from policy_loader import load as _load_policy
|
||||
policy_handle = _load_policy(POLICY_DIR)
|
||||
print(f"[dog] RL policy loaded from {POLICY_DIR}")
|
||||
except Exception as exc:
|
||||
print(f"[dog] RL policy load failed ({exc!r}); falling back to Strömbom.")
|
||||
print(f"[dog] RL policy load failed ({exc!r}); falling back to strombom.")
|
||||
MODE = "strombom"
|
||||
if MODE not in ("rl", "strombom", "sequential"):
|
||||
print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
|
||||
MODE = "strombom"
|
||||
print(f"[dog] running in mode={MODE}")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user