Checkpoint 8

This commit is contained in:
Johnny Fernandes
2026-05-12 22:41:03 +01:00
parent a01a5c9cef
commit 5c2ee4bba5
31 changed files with 3189 additions and 380 deletions
+136 -56
View File
@@ -49,37 +49,8 @@ _PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
if _PROJECT_ROOT not in sys.path:
sys.path.insert(0, _PROJECT_ROOT)
import numpy as np
from controller import Robot
from herding.control.active_scan import ActiveScanTeacher
from herding.control.modulation import modulate_speed_near_sheep
from herding.control.sequential import compute_action as sequential_action
from herding.control.strombom import compute_action as strombom_action
from herding.perception.obs import build_obs
from herding.perception.lidar_perception import detections_from_scan
from herding.perception.sheep_tracker import SheepTracker
from herding.world.diffdrive import velocity_to_wheels
from herding.world.geometry import (
DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
DOG_SOUTH_LIMIT, DOG_WHEEL_RADIUS,
PEN_ENTRY, is_penned_position,
)
# ---------------------------------------------------------------------------
# Mode + policy resolution
# ---------------------------------------------------------------------------
# --- Read runtime cfg early so env vars are set before geometry import ---
def _load_runtime_config():
"""Read mode + policy_dir overrides from a runtime config file.
Webots strips HERDING_* env vars in some configurations, so the
launcher writes a tiny ``herding_runtime.cfg`` (key=value lines)
in the project root and the controller reads it here. Env vars
win if both are present; the file is the fallback.
"""
cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
if not os.path.exists(cfg_path):
return {}
@@ -96,8 +67,37 @@ def _load_runtime_config():
return {}
return out
_runtime_cfg = _load_runtime_config()
# Seed env vars from runtime cfg so downstream modules (geometry.py) see them.
for _rk, _rv in _runtime_cfg.items():
if _rk.startswith("HERDING_") and _rk not in os.environ:
os.environ[_rk] = _rv
import numpy as np
from controller import Robot
from herding.control.active_scan import ActiveScanTeacher
from herding.control.modulation import modulate_speed_near_sheep
from herding.control.sequential import compute_action as sequential_action
from herding.control.strombom import compute_action as strombom_action
from herding.control.universal import compute_action as universal_action
from herding.perception.obs import build_obs
from herding.perception.lidar_perception import detections_from_scan
from herding.perception.sheep_tracker import SheepTracker
from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels
from herding.world.geometry import (
DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
DOG_SOUTH_LIMIT, DOG_WHEEL_BASE, DOG_WHEEL_BASE_X,
DOG_WHEEL_BASE_Y, DOG_WHEEL_RADIUS,
PEN_ENTRY, is_penned_position,
)
# ---------------------------------------------------------------------------
# Mode + policy resolution (cfg already loaded above)
# ---------------------------------------------------------------------------
MODE = (os.environ.get("HERDING_MODE")
or _runtime_cfg.get("HERDING_MODE")
or "bc").lower()
@@ -109,31 +109,39 @@ def _resolve_policy_dir(mode: str) -> str:
Priority:
1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points
to a real directory.
2. Mode-specific default:
bc → training/runs/bc (Strömbom-imitated MLP)
rl → training/runs/rl (KL-PPO fine-tune of bc)
3. Fall back to bc.
All checkpoints are frame-stacked K = 4; ``policy_loader`` reads
the stacking factor from the policy's observation space.
2. Drive-mode-specific default:
bc → training/runs/bc_differential (or bc_mecanum)
rl → training/runs/rl_differential (or rl_mecanum)
3. Legacy path (no drive suffix):
bc → training/runs/bc
rl → training/runs/rl
"""
env_dir = (os.environ.get("HERDING_POLICY_DIR")
or _runtime_cfg.get("HERDING_POLICY_DIR"))
if env_dir and os.path.isdir(env_dir):
return env_dir
drive = DRIVE_MODE
mode_default = {
"bc": os.path.join(_PROJECT_ROOT, "training", "runs", "bc"),
"rl": os.path.join(_PROJECT_ROOT, "training", "runs", "rl"),
"bc": os.path.join(_PROJECT_ROOT, "training", "runs",
f"bc_{drive}"),
"rl": os.path.join(_PROJECT_ROOT, "training", "runs",
f"rl_{drive}"),
}
primary = mode_default.get(mode, mode_default["bc"])
if os.path.isdir(primary):
return primary
fallback = mode_default["bc"]
# Fallback: legacy paths without drive suffix.
legacy = {
"bc": os.path.join(_PROJECT_ROOT, "training", "runs", "bc"),
"rl": os.path.join(_PROJECT_ROOT, "training", "runs", "rl"),
}
fallback = legacy.get(mode, legacy["bc"])
if os.path.isdir(fallback):
return fallback
return env_dir or primary
_VALID_MODES = ("bc", "rl", "strombom", "sequential")
_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal")
if MODE not in _VALID_MODES:
print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
MODE = "strombom"
@@ -151,6 +159,15 @@ if MODE in ("bc", "rl"):
MODE = "strombom"
print(f"[dog] running in mode={MODE}")
# Drive mode: "differential" (2-wheel) or "mecanum" (4-wheel omnidirectional).
DRIVE_MODE = (os.environ.get("HERDING_DRIVE")
or _runtime_cfg.get("HERDING_DRIVE")
or "differential").lower()
if DRIVE_MODE not in ("differential", "mecanum"):
print(f"[dog] unknown HERDING_DRIVE={DRIVE_MODE!r}; defaulting to differential.")
DRIVE_MODE = "differential"
print(f"[dog] drive mode={DRIVE_MODE}")
# ---------------------------------------------------------------------------
# Control parameters
@@ -171,7 +188,8 @@ def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
return (vx, vy)
def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: float):
def drive_diff(vx: float, vy: float, left_motor, right_motor,
compass, motor_max: float):
if math.hypot(vx, vy) < 1e-3:
left_motor.setVelocity(0.0)
right_motor.setVelocity(0.0)
@@ -189,6 +207,32 @@ def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: flo
right_motor.setVelocity(right)
def drive_mecanum(vx: float, vy: float, omega: float,
fl_motor, fr_motor, rl_motor, rr_motor,
compass, motor_max: float):
if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
fl_motor.setVelocity(0.0)
fr_motor.setVelocity(0.0)
rl_motor.setVelocity(0.0)
rr_motor.setVelocity(0.0)
return
n = compass.getValues()
h = math.atan2(n[0], n[1])
w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
vx, vy, omega, h,
max_linear=DOG_MAX_LINEAR,
wheel_radius=DOG_WHEEL_RADIUS,
lx=DOG_WHEEL_BASE_X / 2.0, ly=DOG_WHEEL_BASE_Y / 2.0,
max_wheel_omega=motor_max,
k_turn=4.0,
wheel_base=DOG_WHEEL_BASE,
)
fl_motor.setVelocity(w_fl)
fr_motor.setVelocity(w_fr)
rl_motor.setVelocity(w_rl)
rr_motor.setVelocity(w_rr)
# ---------------------------------------------------------------------------
# Webots devices
# ---------------------------------------------------------------------------
@@ -196,13 +240,23 @@ def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: flo
robot = Robot()
timestep = int(robot.getBasicTimeStep())
left_motor = robot.getDevice("left wheel motor")
right_motor = robot.getDevice("right wheel motor")
left_motor.setPosition(float("inf"))
right_motor.setPosition(float("inf"))
left_motor.setVelocity(0.0)
right_motor.setVelocity(0.0)
MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
if DRIVE_MODE == "mecanum":
fl_motor = robot.getDevice("front left wheel motor")
fr_motor = robot.getDevice("front right wheel motor")
rl_motor = robot.getDevice("rear left wheel motor")
rr_motor = robot.getDevice("rear right wheel motor")
for m in (fl_motor, fr_motor, rl_motor, rr_motor):
m.setPosition(float("inf"))
m.setVelocity(0.0)
MOTOR_MAX = min(fl_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
else:
left_motor = robot.getDevice("left wheel motor")
right_motor = robot.getDevice("right wheel motor")
left_motor.setPosition(float("inf"))
right_motor.setPosition(float("inf"))
left_motor.setVelocity(0.0)
right_motor.setVelocity(0.0)
MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
gps = robot.getDevice("gps"); gps.enable(timestep)
compass = robot.getDevice("compass"); compass.enable(timestep)
@@ -235,13 +289,15 @@ analytic_teacher = None
if MODE in ("strombom", "sequential"):
base_fn = strombom_action if MODE == "strombom" else sequential_action
analytic_teacher = ActiveScanTeacher(base_fn)
elif MODE == "universal":
analytic_teacher = ActiveScanTeacher(universal_action)
# GT positions from sheep emitters — used **only** for the auto-finish
# sentinel and the GT_penned diagnostic line. Never fed into control.
_gt_sheep: dict = {}
_run_done = False
prev_action = (0.0, 0.0)
prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0)
step_count = 0
while robot.step(timestep) != -1:
@@ -273,26 +329,43 @@ while robot.step(timestep) != -1:
single_obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)
# ---- Action selection ----
omega = 0.0
if MODE in ("bc", "rl") and policy_handle is not None:
action = policy_handle.predict(single_obs)
vx, vy = float(action[0]), float(action[1])
if DRIVE_MODE == "mecanum" and len(action) >= 3:
omega = float(action[2])
else:
vx, vy, _mode_str = analytic_teacher(
result = analytic_teacher(
dog_xy, dog_heading, sheep_positions, PEN_ENTRY,
DRIVE_MODE,
)
if len(result) == 4:
vx, vy, omega, _mode_str = result
else:
vx, vy, _mode_str = result
# Near-sheep speed modulation (shared by every mode).
vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
# EMA smoothing — kills frame-to-frame action jitter.
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
if DRIVE_MODE == "mecanum":
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
omega = ACTION_SMOOTH * prev_action[2] + (1.0 - ACTION_SMOOTH) * omega
else:
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
# Safety: dog must never enter the pen.
vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
prev_action = (vx, vy)
prev_action = (vx, vy, omega) if DRIVE_MODE == "mecanum" else (vx, vy)
drive(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
if DRIVE_MODE == "mecanum":
drive_mecanum(vx, vy, omega, fl_motor, fr_motor, rl_motor, rr_motor,
compass, MOTOR_MAX)
else:
drive_diff(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")
# Cosmetic ear wiggle.
@@ -321,7 +394,14 @@ while robot.step(timestep) != -1:
gt_penned = sum(1 for x, y in _gt_sheep.values()
if is_penned_position(x, y))
gt_total = len(_gt_sheep)
print(f"[dog mode={MODE}] step={step_count} "
print(f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
f"GT_penned={gt_penned}/{gt_total} "
f"tracks_active={tracker.n_active()} "
f"tracks_penned={tracker.n_penned()} "
f"detections={len(detections)} "
f"action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})"
if DRIVE_MODE == "mecanum" else
f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
f"GT_penned={gt_penned}/{gt_total} "
f"tracks_active={tracker.n_active()} "
f"tracks_penned={tracker.n_penned()} "