Checkpoint 8
This commit is contained in:
@@ -49,37 +49,8 @@ _PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
|
||||
if _PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, _PROJECT_ROOT)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from controller import Robot
|
||||
|
||||
from herding.control.active_scan import ActiveScanTeacher
|
||||
from herding.control.modulation import modulate_speed_near_sheep
|
||||
from herding.control.sequential import compute_action as sequential_action
|
||||
from herding.control.strombom import compute_action as strombom_action
|
||||
from herding.perception.obs import build_obs
|
||||
from herding.perception.lidar_perception import detections_from_scan
|
||||
from herding.perception.sheep_tracker import SheepTracker
|
||||
from herding.world.diffdrive import velocity_to_wheels
|
||||
from herding.world.geometry import (
|
||||
DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
|
||||
DOG_SOUTH_LIMIT, DOG_WHEEL_RADIUS,
|
||||
PEN_ENTRY, is_penned_position,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mode + policy resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# --- Read runtime cfg early so env vars are set before geometry import ---
|
||||
def _load_runtime_config():
|
||||
"""Read mode + policy_dir overrides from a runtime config file.
|
||||
|
||||
Webots strips HERDING_* env vars in some configurations, so the
|
||||
launcher writes a tiny ``herding_runtime.cfg`` (key=value lines)
|
||||
in the project root and the controller reads it here. Env vars
|
||||
win if both are present; the file is the fallback.
|
||||
"""
|
||||
cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
|
||||
if not os.path.exists(cfg_path):
|
||||
return {}
|
||||
@@ -96,8 +67,37 @@ def _load_runtime_config():
|
||||
return {}
|
||||
return out
|
||||
|
||||
|
||||
_runtime_cfg = _load_runtime_config()
|
||||
# Seed env vars from runtime cfg so downstream modules (geometry.py) see them.
|
||||
for _rk, _rv in _runtime_cfg.items():
|
||||
if _rk.startswith("HERDING_") and _rk not in os.environ:
|
||||
os.environ[_rk] = _rv
|
||||
|
||||
import numpy as np
|
||||
|
||||
from controller import Robot
|
||||
|
||||
from herding.control.active_scan import ActiveScanTeacher
|
||||
from herding.control.modulation import modulate_speed_near_sheep
|
||||
from herding.control.sequential import compute_action as sequential_action
|
||||
from herding.control.strombom import compute_action as strombom_action
|
||||
from herding.control.universal import compute_action as universal_action
|
||||
from herding.perception.obs import build_obs
|
||||
from herding.perception.lidar_perception import detections_from_scan
|
||||
from herding.perception.sheep_tracker import SheepTracker
|
||||
from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels
|
||||
from herding.world.geometry import (
|
||||
DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
|
||||
DOG_SOUTH_LIMIT, DOG_WHEEL_BASE, DOG_WHEEL_BASE_X,
|
||||
DOG_WHEEL_BASE_Y, DOG_WHEEL_RADIUS,
|
||||
PEN_ENTRY, is_penned_position,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mode + policy resolution (cfg already loaded above)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
MODE = (os.environ.get("HERDING_MODE")
|
||||
or _runtime_cfg.get("HERDING_MODE")
|
||||
or "bc").lower()
|
||||
@@ -109,31 +109,39 @@ def _resolve_policy_dir(mode: str) -> str:
|
||||
Priority:
|
||||
1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points
|
||||
to a real directory.
|
||||
2. Mode-specific default:
|
||||
bc → training/runs/bc (Strömbom-imitated MLP)
|
||||
rl → training/runs/rl (KL-PPO fine-tune of bc)
|
||||
3. Fall back to bc.
|
||||
All checkpoints are frame-stacked K = 4; ``policy_loader`` reads
|
||||
the stacking factor from the policy's observation space.
|
||||
2. Drive-mode-specific default:
|
||||
bc → training/runs/bc_differential (or bc_mecanum)
|
||||
rl → training/runs/rl_differential (or rl_mecanum)
|
||||
3. Legacy path (no drive suffix):
|
||||
bc → training/runs/bc
|
||||
rl → training/runs/rl
|
||||
"""
|
||||
env_dir = (os.environ.get("HERDING_POLICY_DIR")
|
||||
or _runtime_cfg.get("HERDING_POLICY_DIR"))
|
||||
if env_dir and os.path.isdir(env_dir):
|
||||
return env_dir
|
||||
drive = DRIVE_MODE
|
||||
mode_default = {
|
||||
"bc": os.path.join(_PROJECT_ROOT, "training", "runs", "bc"),
|
||||
"rl": os.path.join(_PROJECT_ROOT, "training", "runs", "rl"),
|
||||
"bc": os.path.join(_PROJECT_ROOT, "training", "runs",
|
||||
f"bc_{drive}"),
|
||||
"rl": os.path.join(_PROJECT_ROOT, "training", "runs",
|
||||
f"rl_{drive}"),
|
||||
}
|
||||
primary = mode_default.get(mode, mode_default["bc"])
|
||||
if os.path.isdir(primary):
|
||||
return primary
|
||||
fallback = mode_default["bc"]
|
||||
# Fallback: legacy paths without drive suffix.
|
||||
legacy = {
|
||||
"bc": os.path.join(_PROJECT_ROOT, "training", "runs", "bc"),
|
||||
"rl": os.path.join(_PROJECT_ROOT, "training", "runs", "rl"),
|
||||
}
|
||||
fallback = legacy.get(mode, legacy["bc"])
|
||||
if os.path.isdir(fallback):
|
||||
return fallback
|
||||
return env_dir or primary
|
||||
|
||||
|
||||
_VALID_MODES = ("bc", "rl", "strombom", "sequential")
|
||||
_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal")
|
||||
if MODE not in _VALID_MODES:
|
||||
print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
|
||||
MODE = "strombom"
|
||||
@@ -151,6 +159,15 @@ if MODE in ("bc", "rl"):
|
||||
MODE = "strombom"
|
||||
print(f"[dog] running in mode={MODE}")
|
||||
|
||||
# Drive mode: "differential" (2-wheel) or "mecanum" (4-wheel omnidirectional).
|
||||
DRIVE_MODE = (os.environ.get("HERDING_DRIVE")
|
||||
or _runtime_cfg.get("HERDING_DRIVE")
|
||||
or "differential").lower()
|
||||
if DRIVE_MODE not in ("differential", "mecanum"):
|
||||
print(f"[dog] unknown HERDING_DRIVE={DRIVE_MODE!r}; defaulting to differential.")
|
||||
DRIVE_MODE = "differential"
|
||||
print(f"[dog] drive mode={DRIVE_MODE}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Control parameters
|
||||
@@ -171,7 +188,8 @@ def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
|
||||
return (vx, vy)
|
||||
|
||||
|
||||
def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: float):
|
||||
def drive_diff(vx: float, vy: float, left_motor, right_motor,
|
||||
compass, motor_max: float):
|
||||
if math.hypot(vx, vy) < 1e-3:
|
||||
left_motor.setVelocity(0.0)
|
||||
right_motor.setVelocity(0.0)
|
||||
@@ -189,6 +207,32 @@ def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: flo
|
||||
right_motor.setVelocity(right)
|
||||
|
||||
|
||||
def drive_mecanum(vx: float, vy: float, omega: float,
|
||||
fl_motor, fr_motor, rl_motor, rr_motor,
|
||||
compass, motor_max: float):
|
||||
if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
|
||||
fl_motor.setVelocity(0.0)
|
||||
fr_motor.setVelocity(0.0)
|
||||
rl_motor.setVelocity(0.0)
|
||||
rr_motor.setVelocity(0.0)
|
||||
return
|
||||
n = compass.getValues()
|
||||
h = math.atan2(n[0], n[1])
|
||||
w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
|
||||
vx, vy, omega, h,
|
||||
max_linear=DOG_MAX_LINEAR,
|
||||
wheel_radius=DOG_WHEEL_RADIUS,
|
||||
lx=DOG_WHEEL_BASE_X / 2.0, ly=DOG_WHEEL_BASE_Y / 2.0,
|
||||
max_wheel_omega=motor_max,
|
||||
k_turn=4.0,
|
||||
wheel_base=DOG_WHEEL_BASE,
|
||||
)
|
||||
fl_motor.setVelocity(w_fl)
|
||||
fr_motor.setVelocity(w_fr)
|
||||
rl_motor.setVelocity(w_rl)
|
||||
rr_motor.setVelocity(w_rr)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Webots devices
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -196,13 +240,23 @@ def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: flo
|
||||
robot = Robot()
|
||||
timestep = int(robot.getBasicTimeStep())
|
||||
|
||||
left_motor = robot.getDevice("left wheel motor")
|
||||
right_motor = robot.getDevice("right wheel motor")
|
||||
left_motor.setPosition(float("inf"))
|
||||
right_motor.setPosition(float("inf"))
|
||||
left_motor.setVelocity(0.0)
|
||||
right_motor.setVelocity(0.0)
|
||||
MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
|
||||
if DRIVE_MODE == "mecanum":
|
||||
fl_motor = robot.getDevice("front left wheel motor")
|
||||
fr_motor = robot.getDevice("front right wheel motor")
|
||||
rl_motor = robot.getDevice("rear left wheel motor")
|
||||
rr_motor = robot.getDevice("rear right wheel motor")
|
||||
for m in (fl_motor, fr_motor, rl_motor, rr_motor):
|
||||
m.setPosition(float("inf"))
|
||||
m.setVelocity(0.0)
|
||||
MOTOR_MAX = min(fl_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
|
||||
else:
|
||||
left_motor = robot.getDevice("left wheel motor")
|
||||
right_motor = robot.getDevice("right wheel motor")
|
||||
left_motor.setPosition(float("inf"))
|
||||
right_motor.setPosition(float("inf"))
|
||||
left_motor.setVelocity(0.0)
|
||||
right_motor.setVelocity(0.0)
|
||||
MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
|
||||
|
||||
gps = robot.getDevice("gps"); gps.enable(timestep)
|
||||
compass = robot.getDevice("compass"); compass.enable(timestep)
|
||||
@@ -235,13 +289,15 @@ analytic_teacher = None
|
||||
if MODE in ("strombom", "sequential"):
|
||||
base_fn = strombom_action if MODE == "strombom" else sequential_action
|
||||
analytic_teacher = ActiveScanTeacher(base_fn)
|
||||
elif MODE == "universal":
|
||||
analytic_teacher = ActiveScanTeacher(universal_action)
|
||||
|
||||
# GT positions from sheep emitters — used **only** for the auto-finish
|
||||
# sentinel and the GT_penned diagnostic line. Never fed into control.
|
||||
_gt_sheep: dict = {}
|
||||
_run_done = False
|
||||
|
||||
prev_action = (0.0, 0.0)
|
||||
prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0)
|
||||
step_count = 0
|
||||
|
||||
while robot.step(timestep) != -1:
|
||||
@@ -273,26 +329,43 @@ while robot.step(timestep) != -1:
|
||||
single_obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)
|
||||
|
||||
# ---- Action selection ----
|
||||
omega = 0.0
|
||||
if MODE in ("bc", "rl") and policy_handle is not None:
|
||||
action = policy_handle.predict(single_obs)
|
||||
vx, vy = float(action[0]), float(action[1])
|
||||
if DRIVE_MODE == "mecanum" and len(action) >= 3:
|
||||
omega = float(action[2])
|
||||
else:
|
||||
vx, vy, _mode_str = analytic_teacher(
|
||||
result = analytic_teacher(
|
||||
dog_xy, dog_heading, sheep_positions, PEN_ENTRY,
|
||||
DRIVE_MODE,
|
||||
)
|
||||
if len(result) == 4:
|
||||
vx, vy, omega, _mode_str = result
|
||||
else:
|
||||
vx, vy, _mode_str = result
|
||||
|
||||
# Near-sheep speed modulation (shared by every mode).
|
||||
vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
|
||||
|
||||
# EMA smoothing — kills frame-to-frame action jitter.
|
||||
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
|
||||
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
|
||||
if DRIVE_MODE == "mecanum":
|
||||
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
|
||||
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
|
||||
omega = ACTION_SMOOTH * prev_action[2] + (1.0 - ACTION_SMOOTH) * omega
|
||||
else:
|
||||
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
|
||||
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
|
||||
|
||||
# Safety: dog must never enter the pen.
|
||||
vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
|
||||
prev_action = (vx, vy)
|
||||
prev_action = (vx, vy, omega) if DRIVE_MODE == "mecanum" else (vx, vy)
|
||||
|
||||
drive(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
|
||||
if DRIVE_MODE == "mecanum":
|
||||
drive_mecanum(vx, vy, omega, fl_motor, fr_motor, rl_motor, rr_motor,
|
||||
compass, MOTOR_MAX)
|
||||
else:
|
||||
drive_diff(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
|
||||
emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")
|
||||
|
||||
# Cosmetic ear wiggle.
|
||||
@@ -321,7 +394,14 @@ while robot.step(timestep) != -1:
|
||||
gt_penned = sum(1 for x, y in _gt_sheep.values()
|
||||
if is_penned_position(x, y))
|
||||
gt_total = len(_gt_sheep)
|
||||
print(f"[dog mode={MODE}] step={step_count} "
|
||||
print(f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
|
||||
f"GT_penned={gt_penned}/{gt_total} "
|
||||
f"tracks_active={tracker.n_active()} "
|
||||
f"tracks_penned={tracker.n_penned()} "
|
||||
f"detections={len(detections)} "
|
||||
f"action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})"
|
||||
if DRIVE_MODE == "mecanum" else
|
||||
f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
|
||||
f"GT_penned={gt_penned}/{gt_total} "
|
||||
f"tracks_active={tracker.n_active()} "
|
||||
f"tracks_penned={tracker.n_penned()} "
|
||||
|
||||
Reference in New Issue
Block a user