Checkpoint 8

2026-05-12 22:41:03 +01:00
parent a01a5c9cef
commit 5c2ee4bba5
31 changed files with 3189 additions and 380 deletions
@@ -49,37 +49,8 @@ _PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
 if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)

-import numpy as np
-
-from controller import Robot
-
-from herding.control.active_scan import ActiveScanTeacher
-from herding.control.modulation import modulate_speed_near_sheep
-from herding.control.sequential import compute_action as sequential_action
-from herding.control.strombom import compute_action as strombom_action
-from herding.perception.obs import build_obs
-from herding.perception.lidar_perception import detections_from_scan
-from herding.perception.sheep_tracker import SheepTracker
-from herding.world.diffdrive import velocity_to_wheels
-from herding.world.geometry import (
-    DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
-    DOG_SOUTH_LIMIT, DOG_WHEEL_RADIUS,
-    PEN_ENTRY, is_penned_position,
-)
-
-
-# ---------------------------------------------------------------------------
-# Mode + policy resolution
-# ---------------------------------------------------------------------------
-
+# --- Read runtime cfg early so env vars are set before geometry import ---
 def _load_runtime_config():
-    """Read mode + policy_dir overrides from a runtime config file.
-
-    Webots strips HERDING_* env vars in some configurations, so the
-    launcher writes a tiny ``herding_runtime.cfg`` (key=value lines)
-    in the project root and the controller reads it here. Env vars
-    win if both are present; the file is the fallback.
-    """
    cfg_path = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
    if not os.path.exists(cfg_path):
        return {}
@@ -96,8 +67,37 @@ def _load_runtime_config():
        return {}
    return out

-
 _runtime_cfg = _load_runtime_config()
+# Seed env vars from runtime cfg so downstream modules (geometry.py) see them.
+for _rk, _rv in _runtime_cfg.items():
+    if _rk.startswith("HERDING_") and _rk not in os.environ:
+        os.environ[_rk] = _rv
+
+import numpy as np
+
+from controller import Robot
+
+from herding.control.active_scan import ActiveScanTeacher
+from herding.control.modulation import modulate_speed_near_sheep
+from herding.control.sequential import compute_action as sequential_action
+from herding.control.strombom import compute_action as strombom_action
+from herding.control.universal import compute_action as universal_action
+from herding.perception.obs import build_obs
+from herding.perception.lidar_perception import detections_from_scan
+from herding.perception.sheep_tracker import SheepTracker
+from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels
+from herding.world.geometry import (
+    DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
+    DOG_SOUTH_LIMIT, DOG_WHEEL_BASE, DOG_WHEEL_BASE_X,
+    DOG_WHEEL_BASE_Y, DOG_WHEEL_RADIUS,
+    PEN_ENTRY, is_penned_position,
+)
+
+
+# ---------------------------------------------------------------------------
+# Mode + policy resolution (cfg already loaded above)
+# ---------------------------------------------------------------------------
+
 MODE = (os.environ.get("HERDING_MODE")
        or _runtime_cfg.get("HERDING_MODE")
        or "bc").lower()
@@ -109,31 +109,39 @@ def _resolve_policy_dir(mode: str) -> str:
    Priority:
      1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points
         to a real directory.
-      2. Mode-specific default:
-            bc → training/runs/bc  (Strömbom-imitated MLP)
-            rl → training/runs/rl  (KL-PPO fine-tune of bc)
-      3. Fall back to bc.
-    All checkpoints are frame-stacked K = 4; ``policy_loader`` reads
-    the stacking factor from the policy's observation space.
+      2. Drive-mode-specific default:
+            bc → training/runs/bc_differential (or bc_mecanum)
+            rl → training/runs/rl_differential (or rl_mecanum)
+      3. Legacy path (no drive suffix):
+            bc → training/runs/bc
+            rl → training/runs/rl
    """
    env_dir = (os.environ.get("HERDING_POLICY_DIR")
               or _runtime_cfg.get("HERDING_POLICY_DIR"))
    if env_dir and os.path.isdir(env_dir):
        return env_dir
+    drive = DRIVE_MODE
    mode_default = {
-        "bc": os.path.join(_PROJECT_ROOT, "training", "runs", "bc"),
-        "rl": os.path.join(_PROJECT_ROOT, "training", "runs", "rl"),
+        "bc": os.path.join(_PROJECT_ROOT, "training", "runs",
+                           f"bc_{drive}"),
+        "rl": os.path.join(_PROJECT_ROOT, "training", "runs",
+                           f"rl_{drive}"),
    }
    primary = mode_default.get(mode, mode_default["bc"])
    if os.path.isdir(primary):
        return primary
-    fallback = mode_default["bc"]
+    # Fallback: legacy paths without drive suffix.
+    legacy = {
+        "bc": os.path.join(_PROJECT_ROOT, "training", "runs", "bc"),
+        "rl": os.path.join(_PROJECT_ROOT, "training", "runs", "rl"),
+    }
+    fallback = legacy.get(mode, legacy["bc"])
    if os.path.isdir(fallback):
        return fallback
    return env_dir or primary


-_VALID_MODES = ("bc", "rl", "strombom", "sequential")
+_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal")
 if MODE not in _VALID_MODES:
    print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
    MODE = "strombom"
@@ -151,6 +159,15 @@ if MODE in ("bc", "rl"):
        MODE = "strombom"
 print(f"[dog] running in mode={MODE}")

+# Drive mode: "differential" (2-wheel) or "mecanum" (4-wheel omnidirectional).
+DRIVE_MODE = (os.environ.get("HERDING_DRIVE")
+              or _runtime_cfg.get("HERDING_DRIVE")
+              or "differential").lower()
+if DRIVE_MODE not in ("differential", "mecanum"):
+    print(f"[dog] unknown HERDING_DRIVE={DRIVE_MODE!r}; defaulting to differential.")
+    DRIVE_MODE = "differential"
+print(f"[dog] drive mode={DRIVE_MODE}")
+

 # ---------------------------------------------------------------------------
 # Control parameters
@@ -171,7 +188,8 @@ def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
    return (vx, vy)


-def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: float):
+def drive_diff(vx: float, vy: float, left_motor, right_motor,
+               compass, motor_max: float):
    if math.hypot(vx, vy) < 1e-3:
        left_motor.setVelocity(0.0)
        right_motor.setVelocity(0.0)
@@ -189,6 +207,32 @@ def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: flo
    right_motor.setVelocity(right)


+def drive_mecanum(vx: float, vy: float, omega: float,
+                  fl_motor, fr_motor, rl_motor, rr_motor,
+                  compass, motor_max: float):
+    if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
+        fl_motor.setVelocity(0.0)
+        fr_motor.setVelocity(0.0)
+        rl_motor.setVelocity(0.0)
+        rr_motor.setVelocity(0.0)
+        return
+    n = compass.getValues()
+    h = math.atan2(n[0], n[1])
+    w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
+        vx, vy, omega, h,
+        max_linear=DOG_MAX_LINEAR,
+        wheel_radius=DOG_WHEEL_RADIUS,
+        lx=DOG_WHEEL_BASE_X / 2.0, ly=DOG_WHEEL_BASE_Y / 2.0,
+        max_wheel_omega=motor_max,
+        k_turn=4.0,
+        wheel_base=DOG_WHEEL_BASE,
+    )
+    fl_motor.setVelocity(w_fl)
+    fr_motor.setVelocity(w_fr)
+    rl_motor.setVelocity(w_rl)
+    rr_motor.setVelocity(w_rr)
+
+
 # ---------------------------------------------------------------------------
 # Webots devices
 # ---------------------------------------------------------------------------
@@ -196,13 +240,23 @@ def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: flo
 robot = Robot()
 timestep = int(robot.getBasicTimeStep())

-left_motor = robot.getDevice("left wheel motor")
-right_motor = robot.getDevice("right wheel motor")
-left_motor.setPosition(float("inf"))
-right_motor.setPosition(float("inf"))
-left_motor.setVelocity(0.0)
-right_motor.setVelocity(0.0)
-MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
+if DRIVE_MODE == "mecanum":
+    fl_motor = robot.getDevice("front left wheel motor")
+    fr_motor = robot.getDevice("front right wheel motor")
+    rl_motor = robot.getDevice("rear left wheel motor")
+    rr_motor = robot.getDevice("rear right wheel motor")
+    for m in (fl_motor, fr_motor, rl_motor, rr_motor):
+        m.setPosition(float("inf"))
+        m.setVelocity(0.0)
+    MOTOR_MAX = min(fl_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
+else:
+    left_motor = robot.getDevice("left wheel motor")
+    right_motor = robot.getDevice("right wheel motor")
+    left_motor.setPosition(float("inf"))
+    right_motor.setPosition(float("inf"))
+    left_motor.setVelocity(0.0)
+    right_motor.setVelocity(0.0)
+    MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)

 gps = robot.getDevice("gps");           gps.enable(timestep)
 compass = robot.getDevice("compass");   compass.enable(timestep)
@@ -235,13 +289,15 @@ analytic_teacher = None
 if MODE in ("strombom", "sequential"):
    base_fn = strombom_action if MODE == "strombom" else sequential_action
    analytic_teacher = ActiveScanTeacher(base_fn)
+elif MODE == "universal":
+    analytic_teacher = ActiveScanTeacher(universal_action)

 # GT positions from sheep emitters — used **only** for the auto-finish
 # sentinel and the GT_penned diagnostic line. Never fed into control.
 _gt_sheep: dict = {}
 _run_done = False

-prev_action = (0.0, 0.0)
+prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0)
 step_count = 0

 while robot.step(timestep) != -1:
@@ -273,26 +329,43 @@ while robot.step(timestep) != -1:
    single_obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)

    # ---- Action selection ----
+    omega = 0.0
    if MODE in ("bc", "rl") and policy_handle is not None:
        action = policy_handle.predict(single_obs)
        vx, vy = float(action[0]), float(action[1])
+        if DRIVE_MODE == "mecanum" and len(action) >= 3:
+            omega = float(action[2])
    else:
-        vx, vy, _mode_str = analytic_teacher(
+        result = analytic_teacher(
            dog_xy, dog_heading, sheep_positions, PEN_ENTRY,
+            DRIVE_MODE,
        )
+        if len(result) == 4:
+            vx, vy, omega, _mode_str = result
+        else:
+            vx, vy, _mode_str = result

    # Near-sheep speed modulation (shared by every mode).
    vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)

    # EMA smoothing — kills frame-to-frame action jitter.
-    vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
-    vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
+    if DRIVE_MODE == "mecanum":
+        vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
+        vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
+        omega = ACTION_SMOOTH * prev_action[2] + (1.0 - ACTION_SMOOTH) * omega
+    else:
+        vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
+        vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy

    # Safety: dog must never enter the pen.
    vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
-    prev_action = (vx, vy)
+    prev_action = (vx, vy, omega) if DRIVE_MODE == "mecanum" else (vx, vy)

-    drive(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
+    if DRIVE_MODE == "mecanum":
+        drive_mecanum(vx, vy, omega, fl_motor, fr_motor, rl_motor, rr_motor,
+                      compass, MOTOR_MAX)
+    else:
+        drive_diff(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
    emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")

    # Cosmetic ear wiggle.
@@ -321,7 +394,14 @@ while robot.step(timestep) != -1:
        gt_penned = sum(1 for x, y in _gt_sheep.values()
                        if is_penned_position(x, y))
        gt_total = len(_gt_sheep)
-        print(f"[dog mode={MODE}] step={step_count} "
+        print(f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
+              f"GT_penned={gt_penned}/{gt_total} "
+              f"tracks_active={tracker.n_active()} "
+              f"tracks_penned={tracker.n_penned()} "
+              f"detections={len(detections)} "
+              f"action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})"
+              if DRIVE_MODE == "mecanum" else
+              f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
              f"GT_penned={gt_penned}/{gt_total} "
              f"tracks_active={tracker.n_active()} "
              f"tracks_penned={tracker.n_penned()} "