Approach refinement

2026-04-26 12:59:04 +01:00
parent acf0810425
commit a44ddb7b08
17 changed files with 10593 additions and 194 deletions
@@ -4,7 +4,7 @@ Shepherd Dog RL controller — runs a trained SB3 PPO policy inside Webots.
 Setup
 -----
 1. Copy your trained files into this directory:
-       controllers/shepherd_dog_rl/best_model.zip
+       controllers/shepherd_dog_rl/final_model.zip
       controllers/shepherd_dog_rl/vecnorm.pkl

 2. In field.wbt, set the ShepherdDog robot's controller field to
@@ -15,9 +15,15 @@ Setup
   from the default of 5.

 The controller reads GPS (dog position) and Receiver (sheep broadcasts),
-builds the same 13-dim flock observation the training env used, normalises
+builds the same 16-dim flock observation the training env used, normalises
 it with the saved VecNormalize stats, and converts the (vx, vy) policy
 output into differential wheel speeds.
+
+Debug logging
+-------------
+Set DEBUG_ENABLED = True below to write a per-step CSV (dog pos, sheep
+positions, raw obs, normalised obs, action) to debug.csv alongside this
+script. Use plot_debug.py to render trajectories from it.
 """

 import sys
@@ -48,8 +54,10 @@ EAR_AMPLITUDE = 0.35
 EAR_RATE      = 8.0

 # ── model paths ─────────────────────────────────────────────────────────────
-MODEL_PATH  = os.path.join(_HERE, "best_model.zip")
+MODEL_PATH   = os.path.join(_HERE, "final_model.zip")
 VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
+DEBUG_CSV    = os.path.join(_HERE, "debug.csv")
+DEBUG_ENABLED = True   # set False to disable debug.csv logging


 def norm_angle(a: float) -> float:
@@ -148,13 +156,26 @@ vecnorm   = VecNormalize.load(VECNORM_PATH, dummy_env)
 vecnorm.training    = False
 vecnorm.norm_reward = False

-model = PPO.load(MODEL_PATH)
+model = PPO.load(MODEL_PATH, device="cpu")
 print(f"[RL dog] Model loaded — running with n_sheep={n_sheep}")

 # ── Runtime state ─────────────────────────────────────────────────────────────
 sheep_positions: dict = {}   # {name: (x, y)} — updated every step from receiver
 step_count = 0

+# Debug CSV — one row written every step when DEBUG_ENABLED is True
+debug_file = None
+if DEBUG_ENABLED:
+    import csv
+    debug_file = open(DEBUG_CSV, "w", newline="")
+    debug_writer = csv.writer(debug_file)
+    debug_writer.writerow([
+        "step", "dog_x", "dog_y", "heading",
+        "sheep_xs", "sheep_ys", "n_active", "n_penned",
+        "raw_obs", "norm_obs", "vx", "vy",
+    ])
+    print(f"[RL dog] DEBUG logging to {DEBUG_CSV}")
+

 def bearing() -> float:
    """Current robot heading in world frame (radians)."""
@@ -226,5 +247,22 @@ while robot.step(timestep) != -1:
    if step_count % 100 == 0:
        n_in_pen = sum(1 for x, y in sheep_positions.values() if in_pen(x, y))
        print(f"[RL dog] step={step_count}  known_sheep={len(sheep_positions)}"
-              f"  penned={n_in_pen}/{n_sheep}"
+              f"  penned={n_in_pen}/{n_sheep}  dog=({dog_pos[0]:.2f},{dog_pos[1]:.2f})"
              f"  action=({vx:.2f}, {vy:.2f})")
+
+    # Debug CSV row
+    if debug_file is not None:
+        n_active = sum(1 for x, y in sheep_positions.values() if not in_pen(x, y))
+        n_in_pen = len(sheep_positions) - n_active
+        debug_writer.writerow([
+            step_count, f"{dog_pos[0]:.4f}", f"{dog_pos[1]:.4f}",
+            f"{bearing():.4f}",
+            ";".join(f"{v[0]:.3f}" for v in sheep_positions.values()),
+            ";".join(f"{v[1]:.3f}" for v in sheep_positions.values()),
+            n_active, n_in_pen,
+            ";".join(f"{x:.4f}" for x in raw_obs),
+            ";".join(f"{x:.4f}" for x in obs_norm[0]),
+            f"{vx:.4f}", f"{vy:.4f}",
+        ])
+        if step_count % 200 == 0:
+            debug_file.flush()