Approach refinement

2026-04-26 12:59:04 +01:00
parent acf0810425
commit a44ddb7b08
17 changed files with 10593 additions and 194 deletions
@@ -0,0 +1,153 @@
+"""
+Render Webots-side debug trajectory from debug.csv.
+
+The shepherd_dog_rl controller writes per-step state to debug.csv when its
+DEBUG_ENABLED flag is True. This script reads it and produces:
+
+  trajectory.png   — dog path + sheep paths overlaid on the field
+  obs.png          — raw vs normalised observation values over time
+  actions.png      — vx, vy and action-magnitude time series
+
+Run:
+    python plot_debug.py                    # uses debug.csv next to this file
+    python plot_debug.py --csv path/to.csv --out-dir somewhere/
+"""
+import argparse
+import csv
+import os
+import sys
+
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+import numpy as np
+
+
+def load_csv(path):
+    rows = []
+    with open(path) as f:
+        rd = csv.DictReader(f)
+        for r in rd:
+            rows.append(r)
+    if not rows:
+        sys.exit(f"empty CSV: {path}")
+    return rows
+
+
+def parse_floats(s):
+    return [float(x) for x in s.split(";") if x]
+
+
+def plot_trajectory(rows, out_path):
+    fig, ax = plt.subplots(figsize=(7, 7))
+    ax.set_xlim(-16, 16); ax.set_ylim(-16, 16); ax.set_aspect("equal")
+    ax.set_facecolor("#dcedc8")
+    ax.add_patch(mpatches.Rectangle((-15, -15), 30, 30,
+                 fill=False, edgecolor="#795548", lw=2))
+    ax.add_patch(mpatches.Rectangle((10, -15), 3, 7,
+                 facecolor="#ffe082", edgecolor="#795548", lw=2))
+    ax.text(11.5, -11.5, "pen", ha="center", va="center", fontsize=8)
+
+    dog_x = [float(r["dog_x"]) for r in rows]
+    dog_y = [float(r["dog_y"]) for r in rows]
+    ax.plot(dog_x, dog_y, color="#4e342e", lw=1.5, alpha=0.7, label="dog")
+    ax.plot(dog_x[0], dog_y[0], "s", color="#4e342e", ms=10)
+    ax.plot(dog_x[-1], dog_y[-1], "D", color="#4e342e", ms=10)
+
+    # Sheep — re-shape into per-sheep tracks
+    sx_all = [parse_floats(r["sheep_xs"]) for r in rows]
+    sy_all = [parse_floats(r["sheep_ys"]) for r in rows]
+    if sx_all and sx_all[-1]:
+        n_sheep = len(sx_all[-1])
+        palette = ["#e41a1c","#377eb8","#4daf4a","#984ea3","#ff7f00",
+                   "#a65628","#f781bf","#999999","#66c2a5","#fc8d62"]
+        for i in range(n_sheep):
+            xs = [r[i] if i < len(r) else None for r in sx_all]
+            ys = [r[i] if i < len(r) else None for r in sy_all]
+            xs = [x for x in xs if x is not None]
+            ys = [y for y in ys if y is not None]
+            if xs:
+                c = palette[i % len(palette)]
+                ax.plot(xs, ys, color=c, lw=0.8, alpha=0.6, label=f"sheep {i+1}")
+                ax.plot(xs[0], ys[0], "o", color=c, ms=6)
+                ax.plot(xs[-1], ys[-1], "*", color=c, ms=10)
+
+    n_in_pen = int(rows[-1]["n_penned"])
+    ax.set_title(f"Webots trajectory  {len(rows)} steps  penned={n_in_pen}",
+                 fontsize=12)
+    ax.legend(loc="upper left", fontsize=7, ncol=2)
+    plt.tight_layout()
+    fig.savefig(out_path, dpi=120)
+    plt.close(fig)
+
+
+def plot_actions(rows, out_path):
+    t = np.arange(len(rows))
+    vx = np.array([float(r["vx"]) for r in rows])
+    vy = np.array([float(r["vy"]) for r in rows])
+    mag = np.sqrt(vx ** 2 + vy ** 2)
+
+    fig, axes = plt.subplots(3, 1, figsize=(12, 7), sharex=True)
+    axes[0].plot(t, vx, color="tab:red", lw=0.8); axes[0].set_ylabel("vx")
+    axes[0].axhline(0, color="black", lw=0.4); axes[0].set_ylim(-1.1, 1.1)
+    axes[1].plot(t, vy, color="tab:blue", lw=0.8); axes[1].set_ylabel("vy")
+    axes[1].axhline(0, color="black", lw=0.4); axes[1].set_ylim(-1.1, 1.1)
+    axes[2].plot(t, mag, color="tab:purple", lw=0.8); axes[2].set_ylabel("||action||")
+    axes[2].axhline(np.sqrt(2), color="orange", ls="--", lw=1, label="saturated √2")
+    axes[2].axhline(1.0, color="gray", ls="--", lw=1)
+    axes[2].set_xlabel("step"); axes[2].legend(fontsize=8)
+    fig.suptitle("Webots action time series")
+    plt.tight_layout()
+    fig.savefig(out_path, dpi=120)
+    plt.close(fig)
+
+
+def plot_obs(rows, out_path):
+    norm = np.array([parse_floats(r["norm_obs"]) for r in rows])
+    raw  = np.array([parse_floats(r["raw_obs"])  for r in rows])
+    if norm.size == 0:
+        return
+    n_dims = norm.shape[1]
+    labels = [
+        "dog_x", "dog_y", "com-dog_x", "com-dog_y",
+        "far1-com_x", "far1-com_y", "far2-com_x", "far2-com_y",
+        "far3-com_x", "far3-com_y", "pen-com_x", "pen-com_y",
+        "pen-far1_x", "pen-far1_y", "radius", "frac_active",
+    ][:n_dims]
+
+    t = np.arange(norm.shape[0])
+    fig, axes = plt.subplots(n_dims, 1, figsize=(11, 1.0 * n_dims), sharex=True)
+    if n_dims == 1: axes = [axes]
+    for i in range(n_dims):
+        axes[i].plot(t, raw[:, i], color="tab:gray", lw=0.6, alpha=0.6, label="raw")
+        axes[i].plot(t, norm[:, i], color="tab:red", lw=0.8, label="normalised")
+        axes[i].set_ylabel(labels[i], fontsize=8)
+        axes[i].tick_params(labelsize=7)
+        if i == 0:
+            axes[i].legend(fontsize=7, loc="upper right")
+    axes[-1].set_xlabel("step")
+    fig.suptitle("Observation values over time (raw vs VecNormalize-normalised)")
+    plt.tight_layout()
+    fig.savefig(out_path, dpi=110)
+    plt.close(fig)
+
+
+def main():
+    p = argparse.ArgumentParser()
+    here = os.path.dirname(os.path.abspath(__file__))
+    p.add_argument("--csv", default=os.path.join(here, "debug.csv"))
+    p.add_argument("--out-dir", default=os.path.join(here, "debug_out"))
+    args = p.parse_args()
+
+    rows = load_csv(args.csv)
+    os.makedirs(args.out_dir, exist_ok=True)
+    print(f"loaded {len(rows)} rows from {args.csv}")
+    plot_trajectory(rows, os.path.join(args.out_dir, "trajectory.png"))
+    plot_actions(rows,    os.path.join(args.out_dir, "actions.png"))
+    plot_obs(rows,        os.path.join(args.out_dir, "obs.png"))
+    print(f"saved trajectory.png + actions.png + obs.png to {args.out_dir}/")
+
+
+if __name__ == "__main__":
+    main()
@@ -4,7 +4,7 @@ Shepherd Dog RL controller — runs a trained SB3 PPO policy inside Webots.
 Setup
 -----
 1. Copy your trained files into this directory:
-       controllers/shepherd_dog_rl/best_model.zip
+       controllers/shepherd_dog_rl/final_model.zip
       controllers/shepherd_dog_rl/vecnorm.pkl

 2. In field.wbt, set the ShepherdDog robot's controller field to
@@ -15,9 +15,15 @@ Setup
   from the default of 5.

 The controller reads GPS (dog position) and Receiver (sheep broadcasts),
-builds the same 13-dim flock observation the training env used, normalises
+builds the same 16-dim flock observation the training env used, normalises
 it with the saved VecNormalize stats, and converts the (vx, vy) policy
 output into differential wheel speeds.
+
+Debug logging
+-------------
+While DEBUG_ENABLED is True, a per-step CSV (dog pos, sheep positions,
+raw obs, normalised obs, action) is written to debug.csv alongside this
+script. Use plot_debug.py to render trajectories from it.
 """

 import sys
@@ -48,8 +54,10 @@ EAR_AMPLITUDE = 0.35
 EAR_RATE      = 8.0

 # ── model paths ─────────────────────────────────────────────────────────────
-MODEL_PATH  = os.path.join(_HERE, "best_model.zip")
+MODEL_PATH   = os.path.join(_HERE, "final_model.zip")
 VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
+DEBUG_CSV    = os.path.join(_HERE, "debug.csv")
+DEBUG_ENABLED = True   # set False to disable debug.csv logging


 def norm_angle(a: float) -> float:
@@ -148,13 +156,26 @@ vecnorm   = VecNormalize.load(VECNORM_PATH, dummy_env)
 vecnorm.training    = False
 vecnorm.norm_reward = False

-model = PPO.load(MODEL_PATH)
+model = PPO.load(MODEL_PATH, device="cpu")
 print(f"[RL dog] Model loaded — running with n_sheep={n_sheep}")

 # ── Runtime state ─────────────────────────────────────────────────────────────
 sheep_positions: dict = {}   # {name: (x, y)} — updated every step from receiver
 step_count = 0

+# Debug CSV — written every step when DEBUG_ENABLED is True
+debug_file = None
+if DEBUG_ENABLED:
+    import csv
+    debug_file = open(DEBUG_CSV, "w", newline="")
+    debug_writer = csv.writer(debug_file)
+    debug_writer.writerow([
+        "step", "dog_x", "dog_y", "heading",
+        "sheep_xs", "sheep_ys", "n_active", "n_penned",
+        "raw_obs", "norm_obs", "vx", "vy",
+    ])
+    print(f"[RL dog] DEBUG logging to {DEBUG_CSV}")
+

 def bearing() -> float:
    """Current robot heading in world frame (radians)."""
@@ -226,5 +247,22 @@ while robot.step(timestep) != -1:
    if step_count % 100 == 0:
        n_in_pen = sum(1 for x, y in sheep_positions.values() if in_pen(x, y))
        print(f"[RL dog] step={step_count}  known_sheep={len(sheep_positions)}"
-              f"  penned={n_in_pen}/{n_sheep}"
+              f"  penned={n_in_pen}/{n_sheep}  dog=({dog_pos[0]:.2f},{dog_pos[1]:.2f})"
              f"  action=({vx:.2f}, {vy:.2f})")
+
+    # Debug CSV row
+    if debug_file is not None:
+        n_active = sum(1 for x, y in sheep_positions.values() if not in_pen(x, y))
+        n_in_pen = len(sheep_positions) - n_active
+        debug_writer.writerow([
+            step_count, f"{dog_pos[0]:.4f}", f"{dog_pos[1]:.4f}",
+            f"{bearing():.4f}",
+            ";".join(f"{v[0]:.3f}" for v in sheep_positions.values()),
+            ";".join(f"{v[1]:.3f}" for v in sheep_positions.values()),
+            n_active, n_in_pen,
+            ";".join(f"{x:.4f}" for x in raw_obs),
+            ";".join(f"{x:.4f}" for x in obs_norm[0]),
+            f"{vx:.4f}", f"{vy:.4f}",
+        ])
+        if step_count % 200 == 0:
+            debug_file.flush()