Approach refinement

2026-04-26 12:59:04 +01:00
parent acf0810425
commit a44ddb7b08
17 changed files with 10593 additions and 194 deletions
@@ -0,0 +1,153 @@
 """
 Render Webots-side debug trajectory from debug.csv.
 The shepherd_dog_rl controller writes per-step state to debug.csv when its
 DEBUG_ENABLED flag is True. This script reads it and produces:
  trajectory.png   — dog path + sheep paths overlaid on the field
  obs.png          — raw vs normalised observation values over time
  actions.png      — vx, vy time series
 Run:
    python plot_debug.py                    # uses debug.csv next to this file
    python plot_debug.py --csv path/to.csv --out-dir somewhere/
 """
 import argparse
 import csv
 import os
 import sys
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
 import numpy as np
 def load_csv(path):
    """Read the controller's debug CSV into a list of row dicts.

    Args:
        path: Location of the CSV file; its first line is the header.

    Returns:
        One ``dict`` per data row, keyed by the header names, with every
        value still a string.

    Raises:
        SystemExit: If the file contains no data rows.
    """
    # The csv module expects files opened with newline="" so that line
    # endings embedded in quoted fields are not rewritten by the text layer.
    with open(path, newline="") as f:
        rows = list(csv.DictReader(f))
    if not rows:
        sys.exit(f"empty CSV: {path}")
    return rows
 def parse_floats(s):
    """Split a ``;``-separated string into floats, ignoring empty tokens."""
    values = []
    for token in s.split(";"):
        if token:
            values.append(float(token))
    return values
 def plot_trajectory(rows, out_path):
    """Draw the dog path and each sheep path over the field and save a PNG.

    Args:
        rows: Row dicts from the debug CSV; reads the "dog_x", "dog_y",
            "sheep_xs", "sheep_ys" and "n_penned" columns.
        out_path: Destination image path.
    """
    dog_colour = "#4e342e"
    fence_colour = "#795548"
    fig, ax = plt.subplots(figsize=(7, 7))
    ax.set_xlim(-16, 16)
    ax.set_ylim(-16, 16)
    ax.set_aspect("equal")
    ax.set_facecolor("#dcedc8")
    boundary = mpatches.Rectangle((-15, -15), 30, 30,
                                  fill=False, edgecolor=fence_colour, lw=2)
    pen = mpatches.Rectangle((10, -15), 3, 7,
                             facecolor="#ffe082", edgecolor=fence_colour, lw=2)
    ax.add_patch(boundary)
    ax.add_patch(pen)
    ax.text(11.5, -11.5, "pen", ha="center", va="center", fontsize=8)
    dog_x = [float(row["dog_x"]) for row in rows]
    dog_y = [float(row["dog_y"]) for row in rows]
    ax.plot(dog_x, dog_y, color=dog_colour, lw=1.5, alpha=0.7, label="dog")
    # Square marks where the dog started, diamond where it ended.
    ax.plot(dog_x[0], dog_y[0], "s", color=dog_colour, ms=10)
    ax.plot(dog_x[-1], dog_y[-1], "D", color=dog_colour, ms=10)
    # Each row stores all sheep coordinates; regroup them into per-sheep tracks.
    x_by_row = [parse_floats(row["sheep_xs"]) for row in rows]
    y_by_row = [parse_floats(row["sheep_ys"]) for row in rows]
    if x_by_row and x_by_row[-1]:
        palette = ["#e41a1c","#377eb8","#4daf4a","#984ea3","#ff7f00",
                   "#a65628","#f781bf","#999999","#66c2a5","#fc8d62"]
        # The last row decides how many sheep are drawn; rows that list fewer
        # sheep simply contribute no sample to the later tracks.
        for idx in range(len(x_by_row[-1])):
            track_x = [vals[idx] for vals in x_by_row if idx < len(vals)]
            track_y = [vals[idx] for vals in y_by_row if idx < len(vals)]
            if not track_x:
                continue
            colour = palette[idx % len(palette)]
            ax.plot(track_x, track_y, color=colour, lw=0.8, alpha=0.6,
                    label=f"sheep {idx+1}")
            ax.plot(track_x[0], track_y[0], "o", color=colour, ms=6)
            ax.plot(track_x[-1], track_y[-1], "*", color=colour, ms=10)
    penned_count = int(rows[-1]["n_penned"])
    ax.set_title(
        f"Webots trajectory  {len(rows)} steps  penned={penned_count}",
        fontsize=12)
    ax.legend(loc="upper left", fontsize=7, ncol=2)
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
 def plot_actions(rows, out_path):
    """Plot the vx, vy and action-magnitude time series and save a PNG.

    Args:
        rows: Row dicts from the debug CSV; reads the "vx" and "vy" columns.
        out_path: Destination image path.
    """
    steps = np.arange(len(rows))
    vx = np.array([float(row["vx"]) for row in rows])
    vy = np.array([float(row["vy"]) for row in rows])
    magnitude = np.sqrt(vx ** 2 + vy ** 2)
    fig, (ax_vx, ax_vy, ax_mag) = plt.subplots(
        3, 1, figsize=(12, 7), sharex=True)
    # The two component panels share the same layout, so draw them in a loop.
    component_panels = (
        (ax_vx, vx, "tab:red", "vx"),
        (ax_vy, vy, "tab:blue", "vy"),
    )
    for ax, series, colour, name in component_panels:
        ax.plot(steps, series, color=colour, lw=0.8)
        ax.set_ylabel(name)
        ax.axhline(0, color="black", lw=0.4)
        ax.set_ylim(-1.1, 1.1)
    ax_mag.plot(steps, magnitude, color="tab:purple", lw=0.8)
    ax_mag.set_ylabel("||action||")
    # sqrt(2) is the magnitude reached when both components sit at +/-1.
    ax_mag.axhline(np.sqrt(2), color="orange", ls="--", lw=1,
                   label="saturated √2")
    ax_mag.axhline(1.0, color="gray", ls="--", lw=1)
    ax_mag.set_xlabel("step")
    ax_mag.legend(fontsize=8)
    fig.suptitle("Webots action time series")
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
 def plot_obs(rows, out_path):
    """Plot every observation dimension over time, raw against normalised.

    One subplot is drawn per dimension of the "norm_obs" column. The first
    16 dimensions use the built-in names; any further dimension is labelled
    ``obs[i]`` so a wider observation vector no longer raises IndexError.
    The raw series is overlaid only when "raw_obs" has the same shape as
    "norm_obs".

    Args:
        rows: Row dicts from the debug CSV; reads "norm_obs" and "raw_obs".
        out_path: Destination image path. Nothing is written when the
            normalised observations are empty.
    """
    norm = np.array([parse_floats(r["norm_obs"]) for r in rows])
    raw = np.array([parse_floats(r["raw_obs"]) for r in rows])
    if norm.size == 0:
        return
    n_dims = norm.shape[1]
    known_labels = [
        "dog_x", "dog_y", "com-dog_x", "com-dog_y",
        "far1-com_x", "far1-com_y", "far2-com_x", "far2-com_y",
        "far3-com_x", "far3-com_y", "pen-com_x", "pen-com_y",
        "pen-far1_x", "pen-far1_y", "radius", "frac_active",
    ]
    # Fall back to a generic name for dimensions beyond the known list.
    labels = known_labels[:n_dims] + [
        f"obs[{i}]" for i in range(len(known_labels), n_dims)]
    # Indexing raw column-by-column is only valid when it lines up with norm.
    has_raw = raw.shape == norm.shape
    t = np.arange(norm.shape[0])
    # squeeze=False always yields a 2-D axes array, so n_dims == 1 needs no
    # special case.
    fig, axes = plt.subplots(n_dims, 1, figsize=(11, 1.0 * n_dims),
                             sharex=True, squeeze=False)
    axes = axes[:, 0]
    for i, ax in enumerate(axes):
        if has_raw:
            ax.plot(t, raw[:, i], color="tab:gray", lw=0.6, alpha=0.6,
                    label="raw")
        ax.plot(t, norm[:, i], color="tab:red", lw=0.8, label="normalised")
        ax.set_ylabel(labels[i], fontsize=8)
        ax.tick_params(labelsize=7)
        if i == 0:
            ax.legend(fontsize=7, loc="upper right")
    axes[-1].set_xlabel("step")
    fig.suptitle("Observation values over time (raw vs VecNormalize-normalised)")
    plt.tight_layout()
    fig.savefig(out_path, dpi=110)
    plt.close(fig)
 def main():
    """Parse the command line, load the debug CSV and write the three plots."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    parser = argparse.ArgumentParser()
    parser.add_argument("--csv", default=os.path.join(script_dir, "debug.csv"))
    parser.add_argument("--out-dir",
                        default=os.path.join(script_dir, "debug_out"))
    opts = parser.parse_args()
    # Load first so an empty or missing CSV fails before the directory is made.
    rows = load_csv(opts.csv)
    os.makedirs(opts.out_dir, exist_ok=True)
    print(f"loaded {len(rows)} rows from {opts.csv}")
    renderers = (
        ("trajectory.png", plot_trajectory),
        ("actions.png", plot_actions),
        ("obs.png", plot_obs),
    )
    for file_name, render in renderers:
        render(rows, os.path.join(opts.out_dir, file_name))
    print(f"saved trajectory.png + actions.png + obs.png to {opts.out_dir}/")
 if __name__ == "__main__":
    main()
@@ -4,7 +4,7 @@ Shepherd Dog RL controller — runs a trained SB3 PPO policy inside Webots.
 Setup
 -----
 1. Copy your trained files into this directory:
-       controllers/shepherd_dog_rl/best_model.zip
+       controllers/shepherd_dog_rl/final_model.zip
       controllers/shepherd_dog_rl/vecnorm.pkl
 2. In field.wbt, set the ShepherdDog robot's controller field to
@@ -15,9 +15,15 @@ Setup
   from the default of 5.
 The controller reads GPS (dog position) and Receiver (sheep broadcasts),
-builds the same 13-dim flock observation the training env used, normalises
+builds the same 16-dim flock observation the training env used, normalises
 it with the saved VecNormalize stats, and converts the (vx, vy) policy
 output into differential wheel speeds.
 Debug logging
 -------------
 Leave DEBUG_ENABLED = True (module-level constant) to write a per-step CSV
 (dog pos, heading, sheep positions, raw obs, normalised obs, action) to
 debug.csv alongside this script; set it to False to disable logging. Use
 plot_debug.py to render trajectories from it.
 """
 import sys
@@ -48,8 +54,10 @@ EAR_AMPLITUDE = 0.35
 EAR_RATE      = 8.0
 # ── model paths ─────────────────────────────────────────────────────────────
-MODEL_PATH  = os.path.join(_HERE, "best_model.zip")
+MODEL_PATH   = os.path.join(_HERE, "final_model.zip")
 VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
 DEBUG_CSV    = os.path.join(_HERE, "debug.csv")
 DEBUG_ENABLED = True   # set False to disable debug.csv logging
 def norm_angle(a: float) -> float:
@@ -148,13 +156,26 @@ vecnorm   = VecNormalize.load(VECNORM_PATH, dummy_env)
 vecnorm.training    = False
 vecnorm.norm_reward = False
-model = PPO.load(MODEL_PATH)
+model = PPO.load(MODEL_PATH, device="cpu")
 print(f"[RL dog] Model loaded — running with n_sheep={n_sheep}")
 # ── Runtime state ─────────────────────────────────────────────────────────────
 sheep_positions: dict = {}   # {name: (x, y)} — updated every step from receiver
 step_count = 0
 # Debug CSV — written every step when DEBUG_ENABLED is True
 debug_file = None
 if DEBUG_ENABLED:
    import csv
    debug_file = open(DEBUG_CSV, "w", newline="")
    debug_writer = csv.writer(debug_file)
    debug_writer.writerow([
        "step", "dog_x", "dog_y", "heading",
        "sheep_xs", "sheep_ys", "n_active", "n_penned",
        "raw_obs", "norm_obs", "vx", "vy",
    ])
    print(f"[RL dog] DEBUG logging to {DEBUG_CSV}")
 def bearing() -> float:
    """Current robot heading in world frame (radians)."""
@@ -226,5 +247,22 @@ while robot.step(timestep) != -1:
    if step_count % 100 == 0:
        n_in_pen = sum(1 for x, y in sheep_positions.values() if in_pen(x, y))
        print(f"[RL dog] step={step_count}  known_sheep={len(sheep_positions)}"
-              f"  penned={n_in_pen}/{n_sheep}"
+              f"  penned={n_in_pen}/{n_sheep}  dog=({dog_pos[0]:.2f},{dog_pos[1]:.2f})"
              f"  action=({vx:.2f}, {vy:.2f})")
    # Debug CSV row
    if debug_file is not None:
        n_active = sum(1 for x, y in sheep_positions.values() if not in_pen(x, y))
        n_in_pen = len(sheep_positions) - n_active
        debug_writer.writerow([
            step_count, f"{dog_pos[0]:.4f}", f"{dog_pos[1]:.4f}",
            f"{bearing():.4f}",
            ";".join(f"{v[0]:.3f}" for v in sheep_positions.values()),
            ";".join(f"{v[1]:.3f}" for v in sheep_positions.values()),
            n_active, n_in_pen,
            ";".join(f"{x:.4f}" for x in raw_obs),
            ";".join(f"{x:.4f}" for x in obs_norm[0]),
            f"{vx:.4f}", f"{vy:.4f}",
        ])
        if step_count % 200 == 0:
            debug_file.flush()
@@ -5,7 +5,7 @@
    "W_COMPLETE": 100.0,
    "W_STEP_COST": 0.02,
    "W_COMPACT": 0.0,
-    "W_WALL_TOUCH": 0.01,
+    "W_WALL_TOUCH": 0.0,
    "WALL_TOUCH_BUFFER": 0.4,
    "ALIGN_SHAPE": "standoff",
    "ALIGN_GATED": true,
@@ -25,12 +25,7 @@ import os
 import time
 from copy import deepcopy
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
 import numpy as np
 from matplotlib.collections import LineCollection
 from stable_baselines3 import PPO
 from stable_baselines3.common.callbacks import BaseCallback
 from stable_baselines3.common.vec_env import (
@@ -40,15 +35,13 @@ from stable_baselines3.common.vec_env import (
 )
 from herding_env import HerdingEnv
-
+from viz import (
-
+    run_and_record,
-# ── Colours ──────────────────────────────────────────────────────────────────
+    plot_trajectory,
-
+    plot_timeseries,
-SHEEP_COLORS = [
+    plot_success_rate,
-    "#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00",
+    save_episode_gif,
-    "#a65628", "#f781bf", "#999999", "#66c2a5", "#fc8d62",
+)
 ]
 DOG_COLOR = "#4e342e"
 # ── Callbacks ────────────────────────────────────────────────────────────────
@@ -198,178 +191,6 @@ def evaluate(model, vn_template, n_sheep, n_episodes, max_steps,
    return result
 # ── Visualization helpers ────────────────────────────────────────────────────
 def _draw_field(ax):
    ax.set_xlim(-16, 16)
    ax.set_ylim(-16, 16)
    ax.set_aspect("equal")
    ax.set_facecolor("#dcedc8")
    ax.add_patch(mpatches.Rectangle((-15, -15), 30, 30,
                 fill=False, edgecolor="#795548", lw=2))
    ax.add_patch(mpatches.Rectangle((10, -15), 3, 7,
                 facecolor="#ffe082", edgecolor="#795548", lw=2))
    ax.text(11.5, -11.5, "pen", ha="center", va="center",
            fontsize=8, color="#795548")
 def _faded_path(ax, xs, ys, color, lw=1.5, label=None):
    n = len(xs)
    if n < 2:
        return
    points = np.array([xs, ys]).T.reshape(-1, 1, 2)
    segs = np.concatenate([points[:-1], points[1:]], axis=1)
    alphas = np.linspace(0.15, 1.0, len(segs))
    colors = [(*matplotlib.colors.to_rgb(color), a) for a in alphas]
    ax.add_collection(LineCollection(segs, colors=colors, linewidth=lw))
    if label:
        ax.plot([], [], color=color, lw=lw, label=label)
 def run_and_record(model, vn_template, n_sheep, max_steps,
                   reward_cfg=None, seed=42):
    """Run one deterministic episode and return full history."""
    raw = DummyVecEnv([make_env(n_sheep, seed, max_steps, reward_cfg)])
    vn = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
    vn.obs_rms = deepcopy(vn_template.obs_rms)
    vn.ret_rms = deepcopy(vn_template.ret_rms)
    obs = vn.reset()
    inner = vn.envs[0]
    done = False
    dog_xs, dog_ys = [], []
    sheep_xs = [[] for _ in range(n_sheep)]
    sheep_ys = [[] for _ in range(n_sheep)]
    radii = []
    pen_dists = [[] for _ in range(n_sheep)]
    action_mags = []
    rewards = []
    penned_at = [None] * n_sheep
    step = 0
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, dones, infos = vn.step(action)
        done = dones[0]
        step += 1
        dog_xs.append(float(inner.dog_pos[0]))
        dog_ys.append(float(inner.dog_pos[1]))
        com, radius, _ = inner._flock_stats()
        radii.append(radius)
        rewards.append(float(reward[0]))
        action_mags.append(float(np.linalg.norm(action[0])))
        for i in range(n_sheep):
            sheep_xs[i].append(float(inner.sheep_pos[i][0]))
            sheep_ys[i].append(float(inner.sheep_pos[i][1]))
            pen_dists[i].append(
                float(np.linalg.norm(inner.sheep_pos[i] - inner.PEN_CENTER)))
            if inner.penned[i] and penned_at[i] is None:
                penned_at[i] = step
    n_penned = infos[0].get("n_penned", 0)
    vn.close()
    return dict(
        dog_xs=dog_xs, dog_ys=dog_ys,
        sheep_xs=sheep_xs, sheep_ys=sheep_ys,
        radii=radii, pen_dists=pen_dists,
        action_mags=action_mags, rewards=rewards,
        penned_at=penned_at,
        n_penned=n_penned, n_sheep=n_sheep,
        success=n_penned == n_sheep, steps=step,
    )
 def plot_trajectory(hist, out_path):
    fig, ax = plt.subplots(figsize=(7, 7))
    _draw_field(ax)
    for i in range(hist["n_sheep"]):
        c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
        xs, ys = hist["sheep_xs"][i], hist["sheep_ys"][i]
        _faded_path(ax, xs, ys, c, lw=1.2, label=f"sheep {i+1}")
        ax.plot(xs[0], ys[0], "o", color=c, ms=7, zorder=4)
        end = hist["penned_at"][i] if hist["penned_at"][i] is not None else -1
        ax.plot(xs[end], ys[end], "*", color=c, ms=11, zorder=5)
    _faded_path(ax, hist["dog_xs"], hist["dog_ys"], DOG_COLOR, lw=2.0,
                label="dog")
    ax.plot(hist["dog_xs"][0], hist["dog_ys"][0], "s", color=DOG_COLOR,
            ms=10, zorder=5)
    ax.plot(hist["dog_xs"][-1], hist["dog_ys"][-1], "D", color=DOG_COLOR,
            ms=10, zorder=5)
    result = ("SUCCESS" if hist["success"]
              else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})")
    ax.set_title(f"n={hist['n_sheep']}  {result}  {hist['steps']} steps",
                 fontsize=12)
    ax.legend(loc="upper left", fontsize=8)
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
 def plot_timeseries(hist, out_path):
    t = np.arange(hist["steps"])
    fig, axes = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
    axes[0].plot(t, hist["radii"], color="steelblue")
    axes[0].axhline(5.0, color="orange", ls="--", lw=1, label="compact (5m)")
    axes[0].set_ylabel("flock radius (m)")
    axes[0].legend(fontsize=8)
    axes[0].set_title("Flock radius")
    for i in range(hist["n_sheep"]):
        c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
        axes[1].plot(t, hist["pen_dists"][i], color=c, lw=1,
                     label=f"sheep {i+1}")
        if hist["penned_at"][i] is not None:
            axes[1].axvline(hist["penned_at"][i], color=c, ls=":", lw=1)
    axes[1].set_ylabel("dist to pen (m)")
    axes[1].legend(fontsize=7, ncol=min(hist["n_sheep"], 5))
    axes[1].set_title("Per-sheep distance to pen")
    axes[2].plot(t, hist["action_mags"], color="tomato", lw=1)
    axes[2].axhline(1.0, color="gray", ls="--", lw=1, label="max")
    axes[2].set_ylabel("action ||(vx,vy)||")
    axes[2].set_ylim(0, 1.5)
    axes[2].set_title("Dog action magnitude")
    axes[2].legend(fontsize=8)
    axes[3].plot(t, hist["rewards"], color="purple", lw=1, alpha=0.7)
    axes[3].axhline(0, color="black", lw=0.5)
    axes[3].set_ylabel("reward")
    axes[3].set_xlabel("step")
    axes[3].set_title("Reward per step")
    result = ("SUCCESS" if hist["success"]
              else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})")
    fig.suptitle(f"n_sheep={hist['n_sheep']}  {result}  {hist['steps']} steps",
                 fontsize=13)
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
 def plot_success_rate(stage_results, out_path):
    fig, ax = plt.subplots(figsize=(8, 4))
    ns = [r["n_sheep"] for r in stage_results]
    srs = [r["sr"] * 100 for r in stage_results]
    bars = ax.bar(ns, srs, color="steelblue", edgecolor="white")
    ax.set_xlabel("Sheep count")
    ax.set_ylabel("Success rate (%)")
    ax.set_ylim(0, 105)
    ax.axhline(90, color="orange", ls="--", lw=1, label="90% target")
    for bar, sr in zip(bars, srs):
        ax.text(bar.get_x() + bar.get_width() / 2,
                bar.get_height() + 1, f"{sr:.0f}%",
                ha="center", fontsize=9)
    ax.legend()
    ax.set_title("Evaluation success rate per sheep count")
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
 # ── CLI ──────────────────────────────────────────────────────────────────────
@@ -400,6 +221,11 @@ def parse_args():
    p.add_argument("--max-steps", type=int, default=2500)
    p.add_argument("--eval-episodes", type=int, default=30)
    p.add_argument("--run-dir", type=str, default=None)
    p.add_argument("--no-gif", action="store_true",
                   help="Skip per-stage GIF rendering (PNGs still produced).")
    p.add_argument("--gif-fps", type=int, default=20)
    p.add_argument("--gif-skip", type=int, default=3,
                   help="Keep every Nth frame (smaller GIF; default 3).")
    return p.parse_args()
@@ -441,13 +267,15 @@ def main():
    vn = VecNormalize(train_env, norm_obs=True, norm_reward=True,
                      clip_obs=10.0)
-    # Model
+    # Model — force CPU (PPO with MLP runs faster on CPU than GPU; SB3 warns
    # about this otherwise).
    model = PPO(
        "MlpPolicy", vn,
        learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10,
        gamma=0.995, gae_lambda=0.95, clip_range=0.2,
        ent_coef=cfg.get("ent_coef", 0.02), vf_coef=0.5, max_grad_norm=0.5,
        policy_kwargs=dict(net_arch=[256, 256]),
        device="cpu",
        verbose=0,
    )
@@ -488,7 +316,7 @@ def main():
                print(f"  reward/step: " + "  ".join(
                    f"{k}={v:+.4f}" for k, v in rps.items()))
-            # Episode visualization
+            # Episode visualisation: trajectory + timeseries + animated GIF
            hist = run_and_record(model, vn, n, args.max_steps, rcfg,
                                  seed=1000 + n)
            tag = "success" if hist["success"] else "fail"
@@ -498,6 +326,11 @@ def main():
            plot_timeseries(
                hist,
                os.path.join(eval_dir, f"ts_{n}s_{tag}.png"))
            if not args.no_gif:
                save_episode_gif(
                    hist,
                    os.path.join(eval_dir, f"ep_{n}s_{tag}.gif"),
                    fps=args.gif_fps, skip=args.gif_skip)
            r["n_sheep"] = n
            stage_results.append(r)
@@ -0,0 +1,341 @@
 """
 All visualization for the herding policy: trajectory plots, timeseries plots,
 success-rate bar chart, and animated GIFs.
 Used both by train.py (auto-rendered after each curriculum stage) and as a CLI
 to render a fresh episode against a saved model.
 CLI usage:
    python viz.py --run-dir runs/v1 --n-sheep 5
    python viz.py --run-dir runs/v1 --n-sheep 10 --no-gif
    python viz.py --model runs/v1/final_model.zip --vecnorm runs/v1/vecnorm.pkl \\
        --n-sheep 3 --out-dir vis_v1_3sheep
 """
 import argparse
 import os
 import json
 from copy import deepcopy
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
 import matplotlib.animation as animation
 from matplotlib.collections import LineCollection
 import numpy as np
 from stable_baselines3 import PPO
 from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
 from herding_env import HerdingEnv
 # ── Palette ──────────────────────────────────────────────────────────────────
 SHEEP_COLORS = [
    "#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00",
    "#a65628", "#f781bf", "#999999", "#66c2a5", "#fc8d62",
 ]
 DOG_COLOR = "#4e342e"
 # ── Common drawing primitives ────────────────────────────────────────────────
 def draw_field(ax):
    """Set up ``ax`` with the field background, boundary and pen rectangle."""
    fence_colour = "#795548"
    ax.set_xlim(-16, 16)
    ax.set_ylim(-16, 16)
    ax.set_aspect("equal")
    ax.set_facecolor("#dcedc8")
    outline = dict(edgecolor=fence_colour, lw=2)
    boundary = mpatches.Rectangle((-15, -15), 30, 30, fill=False, **outline)
    pen = mpatches.Rectangle((10, -15), 3, 7, facecolor="#ffe082", **outline)
    for patch in (boundary, pen):
        ax.add_patch(patch)
    ax.text(11.5, -11.5, "pen", ha="center", va="center",
            fontsize=8, color=fence_colour)
 def faded_path(ax, xs, ys, color, lw=1.5, label=None):
    """Draw a path whose opacity ramps from 0.15 at the start to 1.0 at the end.

    Nothing is drawn, and no legend entry is added, when fewer than two
    points are given.
    """
    if len(xs) < 2:
        return
    vertices = np.array([xs, ys]).T.reshape(-1, 1, 2)
    # Pair each vertex with its successor to get one segment per step.
    segments = np.concatenate([vertices[:-1], vertices[1:]], axis=1)
    rgb = matplotlib.colors.to_rgb(color)
    rgba_per_segment = [
        (*rgb, alpha) for alpha in np.linspace(0.15, 1.0, len(segments))]
    collection = LineCollection(segments, colors=rgba_per_segment,
                                linewidth=lw)
    ax.add_collection(collection)
    if label:
        # Empty proxy line so the collection gets a legend entry.
        ax.plot([], [], color=color, lw=lw, label=label)
 # ── Episode rollout ──────────────────────────────────────────────────────────
 def make_eval_env(n_sheep, seed, max_steps, reward_cfg=None):
    """Return a zero-argument factory that builds a seeded ``HerdingEnv``.

    The factory constructs the env with the given settings and calls
    ``reset(seed=seed)`` on it once before returning it.
    """
    env_kwargs = dict(n_sheep=n_sheep, max_steps=max_steps,
                      reward_cfg=reward_cfg)
    def _build():
        herding_env = HerdingEnv(**env_kwargs)
        herding_env.reset(seed=seed)
        return herding_env
    return _build
 def run_and_record(model, vn_template, n_sheep, max_steps,
                   reward_cfg=None, seed=42):
    """Run one deterministic episode and return full trajectory history.

    A fresh single-env VecNormalize wrapper is built for the rollout; its
    observation and return statistics are deep-copied from ``vn_template`` so
    the template itself is not modified.

    Args:
        model: Policy exposing ``predict(obs, deterministic=True)``.
        vn_template: Object providing ``obs_rms`` and ``ret_rms`` to copy.
        n_sheep: Number of sheep; also the length of every per-sheep list.
        max_steps: Passed through to the environment factory.
        reward_cfg: Optional reward configuration passed to the factory.
        seed: Seed passed to the environment factory.

    Returns:
        dict with per-step lists ``dog_xs``, ``dog_ys``, ``radii``,
        ``action_mags``, ``rewards``; per-sheep lists of per-step values
        ``sheep_xs``, ``sheep_ys``, ``sheep_penned``, ``pen_dists``;
        ``penned_at`` (1-based step count at which each sheep was first seen
        penned, or None); and the scalars ``n_penned``, ``n_sheep``,
        ``success`` and ``steps``.
    """
    raw = DummyVecEnv([make_eval_env(n_sheep, seed, max_steps, reward_cfg)])
    # training=False keeps the running statistics frozen during the rollout.
    vn = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
    vn.obs_rms = deepcopy(vn_template.obs_rms)
    vn.ret_rms = deepcopy(vn_template.ret_rms)
    obs = vn.reset()
    # Direct handle on the wrapped env so its state can be read each step.
    inner = vn.envs[0]
    done = False
    dog_xs, dog_ys = [], []
    sheep_xs = [[] for _ in range(n_sheep)]
    sheep_ys = [[] for _ in range(n_sheep)]
    sheep_penned = [[] for _ in range(n_sheep)]
    radii = []
    pen_dists = [[] for _ in range(n_sheep)]
    action_mags = []
    rewards = []
    penned_at = [None] * n_sheep
    step = 0
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, dones, infos = vn.step(action)
        done = dones[0]
        # step is incremented before recording, so the sample for step k is
        # stored at list index k - 1.
        step += 1
        # NOTE(review): vectorised envs commonly reset a sub-env as soon as it
        # reports done; if that applies here, the values read from `inner` on
        # the final iteration describe the freshly reset episode rather than
        # the terminal state — confirm against the vec-env in use.
        dog_xs.append(float(inner.dog_pos[0]))
        dog_ys.append(float(inner.dog_pos[1]))
        com, radius, _ = inner._flock_stats()
        radii.append(radius)
        rewards.append(float(reward[0]))
        action_mags.append(float(np.linalg.norm(action[0])))
        for i in range(n_sheep):
            sheep_xs[i].append(float(inner.sheep_pos[i][0]))
            sheep_ys[i].append(float(inner.sheep_pos[i][1]))
            sheep_penned[i].append(bool(inner.penned[i]))
            pen_dists[i].append(
                float(np.linalg.norm(inner.sheep_pos[i] - inner.PEN_CENTER)))
            # Record only the first step at which this sheep shows as penned.
            if inner.penned[i] and penned_at[i] is None:
                penned_at[i] = step
    # The final count comes from the last step's info dict, not from `inner`.
    n_penned = infos[0].get("n_penned", 0)
    vn.close()
    return dict(
        dog_xs=dog_xs, dog_ys=dog_ys,
        sheep_xs=sheep_xs, sheep_ys=sheep_ys,
        sheep_penned=sheep_penned,
        radii=radii, pen_dists=pen_dists,
        action_mags=action_mags, rewards=rewards,
        penned_at=penned_at,
        n_penned=n_penned, n_sheep=n_sheep,
        success=n_penned == n_sheep, steps=step,
    )
 # ── Static plots ─────────────────────────────────────────────────────────────
 def plot_trajectory(hist, out_path):
    """Plot the faded dog and sheep paths of one episode and save a PNG.

    Each sheep gets a circle at its first sample and a star at the sample
    where it was first penned, or at its last sample if it never was. The
    dog gets a square at its first sample and a diamond at its last.

    Args:
        hist: Episode history dict as returned by ``run_and_record``; reads
            "n_sheep", "sheep_xs", "sheep_ys", "penned_at", "dog_xs",
            "dog_ys", "success", "n_penned" and "steps".
        out_path: Destination image path.
    """
    fig, ax = plt.subplots(figsize=(7, 7))
    draw_field(ax)
    for i in range(hist["n_sheep"]):
        c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
        xs, ys = hist["sheep_xs"][i], hist["sheep_ys"][i]
        faded_path(ax, xs, ys, c, lw=1.2, label=f"sheep {i+1}")
        ax.plot(xs[0], ys[0], "o", color=c, ms=7, zorder=4)
        # penned_at holds a 1-based step count while the position lists are
        # 0-based, so the sample for step k lives at index k - 1. Using k
        # directly pointed one sample late and overran the list when a sheep
        # was penned on the last recorded step.
        penned_step = hist["penned_at"][i]
        end = penned_step - 1 if penned_step is not None else -1
        ax.plot(xs[end], ys[end], "*", color=c, ms=11, zorder=5)
    faded_path(ax, hist["dog_xs"], hist["dog_ys"], DOG_COLOR, lw=2.0,
               label="dog")
    ax.plot(hist["dog_xs"][0], hist["dog_ys"][0], "s", color=DOG_COLOR,
            ms=10, zorder=5)
    ax.plot(hist["dog_xs"][-1], hist["dog_ys"][-1], "D", color=DOG_COLOR,
            ms=10, zorder=5)
    result = ("SUCCESS" if hist["success"]
              else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})")
    ax.set_title(f"n={hist['n_sheep']}  {result}  {hist['steps']} steps",
                 fontsize=12)
    ax.legend(loc="upper left", fontsize=8)
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
 def plot_timeseries(hist, out_path):
    """Plot four stacked per-step panels for one episode and save a PNG.

    The panels are flock radius, per-sheep distance to the pen, dog action
    magnitude and reward.

    Args:
        hist: Episode history dict; reads "steps", "radii", "n_sheep",
            "pen_dists", "penned_at", "action_mags", "rewards", "success"
            and "n_penned".
        out_path: Destination image path.
    """
    n_sheep = hist["n_sheep"]
    steps = np.arange(hist["steps"])
    fig, panels = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
    radius_ax, dist_ax, action_ax, reward_ax = panels
    # Panel 1: flock radius against the 5 m reference line.
    radius_ax.plot(steps, hist["radii"], color="steelblue")
    radius_ax.axhline(5.0, color="orange", ls="--", lw=1,
                      label="compact (5m)")
    radius_ax.set_ylabel("flock radius (m)")
    radius_ax.legend(fontsize=8)
    radius_ax.set_title("Flock radius")
    # Panel 2: one distance curve per sheep, dotted line where it was penned.
    for idx in range(n_sheep):
        colour = SHEEP_COLORS[idx % len(SHEEP_COLORS)]
        dist_ax.plot(steps, hist["pen_dists"][idx], color=colour, lw=1,
                     label=f"sheep {idx+1}")
        penned_step = hist["penned_at"][idx]
        if penned_step is None:
            continue
        dist_ax.axvline(penned_step, color=colour, ls=":", lw=1)
    dist_ax.set_ylabel("dist to pen (m)")
    dist_ax.legend(fontsize=7, ncol=min(n_sheep, 5))
    dist_ax.set_title("Per-sheep distance to pen")
    # Panel 3: action magnitude with a reference line at 1.0.
    action_ax.plot(steps, hist["action_mags"], color="tomato", lw=1)
    action_ax.axhline(1.0, color="gray", ls="--", lw=1, label="max")
    action_ax.set_ylabel("action ||(vx,vy)||")
    action_ax.set_ylim(0, 1.5)
    action_ax.set_title("Dog action magnitude")
    action_ax.legend(fontsize=8)
    # Panel 4: per-step reward around a zero baseline.
    reward_ax.plot(steps, hist["rewards"], color="purple", lw=1, alpha=0.7)
    reward_ax.axhline(0, color="black", lw=0.5)
    reward_ax.set_ylabel("reward")
    reward_ax.set_xlabel("step")
    reward_ax.set_title("Reward per step")
    if hist["success"]:
        outcome = "SUCCESS"
    else:
        outcome = f"FAIL ({hist['n_penned']}/{hist['n_sheep']})"
    fig.suptitle(f"n_sheep={hist['n_sheep']}  {outcome}  {hist['steps']} steps",
                 fontsize=13)
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
 def plot_success_rate(stage_results, out_path):
    """Save a bar chart of evaluation success rate (%) per sheep count.

    Args:
        stage_results: Iterable of dicts with "n_sheep" and "sr" entries;
            "sr" is multiplied by 100 for display.
        out_path: Destination image path.
    """
    sheep_counts = [entry["n_sheep"] for entry in stage_results]
    percentages = [entry["sr"] * 100 for entry in stage_results]
    fig, ax = plt.subplots(figsize=(8, 4))
    rects = ax.bar(sheep_counts, percentages, color="steelblue",
                   edgecolor="white")
    ax.set_xlabel("Sheep count")
    ax.set_ylabel("Success rate (%)")
    ax.set_ylim(0, 105)
    ax.axhline(90, color="orange", ls="--", lw=1, label="90% target")
    # Write the rounded percentage just above each bar.
    for rect, pct in zip(rects, percentages):
        x_mid = rect.get_x() + rect.get_width() / 2
        y_text = rect.get_height() + 1
        ax.text(x_mid, y_text, f"{pct:.0f}%", ha="center", fontsize=9)
    ax.legend()
    ax.set_title("Evaluation success rate per sheep count")
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
 # ── Animated GIF ─────────────────────────────────────────────────────────────
 def save_episode_gif(hist, out_path, fps=20, skip=3):
    """Render a recorded episode as an animated GIF.

    Args:
        hist: Episode history dict; reads "n_sheep", "steps", "dog_xs",
            "dog_ys", "sheep_xs", "sheep_ys" and "sheep_penned".
        out_path: Destination GIF path.
        fps: Playback frames per second.
        skip: Keep every ``skip``-th step as a frame (smaller file); values
            below 1 are treated as 1. The final step is always included.

    Returns:
        None. When ``hist`` holds no steps there is nothing to animate, so a
        message is printed and no file is written.
    """
    n_sheep = hist["n_sheep"]
    n_steps = hist["steps"]
    if n_steps <= 0:
        # An empty frame list would otherwise fail on frames[-1] below.
        print(f"  no steps recorded; skipping GIF {out_path}")
        return
    frames = list(range(0, n_steps, max(1, skip)))
    if frames[-1] != n_steps - 1:
        frames.append(n_steps - 1)
    fig, ax = plt.subplots(figsize=(6, 6))
    # try/finally so the figure is released even if rendering or saving fails.
    try:
        draw_field(ax)
        title = ax.text(0, 16.5, "", ha="center", fontsize=11)
        dog_marker, = ax.plot([], [], "s", color=DOG_COLOR, ms=12,
                              markeredgecolor="black", markeredgewidth=1.5,
                              zorder=5)
        sheep_markers = []
        for i in range(n_sheep):
            c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
            m, = ax.plot([], [], "o", color=c, ms=10,
                         markeredgecolor="#333", markeredgewidth=1, zorder=4)
            sheep_markers.append(m)
        dog_trail, = ax.plot([], [], color=DOG_COLOR, lw=1.0, alpha=0.5)
        def update(k):
            """Move every artist to its state at step index ``k``."""
            penned_now = sum(hist["sheep_penned"][i][k] for i in range(n_sheep))
            title.set_text(
                f"n={n_sheep}  step {k+1}/{hist['steps']}  "
                f"penned {penned_now}/{n_sheep}")
            dog_marker.set_data([hist["dog_xs"][k]], [hist["dog_ys"][k]])
            dog_trail.set_data(hist["dog_xs"][:k+1], hist["dog_ys"][:k+1])
            for i, m in enumerate(sheep_markers):
                m.set_data([hist["sheep_xs"][i][k]], [hist["sheep_ys"][i][k]])
                penned = hist["sheep_penned"][i][k]
                # Penned sheep switch to pink; the rest keep their own colour.
                m.set_color(
                    "deeppink" if penned else SHEEP_COLORS[i % len(SHEEP_COLORS)])
            return [title, dog_marker, dog_trail, *sheep_markers]
        anim = animation.FuncAnimation(
            fig, update, frames=frames, interval=1000 / fps, blit=False)
        anim.save(out_path, writer=animation.PillowWriter(fps=fps), dpi=80)
    finally:
        plt.close(fig)
 # ── CLI ──────────────────────────────────────────────────────────────────────
 def _resolve_paths(args):
    """Return ``(model_path, vecnorm_path, config_path)`` for the CLI args.

    With ``--run-dir`` set, all three paths are the standard file names
    inside that directory; otherwise the explicit ``--model``, ``--vecnorm``
    and ``--config`` values are returned unchanged (any may be None).
    """
    if not args.run_dir:
        return args.model, args.vecnorm, args.config
    standard_names = ("final_model.zip", "vecnorm.pkl", "config.json")
    return tuple(os.path.join(args.run_dir, name) for name in standard_names)
 def main():
    """CLI entry point: roll out a saved policy once and render the outputs.

    Loads the model and VecNormalize statistics, records one episode, then
    writes trajectory.png and timeseries.png, plus episode.gif unless
    ``--no-gif`` is given.
    """
    parser = argparse.ArgumentParser(
        description="Render trajectory + timeseries + GIF for a saved policy.")
    parser.add_argument(
        "--run-dir", type=str, default=None,
        help="Run directory containing final_model.zip + vecnorm.pkl + config.json")
    for flag in ("--model", "--vecnorm", "--config"):
        parser.add_argument(flag, type=str, default=None)
    for flag, default in (("--n-sheep", 3), ("--seed", 42),
                          ("--max-steps", 2500)):
        parser.add_argument(flag, type=int, default=default)
    parser.add_argument("--out-dir", type=str, default=None)
    parser.add_argument("--no-gif", action="store_true",
                        help="Skip the animated GIF (PNG-only is faster).")
    parser.add_argument("--gif-fps", type=int, default=20)
    parser.add_argument("--gif-skip", type=int, default=3)
    args = parser.parse_args()
    model_path, vn_path, cfg_path = _resolve_paths(args)
    if not (model_path and vn_path):
        parser.error("either --run-dir or both --model and --vecnorm are required")
    # Keep only the config keys that HerdingEnv exposes as attributes.
    rcfg = None
    if cfg_path and os.path.exists(cfg_path):
        with open(cfg_path) as f:
            cfg = json.load(f)
        rcfg = {}
        for key, value in cfg.items():
            if hasattr(HerdingEnv, key):
                rcfg[key] = value
    # Default output directory sits next to the model file.
    if args.out_dir:
        out_dir = args.out_dir
    else:
        model_dir = os.path.dirname(os.path.abspath(model_path))
        out_dir = os.path.join(model_dir, f"vis_{args.n_sheep}s")
    os.makedirs(out_dir, exist_ok=True)
    print(f"Loading model:   {model_path}")
    print(f"Loading vecnorm: {vn_path}")
    model = PPO.load(model_path, device="cpu")
    # This wrapper only serves as the statistics template for run_and_record.
    env_factory = make_eval_env(args.n_sheep, args.seed, args.max_steps, rcfg)
    raw = DummyVecEnv([env_factory])
    vn = VecNormalize.load(vn_path, raw)
    print(f"Rolling out n_sheep={args.n_sheep} (seed={args.seed})...")
    hist = run_and_record(model, vn, args.n_sheep, args.max_steps,
                          reward_cfg=rcfg, seed=args.seed)
    if hist["success"]:
        result = "SUCCESS"
    else:
        result = f"FAIL ({hist['n_penned']}/{hist['n_sheep']})"
    print(f"  {result} in {hist['steps']} steps")
    plot_trajectory(hist, os.path.join(out_dir, "trajectory.png"))
    plot_timeseries(hist, os.path.join(out_dir, "timeseries.png"))
    print(f"  saved trajectory.png + timeseries.png to {out_dir}/")
    if args.no_gif:
        return
    gif_path = os.path.join(out_dir, "episode.gif")
    print(f"  rendering GIF (fps={args.gif_fps}, skip={args.gif_skip})...")
    save_episode_gif(hist, gif_path, fps=args.gif_fps, skip=args.gif_skip)
    print(f"  saved {gif_path}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,9 @@
 Webots Project File version R2025a
 perspectives: 000000ff00000000fd00000002000000010000011c00000298fc0200000001fb0000001400540065007800740045006400690074006f00720100000000000002980000003f00ffffff000000030000084300000238fc0100000001fb0000001a0043006f006e0073006f006c00650041006c006c0041006c006c0100000000000008430000006900ffffff000007250000029800000001000000020000000100000008fc00000000
 simulationViewPerspectives: 000000ff000000010000000200000100000006250100000002010000000100
 sceneTreePerspectives: 000000ff00000001000000030000001f000000c0000000000100000002010000000200
 maximizedDockId: -1
 centralWidgetVisible: 1
 orthographicViewHeight: 1
 textFiles: -1
 consoles: Console:All:All