Approach refinement

2026-04-26 12:59:04 +01:00
parent acf0810425
commit a44ddb7b08
17 changed files with 10593 additions and 194 deletions
@@ -25,12 +25,7 @@ import os
 import time
 from copy import deepcopy

-import matplotlib
-matplotlib.use("Agg")
-import matplotlib.pyplot as plt
-import matplotlib.patches as mpatches
 import numpy as np
-from matplotlib.collections import LineCollection
 from stable_baselines3 import PPO
 from stable_baselines3.common.callbacks import BaseCallback
 from stable_baselines3.common.vec_env import (
@@ -40,15 +35,13 @@ from stable_baselines3.common.vec_env import (
 )

 from herding_env import HerdingEnv
-
-
-# ── Colours ──────────────────────────────────────────────────────────────────
-
-SHEEP_COLORS = [
-    "#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00",
-    "#a65628", "#f781bf", "#999999", "#66c2a5", "#fc8d62",
-]
-DOG_COLOR = "#4e342e"
+from viz import (
+    run_and_record,
+    plot_trajectory,
+    plot_timeseries,
+    plot_success_rate,
+    save_episode_gif,
+)


 # ── Callbacks ────────────────────────────────────────────────────────────────
@@ -198,178 +191,6 @@ def evaluate(model, vn_template, n_sheep, n_episodes, max_steps,
    return result


-# ── Visualization helpers ────────────────────────────────────────────────────
-
-def _draw_field(ax):
-    ax.set_xlim(-16, 16)
-    ax.set_ylim(-16, 16)
-    ax.set_aspect("equal")
-    ax.set_facecolor("#dcedc8")
-    ax.add_patch(mpatches.Rectangle((-15, -15), 30, 30,
-                 fill=False, edgecolor="#795548", lw=2))
-    ax.add_patch(mpatches.Rectangle((10, -15), 3, 7,
-                 facecolor="#ffe082", edgecolor="#795548", lw=2))
-    ax.text(11.5, -11.5, "pen", ha="center", va="center",
-            fontsize=8, color="#795548")
-
-
-def _faded_path(ax, xs, ys, color, lw=1.5, label=None):
-    n = len(xs)
-    if n < 2:
-        return
-    points = np.array([xs, ys]).T.reshape(-1, 1, 2)
-    segs = np.concatenate([points[:-1], points[1:]], axis=1)
-    alphas = np.linspace(0.15, 1.0, len(segs))
-    colors = [(*matplotlib.colors.to_rgb(color), a) for a in alphas]
-    ax.add_collection(LineCollection(segs, colors=colors, linewidth=lw))
-    if label:
-        ax.plot([], [], color=color, lw=lw, label=label)
-
-
-def run_and_record(model, vn_template, n_sheep, max_steps,
-                   reward_cfg=None, seed=42):
-    """Run one deterministic episode and return full history."""
-    raw = DummyVecEnv([make_env(n_sheep, seed, max_steps, reward_cfg)])
-    vn = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
-    vn.obs_rms = deepcopy(vn_template.obs_rms)
-    vn.ret_rms = deepcopy(vn_template.ret_rms)
-
-    obs = vn.reset()
-    inner = vn.envs[0]
-    done = False
-
-    dog_xs, dog_ys = [], []
-    sheep_xs = [[] for _ in range(n_sheep)]
-    sheep_ys = [[] for _ in range(n_sheep)]
-    radii = []
-    pen_dists = [[] for _ in range(n_sheep)]
-    action_mags = []
-    rewards = []
-    penned_at = [None] * n_sheep
-    step = 0
-
-    while not done:
-        action, _ = model.predict(obs, deterministic=True)
-        obs, reward, dones, infos = vn.step(action)
-        done = dones[0]
-        step += 1
-
-        dog_xs.append(float(inner.dog_pos[0]))
-        dog_ys.append(float(inner.dog_pos[1]))
-        com, radius, _ = inner._flock_stats()
-        radii.append(radius)
-        rewards.append(float(reward[0]))
-        action_mags.append(float(np.linalg.norm(action[0])))
-
-        for i in range(n_sheep):
-            sheep_xs[i].append(float(inner.sheep_pos[i][0]))
-            sheep_ys[i].append(float(inner.sheep_pos[i][1]))
-            pen_dists[i].append(
-                float(np.linalg.norm(inner.sheep_pos[i] - inner.PEN_CENTER)))
-            if inner.penned[i] and penned_at[i] is None:
-                penned_at[i] = step
-
-    n_penned = infos[0].get("n_penned", 0)
-    vn.close()
-
-    return dict(
-        dog_xs=dog_xs, dog_ys=dog_ys,
-        sheep_xs=sheep_xs, sheep_ys=sheep_ys,
-        radii=radii, pen_dists=pen_dists,
-        action_mags=action_mags, rewards=rewards,
-        penned_at=penned_at,
-        n_penned=n_penned, n_sheep=n_sheep,
-        success=n_penned == n_sheep, steps=step,
-    )
-
-
-def plot_trajectory(hist, out_path):
-    fig, ax = plt.subplots(figsize=(7, 7))
-    _draw_field(ax)
-    for i in range(hist["n_sheep"]):
-        c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
-        xs, ys = hist["sheep_xs"][i], hist["sheep_ys"][i]
-        _faded_path(ax, xs, ys, c, lw=1.2, label=f"sheep {i+1}")
-        ax.plot(xs[0], ys[0], "o", color=c, ms=7, zorder=4)
-        end = hist["penned_at"][i] if hist["penned_at"][i] is not None else -1
-        ax.plot(xs[end], ys[end], "*", color=c, ms=11, zorder=5)
-    _faded_path(ax, hist["dog_xs"], hist["dog_ys"], DOG_COLOR, lw=2.0,
-                label="dog")
-    ax.plot(hist["dog_xs"][0], hist["dog_ys"][0], "s", color=DOG_COLOR,
-            ms=10, zorder=5)
-    ax.plot(hist["dog_xs"][-1], hist["dog_ys"][-1], "D", color=DOG_COLOR,
-            ms=10, zorder=5)
-    result = ("SUCCESS" if hist["success"]
-              else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})")
-    ax.set_title(f"n={hist['n_sheep']}  {result}  {hist['steps']} steps",
-                 fontsize=12)
-    ax.legend(loc="upper left", fontsize=8)
-    plt.tight_layout()
-    fig.savefig(out_path, dpi=120)
-    plt.close(fig)
-
-
-def plot_timeseries(hist, out_path):
-    t = np.arange(hist["steps"])
-    fig, axes = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
-
-    axes[0].plot(t, hist["radii"], color="steelblue")
-    axes[0].axhline(5.0, color="orange", ls="--", lw=1, label="compact (5m)")
-    axes[0].set_ylabel("flock radius (m)")
-    axes[0].legend(fontsize=8)
-    axes[0].set_title("Flock radius")
-
-    for i in range(hist["n_sheep"]):
-        c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
-        axes[1].plot(t, hist["pen_dists"][i], color=c, lw=1,
-                     label=f"sheep {i+1}")
-        if hist["penned_at"][i] is not None:
-            axes[1].axvline(hist["penned_at"][i], color=c, ls=":", lw=1)
-    axes[1].set_ylabel("dist to pen (m)")
-    axes[1].legend(fontsize=7, ncol=min(hist["n_sheep"], 5))
-    axes[1].set_title("Per-sheep distance to pen")
-
-    axes[2].plot(t, hist["action_mags"], color="tomato", lw=1)
-    axes[2].axhline(1.0, color="gray", ls="--", lw=1, label="max")
-    axes[2].set_ylabel("action ||(vx,vy)||")
-    axes[2].set_ylim(0, 1.5)
-    axes[2].set_title("Dog action magnitude")
-    axes[2].legend(fontsize=8)
-
-    axes[3].plot(t, hist["rewards"], color="purple", lw=1, alpha=0.7)
-    axes[3].axhline(0, color="black", lw=0.5)
-    axes[3].set_ylabel("reward")
-    axes[3].set_xlabel("step")
-    axes[3].set_title("Reward per step")
-
-    result = ("SUCCESS" if hist["success"]
-              else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})")
-    fig.suptitle(f"n_sheep={hist['n_sheep']}  {result}  {hist['steps']} steps",
-                 fontsize=13)
-    plt.tight_layout()
-    fig.savefig(out_path, dpi=120)
-    plt.close(fig)
-
-
-def plot_success_rate(stage_results, out_path):
-    fig, ax = plt.subplots(figsize=(8, 4))
-    ns = [r["n_sheep"] for r in stage_results]
-    srs = [r["sr"] * 100 for r in stage_results]
-    bars = ax.bar(ns, srs, color="steelblue", edgecolor="white")
-    ax.set_xlabel("Sheep count")
-    ax.set_ylabel("Success rate (%)")
-    ax.set_ylim(0, 105)
-    ax.axhline(90, color="orange", ls="--", lw=1, label="90% target")
-    for bar, sr in zip(bars, srs):
-        ax.text(bar.get_x() + bar.get_width() / 2,
-                bar.get_height() + 1, f"{sr:.0f}%",
-                ha="center", fontsize=9)
-    ax.legend()
-    ax.set_title("Evaluation success rate per sheep count")
-    plt.tight_layout()
-    fig.savefig(out_path, dpi=120)
-    plt.close(fig)
-

 # ── CLI ──────────────────────────────────────────────────────────────────────

@@ -400,6 +221,11 @@ def parse_args():
    p.add_argument("--max-steps", type=int, default=2500)
    p.add_argument("--eval-episodes", type=int, default=30)
    p.add_argument("--run-dir", type=str, default=None)
+    p.add_argument("--no-gif", action="store_true",
+                   help="Skip per-stage GIF rendering (PNGs still produced).")
+    p.add_argument("--gif-fps", type=int, default=20)
+    p.add_argument("--gif-skip", type=int, default=3,
+                   help="Keep every Nth frame (smaller GIF; default 3).")
    return p.parse_args()


@@ -441,13 +267,15 @@ def main():
    vn = VecNormalize(train_env, norm_obs=True, norm_reward=True,
                      clip_obs=10.0)

-    # Model
+    # Model — force CPU (PPO with MLP runs faster on CPU than GPU; SB3 warns
+    # about this otherwise).
    model = PPO(
        "MlpPolicy", vn,
        learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10,
        gamma=0.995, gae_lambda=0.95, clip_range=0.2,
        ent_coef=cfg.get("ent_coef", 0.02), vf_coef=0.5, max_grad_norm=0.5,
        policy_kwargs=dict(net_arch=[256, 256]),
+        device="cpu",
        verbose=0,
    )

@@ -488,7 +316,7 @@ def main():
                print(f"  reward/step: " + "  ".join(
                    f"{k}={v:+.4f}" for k, v in rps.items()))

-            # Episode visualization
+            # Episode visualisation: trajectory + timeseries + animated GIF
            hist = run_and_record(model, vn, n, args.max_steps, rcfg,
                                  seed=1000 + n)
            tag = "success" if hist["success"] else "fail"
@@ -498,6 +326,11 @@ def main():
            plot_timeseries(
                hist,
                os.path.join(eval_dir, f"ts_{n}s_{tag}.png"))
+            if not args.no_gif:
+                save_episode_gif(
+                    hist,
+                    os.path.join(eval_dir, f"ep_{n}s_{tag}.gif"),
+                    fps=args.gif_fps, skip=args.gif_skip)

            r["n_sheep"] = n
            stage_results.append(r)