Sheep training flock of 10 fix?

2026-04-24 16:46:02 +01:00
parent 3574d57ba2
commit e0426bf320
1 changed files with 316 additions and 0 deletions
@@ -0,0 +1,316 @@
+"""
+Single-episode visualization for the herding policy.
+
+Outputs (all saved to --out-dir):
+  trajectory.png  — full field view: dog path + every sheep path
+  timeseries.png  — radius, per-sheep pen distance, action magnitude, reward
+  episode.gif     — animated replay (slow enough to read)
+
+Run with no model to watch a RANDOM policy (useful baseline):
+  python visualize.py --random --n-sheep 3 --out-dir vis_random/
+
+Usage:
+  python visualize.py \\
+      --model runs/ppo_consolidation/final_model.zip \\
+      --vecnorm runs/ppo_consolidation/vecnorm.pkl \\
+      --n-sheep 3 --out-dir vis_out/
+"""
+
+import argparse
+import os
+import math
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+import matplotlib.animation as animation
+from matplotlib.collections import LineCollection
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
+from herding_env import HerdingEnv
+
+
+# ── colours ──────────────────────────────────────────────────────────────────
+# Ten hex colours for per-sheep lines and markers. The plotting functions
+# index this list with ``i % len(SHEEP_COLORS)``, so flocks larger than ten
+# reuse colours from the start.
+SHEEP_COLORS = [
+    "#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00",
+    "#a65628", "#f781bf", "#999999", "#66c2a5", "#fc8d62",
+]
+# Colour of the dog's path and markers.
+DOG_COLOR   = "#4e342e"
+# Fill colour of the pen rectangle drawn by draw_field().
+PEN_COLOR   = "#ffe082"
+# Axes background colour set by draw_field().
+FIELD_COLOR = "#dcedc8"
+
+
+def make_env(n_sheep, max_steps, seed=42):
+    def _init():
+        env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps)
+        env.reset(seed=seed)
+        return env
+    return _init
+
+
+def run_episode(model, env, n_sheep, max_steps):
+    """Run one deterministic episode; return recorded history."""
+    obs      = env.reset()
+    inner    = env.envs[0]
+    done     = False
+
+    dog_xs, dog_ys   = [], []
+    sheep_xs         = [[] for _ in range(n_sheep)]
+    sheep_ys         = [[] for _ in range(n_sheep)]
+    radii            = []
+    pen_dists        = [[] for _ in range(n_sheep)]
+    action_mags      = []
+    rewards          = []
+    penned_at        = [None] * n_sheep   # step when each sheep was penned
+
+    step = 0
+    while not done:
+        if model is None:
+            action = env.action_space.sample()[np.newaxis]
+        else:
+            action, _ = model.predict(obs, deterministic=True)
+
+        obs, reward, dones, infos = env.step(action)
+        done = dones[0]
+        step += 1
+
+        dx, dy = float(inner.dog_pos[0]), float(inner.dog_pos[1])
+        dog_xs.append(dx); dog_ys.append(dy)
+
+        com, radius, _ = inner._flock_stats()
+        radii.append(radius)
+        rewards.append(float(reward[0]))
+
+        act = action[0]
+        action_mags.append(float(np.linalg.norm(act)))
+
+        for i in range(n_sheep):
+            sx, sy = float(inner.sheep_pos[i][0]), float(inner.sheep_pos[i][1])
+            sheep_xs[i].append(sx)
+            sheep_ys[i].append(sy)
+            pen_dists[i].append(float(np.linalg.norm(inner.sheep_pos[i] - inner.PEN_CENTER)))
+            if inner.penned[i] and penned_at[i] is None:
+                penned_at[i] = step
+
+    info = infos[0]
+    n_penned = info.get("n_penned", 0)
+    success  = n_penned == n_sheep
+
+    return dict(
+        dog_xs=dog_xs, dog_ys=dog_ys,
+        sheep_xs=sheep_xs, sheep_ys=sheep_ys,
+        radii=radii, pen_dists=pen_dists,
+        action_mags=action_mags, rewards=rewards,
+        penned_at=penned_at,
+        n_penned=n_penned, n_sheep=n_sheep,
+        success=success, steps=step,
+    )
+
+
+# ── plot helpers ─────────────────────────────────────────────────────────────
+
+def draw_field(ax):
+    ax.set_xlim(-16, 16); ax.set_ylim(-16, 16)
+    ax.set_aspect("equal"); ax.set_facecolor(FIELD_COLOR)
+    ax.add_patch(mpatches.Rectangle((-15,-15), 30, 30,
+                 fill=False, edgecolor="#795548", lw=2))
+    ax.add_patch(mpatches.Rectangle((10,-15), 3, 7,
+                 facecolor=PEN_COLOR, edgecolor="#795548", lw=2))
+    ax.text(11.5, -11.5, "pen", ha="center", va="center",
+            fontsize=8, color="#795548")
+
+
+def faded_path(ax, xs, ys, color, lw=1.5, label=None):
+    """Draw a path with alpha fading from start (transparent) to end (opaque)."""
+    n = len(xs)
+    if n < 2:
+        return
+    points  = np.array([xs, ys]).T.reshape(-1, 1, 2)
+    segs    = np.concatenate([points[:-1], points[1:]], axis=1)
+    alphas  = np.linspace(0.15, 1.0, len(segs))
+    colors  = [(*matplotlib.colors.to_rgb(color), a) for a in alphas]
+    lc = LineCollection(segs, colors=colors, linewidth=lw)
+    ax.add_collection(lc)
+    if label:
+        ax.plot([], [], color=color, lw=lw, label=label)
+
+
+# ── main plots ────────────────────────────────────────────────────────────────
+
+def plot_trajectory(hist, out_path):
+    fig, ax = plt.subplots(figsize=(7, 7))
+    draw_field(ax)
+
+    # Sheep paths
+    for i in range(hist["n_sheep"]):
+        c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
+        xs, ys = hist["sheep_xs"][i], hist["sheep_ys"][i]
+        faded_path(ax, xs, ys, c, lw=1.2, label=f"sheep {i+1}")
+        ax.plot(xs[0], ys[0], "o", color=c, ms=7, zorder=4)
+        pa = hist["penned_at"][i]
+        end = pa if pa is not None else -1
+        ax.plot(xs[end], ys[end], "*", color=c, ms=11, zorder=5)
+
+    # Dog path
+    faded_path(ax, hist["dog_xs"], hist["dog_ys"], DOG_COLOR, lw=2.0, label="dog")
+    ax.plot(hist["dog_xs"][0], hist["dog_ys"][0], "s", color=DOG_COLOR, ms=10, zorder=5)
+    ax.plot(hist["dog_xs"][-1], hist["dog_ys"][-1], "D", color=DOG_COLOR, ms=10, zorder=5)
+
+    result = "SUCCESS" if hist["success"] else f"FAIL ({hist['n_penned']}/{hist['n_sheep']} penned)"
+    ax.set_title(f"Trajectory — {result} — {hist['steps']} steps", fontsize=12)
+    ax.legend(loc="upper left", fontsize=8)
+    plt.tight_layout()
+    fig.savefig(out_path, dpi=120)
+    plt.close(fig)
+    print(f"  saved {out_path}")
+
+
+def plot_timeseries(hist, out_path):
+    t      = np.arange(hist["steps"])
+    fig, axes = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
+
+    # 1. Flock radius
+    axes[0].plot(t, hist["radii"], color="steelblue")
+    axes[0].axhline(5.0, color="orange", ls="--", lw=1, label="compact threshold (5m)")
+    axes[0].set_ylabel("flock radius (m)")
+    axes[0].legend(fontsize=8)
+    axes[0].set_title("Flock radius — goal: get below 5m")
+
+    # 2. Per-sheep distance to pen
+    for i in range(hist["n_sheep"]):
+        c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
+        axes[1].plot(t, hist["pen_dists"][i], color=c, lw=1, label=f"sheep {i+1}")
+        pa = hist["penned_at"][i]
+        if pa is not None:
+            axes[1].axvline(pa, color=c, ls=":", lw=1)
+    axes[1].set_ylabel("dist to pen (m)")
+    axes[1].legend(fontsize=7, ncol=min(hist["n_sheep"], 5))
+    axes[1].set_title("Per-sheep distance to pen — goal: all reach 0")
+
+    # 3. Action magnitude (how fast dog is moving)
+    axes[2].plot(t, hist["action_mags"], color="tomato", lw=1)
+    axes[2].axhline(1.0, color="gray", ls="--", lw=1, label="max")
+    axes[2].set_ylabel("action ||(vx,vy)||")
+    axes[2].set_ylim(0, 1.5)
+    axes[2].set_title("Dog action magnitude — 0=stopped, 1=full speed")
+    axes[2].legend(fontsize=8)
+
+    # 4. Reward per step
+    axes[3].plot(t, hist["rewards"], color="purple", lw=1, alpha=0.7)
+    axes[3].axhline(0, color="black", lw=0.5)
+    axes[3].set_ylabel("reward")
+    axes[3].set_xlabel("step")
+    axes[3].set_title("Reward per step")
+
+    result = "SUCCESS" if hist["success"] else f"FAIL ({hist['n_penned']}/{hist['n_sheep']} penned)"
+    fig.suptitle(f"n_sheep={hist['n_sheep']}  {result}  {hist['steps']} steps", fontsize=13)
+    plt.tight_layout()
+    fig.savefig(out_path, dpi=120)
+    plt.close(fig)
+    print(f"  saved {out_path}")
+
+
+def save_gif(hist, out_path, fps=15, skip=5):
+    """Animated replay, every `skip` steps."""
+    n    = hist["n_sheep"]
+    idxs = list(range(0, hist["steps"], skip))
+
+    fig, ax = plt.subplots(figsize=(6, 6))
+
+    def _frame(k):
+        ax.clear()
+        draw_field(ax)
+        t = idxs[k]
+
+        for i in range(n):
+            c  = SHEEP_COLORS[i % len(SHEEP_COLORS)]
+            s0 = max(0, t - 30)
+            ax.plot(hist["sheep_xs"][i][s0:t+1],
+                    hist["sheep_ys"][i][s0:t+1],
+                    color=c, lw=0.8, alpha=0.5)
+            color = "#ff69b4" if (hist["penned_at"][i] is not None
+                                   and t >= hist["penned_at"][i]) else c
+            ax.plot(hist["sheep_xs"][i][t], hist["sheep_ys"][i][t],
+                    "o", color=color, ms=10, zorder=4,
+                    markeredgecolor="#555", markeredgewidth=1)
+
+        s0 = max(0, t - 30)
+        ax.plot(hist["dog_xs"][s0:t+1], hist["dog_ys"][s0:t+1],
+                color=DOG_COLOR, lw=1.5, alpha=0.6)
+        ax.plot(hist["dog_xs"][t], hist["dog_ys"][t],
+                "s", color=DOG_COLOR, ms=13, zorder=5,
+                markeredgecolor="black", markeredgewidth=1.5)
+
+        r = hist["radii"][t]
+        ax.set_title(f"step {t}/{hist['steps']}  radius={r:.1f}m  "
+                     f"penned={hist['n_penned'] if t==hist['steps']-1 else '?'}/{n}",
+                     fontsize=10)
+
+    ani = animation.FuncAnimation(fig, _frame, frames=len(idxs), interval=1000//fps)
+    ani.save(out_path, writer="pillow", fps=fps)
+    plt.close(fig)
+    print(f"  saved {out_path}")
+
+
+# ── entry point ───────────────────────────────────────────────────────────────
+
+def parse_args():
+    p = argparse.ArgumentParser()
+    p.add_argument("--model",     default=None, help="Model .zip (omit for random policy)")
+    p.add_argument("--vecnorm",   default=None)
+    p.add_argument("--n-sheep",   type=int, default=3)
+    p.add_argument("--max-steps", type=int, default=2000)
+    p.add_argument("--seed",      type=int, default=42)
+    p.add_argument("--out-dir",   default="vis_out")
+    p.add_argument("--random",    action="store_true",
+                   help="Use random policy (baseline comparison)")
+    p.add_argument("--gif-fps",   type=int, default=15)
+    p.add_argument("--gif-skip",  type=int, default=5,
+                   help="Render every Nth step in the GIF")
+    p.add_argument("--no-gif",    action="store_true")
+    return p.parse_args()
+
+
+def main():
+    args = parse_args()
+    os.makedirs(args.out_dir, exist_ok=True)
+
+    raw = DummyVecEnv([make_env(args.n_sheep, args.max_steps, args.seed)])
+
+    if args.random or args.model is None:
+        print("Using RANDOM policy")
+        env   = raw
+        model = None
+    else:
+        if args.vecnorm:
+            env = VecNormalize.load(args.vecnorm, raw)
+            env.training    = False
+            env.norm_reward = False
+        else:
+            env = raw
+        model = PPO.load(args.model, env=env)
+        print(f"Loaded model: {args.model}")
+
+    print(f"Running episode  n_sheep={args.n_sheep}  seed={args.seed} ...")
+    hist = run_episode(model, env, args.n_sheep, args.max_steps)
+
+    result = "SUCCESS" if hist["success"] else f"FAIL ({hist['n_penned']}/{hist['n_sheep']} penned)"
+    print(f"Episode done: {result}  steps={hist['steps']}")
+    print(f"  min radius : {min(hist['radii']):.2f} m")
+    print(f"  mean reward: {np.mean(hist['rewards']):.4f}")
+    print(f"  mean action: {np.mean(hist['action_mags']):.3f}")
+
+    env.close()
+
+    plot_trajectory(hist, os.path.join(args.out_dir, "trajectory.png"))
+    plot_timeseries(hist, os.path.join(args.out_dir, "timeseries.png"))
+    if not args.no_gif:
+        save_gif(hist, os.path.join(args.out_dir, "episode.gif"),
+                 fps=args.gif_fps, skip=args.gif_skip)
+
+    print(f"\nAll outputs saved to {args.out_dir}/")
+
+
+# Run the visualisation only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()