Approach refinement

This commit is contained in:
Johnny Fernandes
2026-04-26 12:59:04 +01:00
parent acf0810425
commit a44ddb7b08
17 changed files with 10593 additions and 194 deletions
+1 -1
View File
@@ -5,7 +5,7 @@
"W_COMPLETE": 100.0,
"W_STEP_COST": 0.02,
"W_COMPACT": 0.0,
"W_WALL_TOUCH": 0.01,
"W_WALL_TOUCH": 0.0,
"WALL_TOUCH_BUFFER": 0.4,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": true,
+21 -188
View File
@@ -25,12 +25,7 @@ import os
import time
from copy import deepcopy
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
from matplotlib.collections import LineCollection
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.vec_env import (
@@ -40,15 +35,13 @@ from stable_baselines3.common.vec_env import (
)
from herding_env import HerdingEnv
# ── Colours ──────────────────────────────────────────────────────────────────
SHEEP_COLORS = [
"#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00",
"#a65628", "#f781bf", "#999999", "#66c2a5", "#fc8d62",
]
DOG_COLOR = "#4e342e"
from viz import (
run_and_record,
plot_trajectory,
plot_timeseries,
plot_success_rate,
save_episode_gif,
)
# ── Callbacks ────────────────────────────────────────────────────────────────
@@ -198,178 +191,6 @@ def evaluate(model, vn_template, n_sheep, n_episodes, max_steps,
return result
# ── Visualization helpers ────────────────────────────────────────────────────
def _draw_field(ax):
ax.set_xlim(-16, 16)
ax.set_ylim(-16, 16)
ax.set_aspect("equal")
ax.set_facecolor("#dcedc8")
ax.add_patch(mpatches.Rectangle((-15, -15), 30, 30,
fill=False, edgecolor="#795548", lw=2))
ax.add_patch(mpatches.Rectangle((10, -15), 3, 7,
facecolor="#ffe082", edgecolor="#795548", lw=2))
ax.text(11.5, -11.5, "pen", ha="center", va="center",
fontsize=8, color="#795548")
def _faded_path(ax, xs, ys, color, lw=1.5, label=None):
n = len(xs)
if n < 2:
return
points = np.array([xs, ys]).T.reshape(-1, 1, 2)
segs = np.concatenate([points[:-1], points[1:]], axis=1)
alphas = np.linspace(0.15, 1.0, len(segs))
colors = [(*matplotlib.colors.to_rgb(color), a) for a in alphas]
ax.add_collection(LineCollection(segs, colors=colors, linewidth=lw))
if label:
ax.plot([], [], color=color, lw=lw, label=label)
def run_and_record(model, vn_template, n_sheep, max_steps,
reward_cfg=None, seed=42):
"""Run one deterministic episode and return full history."""
raw = DummyVecEnv([make_env(n_sheep, seed, max_steps, reward_cfg)])
vn = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
vn.obs_rms = deepcopy(vn_template.obs_rms)
vn.ret_rms = deepcopy(vn_template.ret_rms)
obs = vn.reset()
inner = vn.envs[0]
done = False
dog_xs, dog_ys = [], []
sheep_xs = [[] for _ in range(n_sheep)]
sheep_ys = [[] for _ in range(n_sheep)]
radii = []
pen_dists = [[] for _ in range(n_sheep)]
action_mags = []
rewards = []
penned_at = [None] * n_sheep
step = 0
while not done:
action, _ = model.predict(obs, deterministic=True)
obs, reward, dones, infos = vn.step(action)
done = dones[0]
step += 1
dog_xs.append(float(inner.dog_pos[0]))
dog_ys.append(float(inner.dog_pos[1]))
com, radius, _ = inner._flock_stats()
radii.append(radius)
rewards.append(float(reward[0]))
action_mags.append(float(np.linalg.norm(action[0])))
for i in range(n_sheep):
sheep_xs[i].append(float(inner.sheep_pos[i][0]))
sheep_ys[i].append(float(inner.sheep_pos[i][1]))
pen_dists[i].append(
float(np.linalg.norm(inner.sheep_pos[i] - inner.PEN_CENTER)))
if inner.penned[i] and penned_at[i] is None:
penned_at[i] = step
n_penned = infos[0].get("n_penned", 0)
vn.close()
return dict(
dog_xs=dog_xs, dog_ys=dog_ys,
sheep_xs=sheep_xs, sheep_ys=sheep_ys,
radii=radii, pen_dists=pen_dists,
action_mags=action_mags, rewards=rewards,
penned_at=penned_at,
n_penned=n_penned, n_sheep=n_sheep,
success=n_penned == n_sheep, steps=step,
)
def plot_trajectory(hist, out_path):
fig, ax = plt.subplots(figsize=(7, 7))
_draw_field(ax)
for i in range(hist["n_sheep"]):
c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
xs, ys = hist["sheep_xs"][i], hist["sheep_ys"][i]
_faded_path(ax, xs, ys, c, lw=1.2, label=f"sheep {i+1}")
ax.plot(xs[0], ys[0], "o", color=c, ms=7, zorder=4)
end = hist["penned_at"][i] if hist["penned_at"][i] is not None else -1
ax.plot(xs[end], ys[end], "*", color=c, ms=11, zorder=5)
_faded_path(ax, hist["dog_xs"], hist["dog_ys"], DOG_COLOR, lw=2.0,
label="dog")
ax.plot(hist["dog_xs"][0], hist["dog_ys"][0], "s", color=DOG_COLOR,
ms=10, zorder=5)
ax.plot(hist["dog_xs"][-1], hist["dog_ys"][-1], "D", color=DOG_COLOR,
ms=10, zorder=5)
result = ("SUCCESS" if hist["success"]
else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})")
ax.set_title(f"n={hist['n_sheep']} {result} {hist['steps']} steps",
fontsize=12)
ax.legend(loc="upper left", fontsize=8)
plt.tight_layout()
fig.savefig(out_path, dpi=120)
plt.close(fig)
def plot_timeseries(hist, out_path):
t = np.arange(hist["steps"])
fig, axes = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
axes[0].plot(t, hist["radii"], color="steelblue")
axes[0].axhline(5.0, color="orange", ls="--", lw=1, label="compact (5m)")
axes[0].set_ylabel("flock radius (m)")
axes[0].legend(fontsize=8)
axes[0].set_title("Flock radius")
for i in range(hist["n_sheep"]):
c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
axes[1].plot(t, hist["pen_dists"][i], color=c, lw=1,
label=f"sheep {i+1}")
if hist["penned_at"][i] is not None:
axes[1].axvline(hist["penned_at"][i], color=c, ls=":", lw=1)
axes[1].set_ylabel("dist to pen (m)")
axes[1].legend(fontsize=7, ncol=min(hist["n_sheep"], 5))
axes[1].set_title("Per-sheep distance to pen")
axes[2].plot(t, hist["action_mags"], color="tomato", lw=1)
axes[2].axhline(1.0, color="gray", ls="--", lw=1, label="max")
axes[2].set_ylabel("action ||(vx,vy)||")
axes[2].set_ylim(0, 1.5)
axes[2].set_title("Dog action magnitude")
axes[2].legend(fontsize=8)
axes[3].plot(t, hist["rewards"], color="purple", lw=1, alpha=0.7)
axes[3].axhline(0, color="black", lw=0.5)
axes[3].set_ylabel("reward")
axes[3].set_xlabel("step")
axes[3].set_title("Reward per step")
result = ("SUCCESS" if hist["success"]
else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})")
fig.suptitle(f"n_sheep={hist['n_sheep']} {result} {hist['steps']} steps",
fontsize=13)
plt.tight_layout()
fig.savefig(out_path, dpi=120)
plt.close(fig)
def plot_success_rate(stage_results, out_path):
fig, ax = plt.subplots(figsize=(8, 4))
ns = [r["n_sheep"] for r in stage_results]
srs = [r["sr"] * 100 for r in stage_results]
bars = ax.bar(ns, srs, color="steelblue", edgecolor="white")
ax.set_xlabel("Sheep count")
ax.set_ylabel("Success rate (%)")
ax.set_ylim(0, 105)
ax.axhline(90, color="orange", ls="--", lw=1, label="90% target")
for bar, sr in zip(bars, srs):
ax.text(bar.get_x() + bar.get_width() / 2,
bar.get_height() + 1, f"{sr:.0f}%",
ha="center", fontsize=9)
ax.legend()
ax.set_title("Evaluation success rate per sheep count")
plt.tight_layout()
fig.savefig(out_path, dpi=120)
plt.close(fig)
# ── CLI ──────────────────────────────────────────────────────────────────────
@@ -400,6 +221,11 @@ def parse_args():
p.add_argument("--max-steps", type=int, default=2500)
p.add_argument("--eval-episodes", type=int, default=30)
p.add_argument("--run-dir", type=str, default=None)
p.add_argument("--no-gif", action="store_true",
help="Skip per-stage GIF rendering (PNGs still produced).")
p.add_argument("--gif-fps", type=int, default=20)
p.add_argument("--gif-skip", type=int, default=3,
help="Keep every Nth frame (smaller GIF; default 3).")
return p.parse_args()
@@ -441,13 +267,15 @@ def main():
vn = VecNormalize(train_env, norm_obs=True, norm_reward=True,
clip_obs=10.0)
# Model
# Model — force CPU (PPO with MLP runs faster on CPU than GPU; SB3 warns
# about this otherwise).
model = PPO(
"MlpPolicy", vn,
learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10,
gamma=0.995, gae_lambda=0.95, clip_range=0.2,
ent_coef=cfg.get("ent_coef", 0.02), vf_coef=0.5, max_grad_norm=0.5,
policy_kwargs=dict(net_arch=[256, 256]),
device="cpu",
verbose=0,
)
@@ -488,7 +316,7 @@ def main():
print(f" reward/step: " + " ".join(
f"{k}={v:+.4f}" for k, v in rps.items()))
# Episode visualization
# Episode visualisation: trajectory + timeseries + animated GIF
hist = run_and_record(model, vn, n, args.max_steps, rcfg,
seed=1000 + n)
tag = "success" if hist["success"] else "fail"
@@ -498,6 +326,11 @@ def main():
plot_timeseries(
hist,
os.path.join(eval_dir, f"ts_{n}s_{tag}.png"))
if not args.no_gif:
save_episode_gif(
hist,
os.path.join(eval_dir, f"ep_{n}s_{tag}.gif"),
fps=args.gif_fps, skip=args.gif_skip)
r["n_sheep"] = n
stage_results.append(r)
+341
View File
@@ -0,0 +1,341 @@
"""
All visualization for the herding policy: trajectory plots, timeseries plots,
success-rate bar chart, and animated GIFs.
Used both by train.py (auto-rendered after each curriculum stage) and as a CLI
to render a fresh episode against a saved model.
CLI usage:
python viz.py --run-dir runs/v1 --n-sheep 5
python viz.py --run-dir runs/v1 --n-sheep 10 --no-gif
python viz.py --model runs/v1/final_model.zip --vecnorm runs/v1/vecnorm.pkl \\
--n-sheep 3 --out-dir vis_v1_3sheep
"""
import argparse
import os
import json
from copy import deepcopy
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.animation as animation
from matplotlib.collections import LineCollection
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from herding_env import HerdingEnv
# ── Palette ──────────────────────────────────────────────────────────────────
SHEEP_COLORS = [
"#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00",
"#a65628", "#f781bf", "#999999", "#66c2a5", "#fc8d62",
]
DOG_COLOR = "#4e342e"
# ── Common drawing primitives ────────────────────────────────────────────────
def draw_field(ax):
ax.set_xlim(-16, 16)
ax.set_ylim(-16, 16)
ax.set_aspect("equal")
ax.set_facecolor("#dcedc8")
ax.add_patch(mpatches.Rectangle(
(-15, -15), 30, 30, fill=False, edgecolor="#795548", lw=2))
ax.add_patch(mpatches.Rectangle(
(10, -15), 3, 7, facecolor="#ffe082", edgecolor="#795548", lw=2))
ax.text(11.5, -11.5, "pen", ha="center", va="center",
fontsize=8, color="#795548")
def faded_path(ax, xs, ys, color, lw=1.5, label=None):
n = len(xs)
if n < 2:
return
points = np.array([xs, ys]).T.reshape(-1, 1, 2)
segs = np.concatenate([points[:-1], points[1:]], axis=1)
alphas = np.linspace(0.15, 1.0, len(segs))
colors = [(*matplotlib.colors.to_rgb(color), a) for a in alphas]
ax.add_collection(LineCollection(segs, colors=colors, linewidth=lw))
if label:
ax.plot([], [], color=color, lw=lw, label=label)
# ── Episode rollout ──────────────────────────────────────────────────────────
def make_eval_env(n_sheep, seed, max_steps, reward_cfg=None):
def _init():
env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
reward_cfg=reward_cfg)
env.reset(seed=seed)
return env
return _init
def run_and_record(model, vn_template, n_sheep, max_steps,
reward_cfg=None, seed=42):
"""Run one deterministic episode and return full trajectory history."""
raw = DummyVecEnv([make_eval_env(n_sheep, seed, max_steps, reward_cfg)])
vn = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
vn.obs_rms = deepcopy(vn_template.obs_rms)
vn.ret_rms = deepcopy(vn_template.ret_rms)
obs = vn.reset()
inner = vn.envs[0]
done = False
dog_xs, dog_ys = [], []
sheep_xs = [[] for _ in range(n_sheep)]
sheep_ys = [[] for _ in range(n_sheep)]
sheep_penned = [[] for _ in range(n_sheep)]
radii = []
pen_dists = [[] for _ in range(n_sheep)]
action_mags = []
rewards = []
penned_at = [None] * n_sheep
step = 0
while not done:
action, _ = model.predict(obs, deterministic=True)
obs, reward, dones, infos = vn.step(action)
done = dones[0]
step += 1
dog_xs.append(float(inner.dog_pos[0]))
dog_ys.append(float(inner.dog_pos[1]))
com, radius, _ = inner._flock_stats()
radii.append(radius)
rewards.append(float(reward[0]))
action_mags.append(float(np.linalg.norm(action[0])))
for i in range(n_sheep):
sheep_xs[i].append(float(inner.sheep_pos[i][0]))
sheep_ys[i].append(float(inner.sheep_pos[i][1]))
sheep_penned[i].append(bool(inner.penned[i]))
pen_dists[i].append(
float(np.linalg.norm(inner.sheep_pos[i] - inner.PEN_CENTER)))
if inner.penned[i] and penned_at[i] is None:
penned_at[i] = step
n_penned = infos[0].get("n_penned", 0)
vn.close()
return dict(
dog_xs=dog_xs, dog_ys=dog_ys,
sheep_xs=sheep_xs, sheep_ys=sheep_ys,
sheep_penned=sheep_penned,
radii=radii, pen_dists=pen_dists,
action_mags=action_mags, rewards=rewards,
penned_at=penned_at,
n_penned=n_penned, n_sheep=n_sheep,
success=n_penned == n_sheep, steps=step,
)
# ── Static plots ─────────────────────────────────────────────────────────────
def plot_trajectory(hist, out_path):
fig, ax = plt.subplots(figsize=(7, 7))
draw_field(ax)
for i in range(hist["n_sheep"]):
c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
xs, ys = hist["sheep_xs"][i], hist["sheep_ys"][i]
faded_path(ax, xs, ys, c, lw=1.2, label=f"sheep {i+1}")
ax.plot(xs[0], ys[0], "o", color=c, ms=7, zorder=4)
end = hist["penned_at"][i] if hist["penned_at"][i] is not None else -1
ax.plot(xs[end], ys[end], "*", color=c, ms=11, zorder=5)
faded_path(ax, hist["dog_xs"], hist["dog_ys"], DOG_COLOR, lw=2.0,
label="dog")
ax.plot(hist["dog_xs"][0], hist["dog_ys"][0], "s", color=DOG_COLOR,
ms=10, zorder=5)
ax.plot(hist["dog_xs"][-1], hist["dog_ys"][-1], "D", color=DOG_COLOR,
ms=10, zorder=5)
result = ("SUCCESS" if hist["success"]
else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})")
ax.set_title(f"n={hist['n_sheep']} {result} {hist['steps']} steps",
fontsize=12)
ax.legend(loc="upper left", fontsize=8)
plt.tight_layout()
fig.savefig(out_path, dpi=120)
plt.close(fig)
def plot_timeseries(hist, out_path):
t = np.arange(hist["steps"])
fig, axes = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
axes[0].plot(t, hist["radii"], color="steelblue")
axes[0].axhline(5.0, color="orange", ls="--", lw=1, label="compact (5m)")
axes[0].set_ylabel("flock radius (m)")
axes[0].legend(fontsize=8)
axes[0].set_title("Flock radius")
for i in range(hist["n_sheep"]):
c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
axes[1].plot(t, hist["pen_dists"][i], color=c, lw=1,
label=f"sheep {i+1}")
if hist["penned_at"][i] is not None:
axes[1].axvline(hist["penned_at"][i], color=c, ls=":", lw=1)
axes[1].set_ylabel("dist to pen (m)")
axes[1].legend(fontsize=7, ncol=min(hist["n_sheep"], 5))
axes[1].set_title("Per-sheep distance to pen")
axes[2].plot(t, hist["action_mags"], color="tomato", lw=1)
axes[2].axhline(1.0, color="gray", ls="--", lw=1, label="max")
axes[2].set_ylabel("action ||(vx,vy)||")
axes[2].set_ylim(0, 1.5)
axes[2].set_title("Dog action magnitude")
axes[2].legend(fontsize=8)
axes[3].plot(t, hist["rewards"], color="purple", lw=1, alpha=0.7)
axes[3].axhline(0, color="black", lw=0.5)
axes[3].set_ylabel("reward")
axes[3].set_xlabel("step")
axes[3].set_title("Reward per step")
result = ("SUCCESS" if hist["success"]
else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})")
fig.suptitle(f"n_sheep={hist['n_sheep']} {result} {hist['steps']} steps",
fontsize=13)
plt.tight_layout()
fig.savefig(out_path, dpi=120)
plt.close(fig)
def plot_success_rate(stage_results, out_path):
fig, ax = plt.subplots(figsize=(8, 4))
ns = [r["n_sheep"] for r in stage_results]
srs = [r["sr"] * 100 for r in stage_results]
bars = ax.bar(ns, srs, color="steelblue", edgecolor="white")
ax.set_xlabel("Sheep count")
ax.set_ylabel("Success rate (%)")
ax.set_ylim(0, 105)
ax.axhline(90, color="orange", ls="--", lw=1, label="90% target")
for bar, sr in zip(bars, srs):
ax.text(bar.get_x() + bar.get_width() / 2,
bar.get_height() + 1, f"{sr:.0f}%",
ha="center", fontsize=9)
ax.legend()
ax.set_title("Evaluation success rate per sheep count")
plt.tight_layout()
fig.savefig(out_path, dpi=120)
plt.close(fig)
# ── Animated GIF ─────────────────────────────────────────────────────────────
def save_episode_gif(hist, out_path, fps=20, skip=3):
"""Render hist as an animated GIF. `skip` keeps every Nth frame (smaller file)."""
n_sheep = hist["n_sheep"]
frames = list(range(0, hist["steps"], max(1, skip)))
if frames[-1] != hist["steps"] - 1:
frames.append(hist["steps"] - 1)
fig, ax = plt.subplots(figsize=(6, 6))
draw_field(ax)
title = ax.text(0, 16.5, "", ha="center", fontsize=11)
dog_marker, = ax.plot([], [], "s", color=DOG_COLOR, ms=12,
markeredgecolor="black", markeredgewidth=1.5,
zorder=5)
sheep_markers = []
for i in range(n_sheep):
c = SHEEP_COLORS[i % len(SHEEP_COLORS)]
m, = ax.plot([], [], "o", color=c, ms=10,
markeredgecolor="#333", markeredgewidth=1, zorder=4)
sheep_markers.append(m)
dog_trail, = ax.plot([], [], color=DOG_COLOR, lw=1.0, alpha=0.5)
def update(k):
title.set_text(
f"n={n_sheep} step {k+1}/{hist['steps']} "
f"penned {sum(hist['sheep_penned'][i][k] for i in range(n_sheep))}/{n_sheep}")
dog_marker.set_data([hist["dog_xs"][k]], [hist["dog_ys"][k]])
dog_trail.set_data(hist["dog_xs"][:k+1], hist["dog_ys"][:k+1])
for i, m in enumerate(sheep_markers):
m.set_data([hist["sheep_xs"][i][k]], [hist["sheep_ys"][i][k]])
penned = hist["sheep_penned"][i][k]
m.set_color("deeppink" if penned else SHEEP_COLORS[i % len(SHEEP_COLORS)])
return [title, dog_marker, dog_trail, *sheep_markers]
anim = animation.FuncAnimation(
fig, update, frames=frames, interval=1000 / fps, blit=False)
anim.save(out_path, writer=animation.PillowWriter(fps=fps), dpi=80)
plt.close(fig)
# ── CLI ──────────────────────────────────────────────────────────────────────
def _resolve_paths(args):
if args.run_dir:
model_path = os.path.join(args.run_dir, "final_model.zip")
vn_path = os.path.join(args.run_dir, "vecnorm.pkl")
cfg_path = os.path.join(args.run_dir, "config.json")
else:
model_path = args.model
vn_path = args.vecnorm
cfg_path = args.config
return model_path, vn_path, cfg_path
def main():
p = argparse.ArgumentParser(
description="Render trajectory + timeseries + GIF for a saved policy.")
p.add_argument("--run-dir", type=str, default=None,
help="Run directory containing final_model.zip + vecnorm.pkl + config.json")
p.add_argument("--model", type=str, default=None)
p.add_argument("--vecnorm", type=str, default=None)
p.add_argument("--config", type=str, default=None)
p.add_argument("--n-sheep", type=int, default=3)
p.add_argument("--seed", type=int, default=42)
p.add_argument("--max-steps", type=int, default=2500)
p.add_argument("--out-dir", type=str, default=None)
p.add_argument("--no-gif", action="store_true",
help="Skip the animated GIF (PNG-only is faster).")
p.add_argument("--gif-fps", type=int, default=20)
p.add_argument("--gif-skip", type=int, default=3)
args = p.parse_args()
model_path, vn_path, cfg_path = _resolve_paths(args)
if not (model_path and vn_path):
p.error("either --run-dir or both --model and --vecnorm are required")
rcfg = None
if cfg_path and os.path.exists(cfg_path):
with open(cfg_path) as f:
cfg = json.load(f)
rcfg = {k: v for k, v in cfg.items() if hasattr(HerdingEnv, k)}
out_dir = args.out_dir or os.path.join(
os.path.dirname(os.path.abspath(model_path)),
f"vis_{args.n_sheep}s")
os.makedirs(out_dir, exist_ok=True)
print(f"Loading model: {model_path}")
print(f"Loading vecnorm: {vn_path}")
model = PPO.load(model_path, device="cpu")
raw = DummyVecEnv([make_eval_env(args.n_sheep, args.seed, args.max_steps, rcfg)])
vn = VecNormalize.load(vn_path, raw)
print(f"Rolling out n_sheep={args.n_sheep} (seed={args.seed})...")
hist = run_and_record(model, vn, args.n_sheep, args.max_steps,
reward_cfg=rcfg, seed=args.seed)
result = "SUCCESS" if hist["success"] else f"FAIL ({hist['n_penned']}/{hist['n_sheep']})"
print(f" {result} in {hist['steps']} steps")
plot_trajectory(hist, os.path.join(out_dir, "trajectory.png"))
plot_timeseries(hist, os.path.join(out_dir, "timeseries.png"))
print(f" saved trajectory.png + timeseries.png to {out_dir}/")
if not args.no_gif:
gif_path = os.path.join(out_dir, "episode.gif")
print(f" rendering GIF (fps={args.gif_fps}, skip={args.gif_skip})...")
save_episode_gif(hist, gif_path, fps=args.gif_fps, skip=args.gif_skip)
print(f" saved {gif_path}")
if __name__ == "__main__":
main()