diff --git a/training/replay_config.py b/training/replay_config.py
new file mode 100644
index 0000000..dacfef0
--- /dev/null
+++ b/training/replay_config.py
@@ -0,0 +1,177 @@
+# Raw docstring so the "\" in the usage example stays a literal backslash
+# instead of silently joining the two lines.
+r"""
+Replay a reward config from the sweep with a longer training budget.
+
+Tells you whether a promising sweep config was bottlenecked by training time
+vs. structurally limited. If sr2/sr3 climb past their sweep numbers given more
+budget, the issue was budget; if they plateau, the policy/obs needs work.
+
+Usage
+-----
+    python replay_config.py --config runs/sweep_<ts>/best.json
+    python replay_config.py --config runs/sweep_<ts>/trial_007/config.json \
+        --max-sheep 4 --steps-per-stage 1500000
+
+Argument summary:
+    --config           JSON file with the reward config (sweep best.json works)
+    --max-sheep        Final curriculum stage (default 3)
+    --steps-per-stage  Env steps per curriculum stage (default 1.5M)
+    --n-envs           Parallel envs (default 8)
+    --max-steps        max_steps handed to make_env/evaluate (default 1500)
+    --eval-episodes    Per-stage eval episodes (default 30)
+    --run-dir          Output directory (default runs/replay_<ts>/)
+"""
+import argparse
+import json
+import os
+import time
+from copy import deepcopy
+
+import numpy as np
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv, VecNormalize
+
+from herding_env import HerdingEnv
+from sweep_reward import ProgressCallback, reward_cfg, evaluate, make_env
+
+
+def _positive_int(text):
+    """argparse ``type=`` callable: parse *text* as an int and require >= 1."""
+    try:
+        value = int(text)
+    except ValueError:
+        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
+    if value < 1:
+        raise argparse.ArgumentTypeError(f"must be >= 1, got {value}")
+    return value
+
+
+def _load_reward_config(path):
+    """Return the reward config dict stored in the JSON file at *path*.
+
+    A top-level object with a nested dict under "config" yields that nested
+    dict; any other top-level object is returned as-is.
+
+    Raises:
+        OSError: the file cannot be opened.
+        ValueError: the file is not valid JSON or its top level is not an object.
+    """
+    with open(path, encoding="utf-8") as f:
+        raw = json.load(f)
+    if not isinstance(raw, dict):
+        raise ValueError(f"expected a JSON object, got {type(raw).__name__}")
+    nested = raw.get("config")
+    return nested if isinstance(nested, dict) else raw
+
+
+def _write_json(path, payload):
+    """Dump *payload* to *path* as indented JSON, replacing any existing file."""
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(payload, f, indent=2)
+
+
+def main():
+    """Train one PPO model through stages 1..--max-sheep and evaluate each stage.
+
+    Writes config.json, stage_results.json, final_model and vecnorm.pkl into the
+    run directory and prints a per-stage summary at the end.
+    """
+    p = argparse.ArgumentParser()
+    p.add_argument("--config", type=str, required=True,
+                   help="Reward config JSON (sweep best.json or trial config.json)")
+    # Counts below 1 would either crash deep inside the vec-env or skip training
+    # entirely and save an untrained model, so reject them at parse time.
+    p.add_argument("--max-sheep", type=_positive_int, default=3)
+    p.add_argument("--steps-per-stage", type=_positive_int, default=1_500_000)
+    p.add_argument("--n-envs", type=_positive_int, default=8)
+    p.add_argument("--max-steps", type=_positive_int, default=1500)
+    p.add_argument("--eval-episodes", type=_positive_int, default=30)
+    p.add_argument("--run-dir", type=str, default=None)
+    args = p.parse_args()
+
+    try:
+        cfg = _load_reward_config(args.config)
+    except (OSError, ValueError) as exc:
+        p.error(f"cannot load --config {args.config!r}: {exc}")
+    # PPO below reads cfg["ent_coef"]; fail before worker processes are spawned.
+    if "ent_coef" not in cfg:
+        p.error(f"{args.config!r} has no 'ent_coef' entry")
+    rcfg = reward_cfg(cfg)
+    print(f"Config: {cfg}")
+
+    run_dir = args.run_dir or os.path.join(
+        "runs", "replay_" + time.strftime("%Y%m%d_%H%M%S")
+    )
+    os.makedirs(run_dir, exist_ok=True)
+    _write_json(os.path.join(run_dir, "config.json"), cfg)
+    print(f"Run dir: {run_dir}")
+    print(f"Curriculum: 1 → {args.max_sheep} sheep, "
+          f"{args.steps_per_stage:,} steps/stage")
+
+    results_path = os.path.join(run_dir, "stage_results.json")
+    stage_results = []
+    train_env = None
+    vn = None
+    try:
+        # Built inside the try so the subprocess workers are shut down even if
+        # VecNormalize or PPO construction fails.
+        train_env = SubprocVecEnv([
+            make_env(1, seed=i, max_steps=args.max_steps, rcfg=rcfg)
+            for i in range(args.n_envs)
+        ])
+        vn = VecNormalize(train_env, norm_obs=True, norm_reward=True, clip_obs=10.0)
+
+        model = PPO(
+            "MlpPolicy", vn,
+            learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10,
+            gamma=0.995, gae_lambda=0.95, clip_range=0.2,
+            ent_coef=cfg["ent_coef"], vf_coef=0.5, max_grad_norm=0.5,
+            policy_kwargs=dict(net_arch=[256, 256]),
+            verbose=0,
+        )
+
+        t0 = time.time()
+        for n in range(1, args.max_sheep + 1):
+            if n > 1:
+                vn.env_method("set_n_sheep", n)
+            print(f"\n[Stage n_sheep={n}] training {args.steps_per_stage:,} steps")
+            model.learn(
+                total_timesteps=args.steps_per_stage,
+                reset_num_timesteps=(n == 1),
+                callback=ProgressCallback(0, f"{n} sheep", freq=100_000),
+            )
+            print(f"[Stage n_sheep={n}] evaluating {args.eval_episodes} eps")
+            r = evaluate(model, vn, n, args.eval_episodes, args.max_steps, rcfg)
+            print(f"[Stage n_sheep={n}] sr={r['sr']*100:.0f}%  "
+                  f"mean_len={r['mean_len']:.0f}  mean_min_pen={r['mean_min_pen']:.1f}m  "
+                  f"mean_act={r['mean_act']:.2f}")
+            stage_results.append({"n_sheep": n, **r})
+            # Rewrite after every stage so finished stages survive a later crash
+            # or Ctrl-C; the file after the last stage is the complete list.
+            _write_json(results_path, stage_results)
+
+        model.save(os.path.join(run_dir, "final_model"))
+        vn.save(os.path.join(run_dir, "vecnorm.pkl"))
+    finally:
+        # Closing the VecNormalize wrapper also closes the wrapped vec-env.
+        env = vn if vn is not None else train_env
+        if env is not None:
+            try:
+                env.close()
+            except Exception as exc:  # best-effort cleanup; report, don't mask
+                print(f"Warning: closing training envs failed: {exc}")
+
+    print("\n" + "=" * 60)
+    print("  REPLAY SUMMARY")
+    print("=" * 60)
+    for r in stage_results:
+        print(f"  n_sheep={r['n_sheep']}  sr={r['sr']*100:>3.0f}%  "
+              f"len={r['mean_len']:>5.0f}  min_pen={r['mean_min_pen']:>5.1f}m  "
+              f"act={r['mean_act']:.2f}")
+    print(f"\n  Total time: {(time.time()-t0)/60:.1f} min")
+    print(f"  Artefacts:  {run_dir}/")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/training/sweep_reward.py b/training/sweep_reward.py
index 0e0b819..02380fe 100644
--- a/training/sweep_reward.py
+++ b/training/sweep_reward.py
@@ -155,8 +155,12 @@ def evaluate(model, vn_template, n_sheep, n_episodes, max_steps, rcfg):
     }
 
 
-def run_trial(trial_id: int, cfg: dict, log_path: str) -> dict:
+def run_trial(trial_id: int, cfg: dict, log_path: str, run_dir: str) -> dict:
     rcfg = reward_cfg(cfg)
+    trial_dir = os.path.join(run_dir, f"trial_{trial_id:03d}")
+    os.makedirs(trial_dir, exist_ok=True)
+    with open(os.path.join(trial_dir, "config.json"), "w") as f:
+        json.dump(cfg, f, indent=2)
 
     train_env = SubprocVecEnv([
         make_env(1, seed=trial_id * 100 + i, max_steps=MAX_STEPS, rcfg=rcfg)
@@ -186,6 +190,9 @@ def run_trial(trial_id: int, cfg: dict, log_path: str) -> dict:
         for n in EVAL_NSHEEP:
             print(f"           ... [trial {trial_id+1} | eval n={n}]", flush=True)
             per_sheep[n] = evaluate(model, vn, n, EVAL_EPISODES, MAX_STEPS, rcfg)
+
+        model.save(os.path.join(trial_dir, "model"))
+        vn.save(os.path.join(trial_dir, "vecnorm.pkl"))
     finally:
         try: vn.close()
         except Exception: pass
@@ -250,7 +257,7 @@ def main():
         t0 = time.time()
         print(f"[Trial {trial_id+1:>3}] {cfg}")
         try:
-            result = run_trial(trial_id, cfg, log_path)
+            result = run_trial(trial_id, cfg, log_path, run_dir)
             result["elapsed_s"] = time.time() - t0
             sr = result["sr"]
             print(f"           → score={result['score']:.3f}  "