Cleanup and new approach

This commit is contained in:
Johnny Fernandes
2026-04-26 01:50:01 +01:00
parent b031473758
commit 61f8a7db15
139 changed files with 510 additions and 16170 deletions
+1
View File
@@ -1,5 +1,6 @@
# Stuff
#_example/
.claude/
# Python
__pycache__/
+14
View File
@@ -0,0 +1,14 @@
{
"W_PER_SHEEP": 2.0,
"W_ALIGN": 0.05,
"W_PEN_BONUS": 10.0,
"W_COMPLETE": 100.0,
"W_STEP_COST": 0.02,
"W_COMPACT": 0.0,
"W_WALL_TOUCH": 0.15,
"WALL_TOUCH_BUFFER": 0.8,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": true,
"ENTRY_AWARE": false,
"ent_coef": 0.02
}
Binary file not shown.

Before

Width:  |  Height:  |  Size: 70 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 71 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 58 KiB

-223
View File
@@ -1,223 +0,0 @@
"""
Episode-level diagnostics for the herding policy.
Runs N episodes and for each one tracks:
- flock radius over time
- COM-to-pen distance over time
- dog position over time
- when (if ever) the flock first became compact
- failure mode classification
Then produces:
1. Console summary of failure modes
2. Per-episode time-series plots (radius + com_dist)
3. Optional rendered playback of the worst episodes
Usage
-----
python diagnose.py --model runs/ppo_consolidation/final_model.zip \
--vecnorm runs/ppo_consolidation/vecnorm.pkl \
--n-sheep 5 --episodes 20
# Watch the policy live (first episode rendered):
python diagnose.py ... --render
# Save plots to a directory instead of showing interactively:
python diagnose.py ... --plot-dir debug_plots/
"""
import argparse
import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from herding_env import HerdingEnv
# ── failure mode constants ────────────────────────────────────────────────────
COMPACT_RADIUS = 5.0  # must match DRIVE_GATE_RADIUS in herding_env.py


def classify_failure(ep_radius, ep_com_dist, n_penned, n_sheep, success,
                     compact_radius=COMPACT_RADIUS, pen_close=3.0):
    """Label one finished episode with a coarse failure mode.

    Args:
        ep_radius: per-sample flock radius over the episode.
        ep_com_dist: per-sample distance from the flock COM to the pen,
            aligned index-for-index with ``ep_radius``.
        n_penned: number of sheep penned when the episode ended.
        n_sheep: number of sheep in the episode.
        success: True when every sheep was penned.
        compact_radius: radius at or below which the flock counts as compact.
        pen_close: COM-to-pen distance at or below which the flock counts as
            having "got close" to the pen.

    Returns:
        One of ``"SUCCESS"``, ``"NEVER_COMPACT"``, ``"COMPACT_CANT_DRIVE"``,
        ``"DROVE_NO_SHEEP"`` or ``"PARTIAL_<k>of<n>"``.
    """
    if success:
        return "SUCCESS"

    # Single pass: index of the first compact sample, or None. This also
    # covers an empty trace, which previously crashed inside min().
    first_compact = next(
        (i for i, r in enumerate(ep_radius) if r <= compact_radius), None)
    if first_compact is None:
        return "NEVER_COMPACT"  # flock was always too scattered

    # Only progress made *after* the flock first compacted counts as driving.
    min_com_after = min(ep_com_dist[first_compact:], default=float("inf"))
    if min_com_after > pen_close:
        return "COMPACT_CANT_DRIVE"  # compacted but never drove to pen
    if n_penned == 0:
        return "DROVE_NO_SHEEP"  # got near pen, nothing went in
    return f"PARTIAL_{n_penned}of{n_sheep}"  # some in, not all
# ── main ─────────────────────────────────────────────────────────────────────
def parse_args():
    """Build the diagnostics command-line parser and parse ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` with ``model``, ``vecnorm``, ``n_sheep``,
        ``episodes``, ``max_steps``, ``render``, ``plot_dir`` and ``seed``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", required=True)
    parser.add_argument("--vecnorm", default=None)
    # The integer knobs share one shape; register them in declaration order
    # so the generated --help listing is unchanged.
    for flag, default in (("--n-sheep", 5),
                          ("--episodes", 20),
                          ("--max-steps", 4000)):
        parser.add_argument(flag, type=int, default=default)
    parser.add_argument(
        "--render",
        action="store_true",
        help="Show matplotlib animation of the first episode",
    )
    parser.add_argument(
        "--plot-dir",
        default=None,
        help="Save time-series plots here (one per episode)",
    )
    parser.add_argument("--seed", type=int, default=0)
    return parser.parse_args()
def make_env(n_sheep, max_steps, render_mode=None):
    """Return a zero-argument factory that builds one configured HerdingEnv.

    Construction is deferred so that the vectorised-env wrapper receiving the
    factory decides when the environment is actually instantiated.
    """
    env_kwargs = dict(n_sheep=n_sheep, max_steps=max_steps,
                      render_mode=render_mode)

    def _build():
        return HerdingEnv(**env_kwargs)

    return _build
def _record_state(inner, trace):
    """Append the env's current flock/dog statistics to the episode trace."""
    com, radius, _ = inner._flock_stats()
    trace["radius"].append(radius)
    trace["com_dist"].append(float(np.linalg.norm(com - inner.PEN_CENTER)))
    trace["dog_x"].append(float(inner.dog_pos[0]))
    trace["dog_y"].append(float(inner.dog_pos[1]))
    trace["n_penned"].append(int(inner.penned[:inner.n_sheep].sum()))


def _plot_episode(args, ep, trace, compact_step, n_sheep, mode):
    """Draw the radius / COM-distance time series for one episode."""
    fig, axes = plt.subplots(2, 1, figsize=(10, 6), sharex=True)
    t = np.arange(len(trace["radius"]))
    axes[0].plot(t, trace["radius"], color="steelblue", label="flock radius (m)")
    axes[0].axhline(COMPACT_RADIUS, color="orange", linestyle="--",
                    label=f"compact threshold ({COMPACT_RADIUS}m)")
    if compact_step is not None:
        axes[0].axvline(compact_step, color="green", linestyle=":",
                        alpha=0.6, label=f"first compact (step {compact_step})")
    axes[0].set_ylabel("radius (m)")
    axes[0].legend(fontsize=8)
    axes[0].set_title(f"ep {ep+1} | n_sheep={n_sheep} | {mode}")
    axes[1].plot(t, trace["com_dist"], color="tomato", label="COM-to-pen dist (m)")
    axes[1].set_ylabel("COM-to-pen (m)")
    axes[1].set_xlabel("step")
    axes[1].legend(fontsize=8)
    plt.tight_layout()
    if args.plot_dir:
        fig.savefig(os.path.join(args.plot_dir, f"ep{ep+1:03d}_{mode}.png"),
                    dpi=100)
        plt.close(fig)
    else:
        plt.show(block=False)
        plt.pause(0.5)


def _print_summary(args, failure_counts):
    """Print the failure-mode histogram and the rule-of-thumb diagnosis."""
    print("\n" + "=" * 55)
    print(f" Model : {args.model}")
    print(f" n_sheep : {args.n_sheep} episodes : {args.episodes}")
    print("-" * 55)
    total = sum(failure_counts.values())
    for mode, cnt in sorted(failure_counts.items(), key=lambda x: -x[1]):
        # One glyph per episode; the previous empty-string literal made this
        # histogram bar invisible.
        bar = "█" * cnt
        print(f" {mode:<26} {cnt:>3}/{total} {bar}")
    print("-" * 55)
    if total == 0:
        # Nothing to diagnose (e.g. --episodes 0); avoid dividing by zero.
        print(" No episodes were run.")
        print("=" * 55)
        return
    never_compact = failure_counts.get("NEVER_COMPACT", 0)
    cant_drive = failure_counts.get("COMPACT_CANT_DRIVE", 0)
    partial = sum(v for k, v in failure_counts.items() if k.startswith("PARTIAL"))
    successes = failure_counts.get("SUCCESS", 0)
    print(f"\n Diagnosis:")
    if never_compact / total > 0.5:
        print(" ► COLLECT problem: dog rarely compacts the flock.")
        print(" → Phase-gate W_DRIVE, increase W_COLLECT, check alignment reward.")
    if cant_drive / total > 0.3:
        print(" ► DRIVE problem: flock compacts but doesn't reach pen.")
        print(" → Check dog alignment, pen direction, W_DRIVE magnitude.")
    if partial / total > 0.3:
        print(" ► PARTIAL problem: some sheep penned, stragglers remain.")
        print(" → Flock splits; need better straggler-chasing behavior.")
    if successes / total > 0.5:
        print(" ► Mostly working! Fine-tune for consistency.")
    print("=" * 55)


def main():
    """Run the diagnostic episodes, print per-episode lines and a summary.

    Each episode trace holds the state right after ``reset()`` (index 0) and
    the state after every non-terminal step. The state after the terminal
    step is deliberately not sampled: the vectorised env auto-resets on
    ``done``, so by then the inner env already holds the *next* episode.
    """
    args = parse_args()
    # NOTE(review): --seed is parsed but not applied anywhere in this function.
    if args.plot_dir:
        os.makedirs(args.plot_dir, exist_ok=True)
        matplotlib.use("Agg")
    render_mode = "human" if args.render else None
    raw_env = DummyVecEnv([make_env(args.n_sheep, args.max_steps, render_mode)])
    if args.vecnorm:
        env = VecNormalize.load(args.vecnorm, raw_env)
        env.training = False
        env.norm_reward = False
    else:
        env = raw_env

    failure_counts = {}
    # Collected per-episode traces; not consumed further in this function.
    all_ep_data = []
    try:
        model = PPO.load(args.model, env=env)
        inner = env.envs[0] if hasattr(env, "envs") else env.venv.envs[0]
        for ep in range(args.episodes):
            obs = env.reset()
            done = False
            step = 0
            trace = {key: [] for key in
                     ("radius", "com_dist", "dog_x", "dog_y", "n_penned")}
            _record_state(inner, trace)  # initial state, so a trace is never empty
            while not done:
                action, _ = model.predict(obs, deterministic=True)
                obs, _, dones, infos = env.step(action)
                done = dones[0]
                step += 1
                if not done:
                    _record_state(inner, trace)

            info = infos[0]
            n_pen = info.get("n_penned", 0)
            n_sheep = info.get("n_sheep", args.n_sheep)
            success = n_pen == n_sheep
            mode = classify_failure(trace["radius"], trace["com_dist"],
                                    n_pen, n_sheep, success)
            failure_counts[mode] = failure_counts.get(mode, 0) + 1
            compact_step = next((i for i, r in enumerate(trace["radius"])
                                 if r <= COMPACT_RADIUS), None)
            min_radius = min(trace["radius"])
            min_com_dist = min(trace["com_dist"])
            print(f" ep {ep+1:>3} steps={step:>5} penned={n_pen}/{n_sheep}"
                  f" min_r={min_radius:.1f}m"
                  f" min_com={min_com_dist:.1f}m"
                  f" compact@step={compact_step if compact_step is not None else 'NEVER'}"
                  f" [{mode}]")
            all_ep_data.append(dict(
                ep=ep, radius=trace["radius"], com_dist=trace["com_dist"],
                dog_x=trace["dog_x"], dog_y=trace["dog_y"],
                n_penned=trace["n_penned"],
                steps=step, mode=mode, success=success,
            ))
            # ── per-episode time-series plot ──────────────────────────────
            if args.plot_dir or (not args.render and ep < 5):
                _plot_episode(args, ep, trace, compact_step, n_sheep, mode)
    finally:
        # Release the env (and any render window) even if an episode fails.
        env.close()

    # ── summary ──────────────────────────────────────────────────────────
    _print_summary(args, failure_counts)


if __name__ == "__main__":
    main()
-109
View File
@@ -1,109 +0,0 @@
"""
Load a saved run and evaluate the policy at every n_sheep from 1..N.
Tells you exactly where the curriculum stopped working.
Usage:
python eval_per_sheep.py --run-dir runs/ppo_v3
python eval_per_sheep.py --run-dir runs/ppo_v3 --max-sheep 10 --episodes 20
python eval_per_sheep.py --model runs/ppo_v3/final_model.zip \
--vecnorm runs/ppo_v3/vecnorm.pkl
"""
import argparse
import os
from copy import deepcopy
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from herding_env import HerdingEnv
from train import _classify, COMPACT_RADIUS
def _flock_snapshot(inner):
    """Return (radius, COM-to-pen, dog-to-COM) for the env's current state."""
    com, radius, _ = inner._flock_stats()
    com_to_pen = float(np.linalg.norm(com - inner.PEN_CENTER))
    dog_to_com = float(np.linalg.norm(inner.dog_pos - com))
    return radius, com_to_pen, dog_to_com


def _mean_or_nan(values):
    """Mean of a list as a float, or NaN when the list is empty."""
    return float(np.mean(values)) if values else float("nan")


def evaluate(model, vn_template, n_sheep, n_episodes, max_steps):
    """Roll out ``n_episodes`` deterministic episodes at a fixed flock size.

    A fresh env is built for ``n_sheep`` and given copies of the observation
    and return statistics from ``vn_template``.

    Each episode's state series holds the state right after ``reset()`` and
    after every non-terminal step. The state after the terminal step is not
    sampled because the vectorised env auto-resets on ``done``, so the inner
    env would already describe the next episode.

    Returns:
        dict with ``n_sheep``, ``success_rate``, ``failure`` (mode -> count),
        ``mean_action``, ``mean_min_radius``, ``mean_min_dog_com`` and
        ``mean_min_pen``. Averages are NaN when no episode was run.
    """
    raw = DummyVecEnv([lambda: HerdingEnv(n_sheep=n_sheep, max_steps=max_steps)])
    vn = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
    vn.obs_rms = deepcopy(vn_template.obs_rms)
    vn.ret_rms = deepcopy(vn_template.ret_rms)

    failure = {}
    successes = 0
    act_mags, min_radii, min_dog_com, min_pen = [], [], [], []
    try:
        inner = vn.envs[0]
        for _ in range(n_episodes):
            obs = vn.reset()
            done = False
            ep_radius, ep_com_dist, ep_dog_com, ep_act = [], [], [], []
            series = (ep_radius, ep_com_dist, ep_dog_com)
            # Initial state first, so the per-episode minima are always defined.
            for values, sample in zip(series, _flock_snapshot(inner)):
                values.append(sample)
            while not done:
                action, _ = model.predict(obs, deterministic=True)
                obs, _, dones, infos = vn.step(action)
                done = dones[0]
                # The action belongs to this episode even on the last step.
                ep_act.append(float(np.linalg.norm(action[0])))
                if not done:
                    for values, sample in zip(series, _flock_snapshot(inner)):
                        values.append(sample)

            npen = infos[0].get("n_penned", 0)
            success = npen == n_sheep
            successes += int(success)
            mode = _classify(ep_radius, ep_com_dist, npen, n_sheep, success)
            failure[mode] = failure.get(mode, 0) + 1
            act_mags.extend(ep_act)
            min_radii.append(min(ep_radius))
            min_dog_com.append(min(ep_dog_com))
            min_pen.append(min(ep_com_dist))
    finally:
        # Always release the env, including when a rollout raises.
        vn.close()

    return {
        "n_sheep": n_sheep,
        "success_rate": successes / n_episodes if n_episodes else float("nan"),
        "failure": failure,
        "mean_action": _mean_or_nan(act_mags),
        "mean_min_radius": _mean_or_nan(min_radii),
        "mean_min_dog_com": _mean_or_nan(min_dog_com),
        "mean_min_pen": _mean_or_nan(min_pen),
    }
def main():
    """Evaluate a saved policy at every flock size from 1 to ``--max-sheep``.

    The model and normalisation statistics come either from ``--run-dir``
    (``final_model.zip``, falling back to ``best_model/best_model.zip``, plus
    ``vecnorm.pkl``) or from an explicit ``--model`` / ``--vecnorm`` pair.
    One table row is printed per flock size.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--run-dir", type=str, default=None)
    p.add_argument("--model", type=str, default=None)
    p.add_argument("--vecnorm", type=str, default=None)
    p.add_argument("--max-sheep", type=int, default=10)
    p.add_argument("--episodes", type=int, default=10)
    p.add_argument("--max-steps", type=int, default=2000)
    args = p.parse_args()

    if args.run_dir:
        model_path = os.path.join(args.run_dir, "final_model.zip")
        if not os.path.exists(model_path):
            model_path = os.path.join(args.run_dir, "best_model", "best_model.zip")
        vn_path = os.path.join(args.run_dir, "vecnorm.pkl")
    else:
        # Both loaders below are unconditional, so fail with a usage message
        # instead of handing them None.
        if not args.model or not args.vecnorm:
            p.error("provide --run-dir, or both --model and --vecnorm")
        model_path = args.model
        vn_path = args.vecnorm

    print(f"Loading model: {model_path}")
    print(f"Loading vecnorm: {vn_path}\n")
    model = PPO.load(model_path, device="cpu")
    # Throwaway 1-sheep env: only needed as a carrier for the saved statistics.
    raw = DummyVecEnv([lambda: HerdingEnv(n_sheep=1, max_steps=args.max_steps)])
    vn_template = VecNormalize.load(vn_path, raw)
    try:
        print(f"{'n_sheep':>7} {'success':>8} {'act':>6} {'min_r':>7} "
              f"{'dog→com':>8} {'com→pen':>8} failure breakdown")
        print("-" * 90)
        for n in range(1, args.max_sheep + 1):
            r = evaluate(model, vn_template, n, args.episodes, args.max_steps)
            fb = " ".join(f"{m}={c}" for m, c in
                          sorted(r["failure"].items(), key=lambda x: -x[1]))
            print(f"{n:>7d} {r['success_rate']*100:>6.0f}% "
                  f"{r['mean_action']:>6.2f} "
                  f"{r['mean_min_radius']:>6.2f}m "
                  f"{r['mean_min_dog_com']:>7.2f}m "
                  f"{r['mean_min_pen']:>7.2f}m {fb}")
    finally:
        vn_template.close()


if __name__ == "__main__":
    main()
-142
View File
@@ -1,142 +0,0 @@
"""
Evaluation script for a trained herding policy.
Runs N episodes and reports the three project metrics:
1. Success rate — fraction of episodes where all sheep are penned
2. Time-to-pen — mean steps across successful episodes (per sheep)
3. Flock dispersion — flock radius (max sheep-to-COM distance) among active
sheep, averaged over all timesteps (lower = tighter herding)
Usage
-----
python evaluate.py --model runs/ppo_herding/best_model/best_model.zip \
--vecnorm runs/ppo_herding/vecnorm.pkl \
--n-sheep 5 --episodes 100
Add --render to watch the first episode in a matplotlib window.
"""
import argparse
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from herding_env import HerdingEnv
def make_single_env(n_sheep: int, max_steps: int, render_mode: str = None):
    """Return a zero-argument callable that constructs one HerdingEnv.

    The environment is not created here; the returned factory builds it with
    the captured ``n_sheep``, ``max_steps`` and ``render_mode`` when called.
    """
    settings = {
        "n_sheep": n_sheep,
        "max_steps": max_steps,
        "render_mode": render_mode,
    }

    def _factory():
        return HerdingEnv(**settings)

    return _factory
def pairwise_mean(positions: np.ndarray, n_active: int) -> float:
    """Mean pairwise distance among the first n_active sheep.

    Args:
        positions: array with one row per sheep.
        n_active: how many leading rows to include. Values larger than the
            number of rows are clamped to it instead of raising IndexError.

    Returns:
        The mean Euclidean distance over all unordered pairs, or 0.0 when
        fewer than two sheep are available.
    """
    pts = np.asarray(positions, dtype=float)[:max(n_active, 0)]
    count = len(pts)
    if count < 2:
        return 0.0
    # Flatten each row so the per-pair norm matches np.linalg.norm of the
    # row difference regardless of the trailing shape.
    flat = pts.reshape(count, -1)
    # Upper-triangle index pairs enumerate each unordered pair exactly once.
    first, second = np.triu_indices(count, k=1)
    return float(np.linalg.norm(flat[first] - flat[second], axis=1).mean())
def parse_args():
    """Parse the evaluation script's command-line options from ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` with ``model``, ``vecnorm``, ``n_sheep``,
        ``episodes``, ``max_steps``, ``render`` and ``seed``.
    """
    cli = argparse.ArgumentParser()
    # (flag, add_argument keyword arguments), kept in the original
    # declaration order so the --help listing is unchanged.
    option_specs = [
        ("--model", dict(required=True, help="Path to saved model .zip")),
        ("--vecnorm", dict(default=None,
                           help="Path to VecNormalize stats .pkl (optional)")),
        ("--n-sheep", dict(type=int, default=1)),
        ("--episodes", dict(type=int, default=50)),
        ("--max-steps", dict(type=int, default=2000)),
        ("--render", dict(action="store_true",
                          help="Render first episode in matplotlib")),
        ("--seed", dict(type=int, default=42)),
    ]
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
def main():
    """Evaluate a trained policy and print the three project metrics.

    Reports success rate, mean steps-per-sheep over successful episodes, and
    the mean flock radius while sheep are still active.
    """
    args = parse_args()
    # NOTE(review): --seed is parsed but not applied anywhere in this function.
    render_mode = "human" if args.render else None
    raw_env = DummyVecEnv([make_single_env(args.n_sheep, args.max_steps,
                                           render_mode)])
    if args.vecnorm:
        env = VecNormalize.load(args.vecnorm, raw_env)
        env.training = False
        env.norm_reward = False
    else:
        env = raw_env

    successes = []
    steps_to_pen = []  # steps for successful episodes
    dispersions = []  # per-episode mean flock dispersion
    try:
        model = PPO.load(args.model, env=env)
        # Underlying HerdingEnv, needed for the dispersion calculation.
        inner = env.envs[0] if hasattr(env, "envs") else env.venv.envs[0]
        for ep in range(args.episodes):
            obs = env.reset()
            done = False
            ep_steps = 0
            ep_dispersion = []
            while not done:
                action, _ = model.predict(obs, deterministic=True)
                obs, _, dones, infos = env.step(action)
                done = dones[0]
                ep_steps += 1
                # Skip the terminal step: the vectorised env has already
                # auto-reset, so the inner env now holds the next episode.
                if not done and not inner.penned[:inner.n_sheep].all():
                    _, radius, _ = inner._flock_stats()
                    ep_dispersion.append(radius)

            info = infos[0]
            n_penned = info.get("n_penned", 0)
            n_sheep = info.get("n_sheep", args.n_sheep)
            success = n_penned == n_sheep
            successes.append(int(success))
            if success:
                steps_to_pen.append(ep_steps / n_sheep)
            if ep_dispersion:
                dispersions.append(float(np.mean(ep_dispersion)))
            if (ep + 1) % 10 == 0:
                print(f" Episode {ep + 1:>4}/{args.episodes} "
                      f"success={int(success)} steps={ep_steps}")
    finally:
        # Release the env (and any render window) even if a rollout fails.
        env.close()

    # -----------------------------------------------------------------------
    # Report
    # -----------------------------------------------------------------------
    # Explicit NaN fallbacks: np.mean of an empty list warns before giving NaN.
    success_rate = float(np.mean(successes)) if successes else float("nan")
    mean_ttp = float(np.mean(steps_to_pen)) if steps_to_pen else float("nan")
    mean_disp = float(np.mean(dispersions)) if dispersions else float("nan")
    print("\n" + "=" * 50)
    print(f" Model : {args.model}")
    print(f" Sheep : {args.n_sheep}")
    print(f" Episodes : {args.episodes}")
    print("-" * 50)
    print(f" Success rate : {success_rate * 100:.1f}%"
          f" ({sum(successes)}/{args.episodes})")
    print(f" Time-to-pen : {mean_ttp:.1f} steps/sheep"
          f" (successful episodes only)")
    print(f" Flock radius : {mean_disp:.2f} m"
          f" (max sheep-to-COM distance while active)")
    print("=" * 50)


if __name__ == "__main__":
    main()
+26 -16
View File
@@ -61,18 +61,19 @@ class HerdingEnv(gym.Env):
W_COMPLETE = 100.0 # all sheep penned
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
W_WALL_TOUCH = 0.01 # per-sheep, per-step penalty when an active sheep is
# pinned against the outside of a pen W/E wall. Kept
# small (<step_cost) so the dog isn't incentivised to
# hover above the entrance to avoid the penalty.
WALL_TOUCH_BUFFER = 0.3 # metres outside the wall counted as "touching"
W_WALL_TOUCH = 0.15 # per-sheep max penalty at wall surface. Linear ramp
# within WALL_TOUCH_BUFFER gives the RL agent a gradient
# signal to avoid pinning sheep against pen walls.
# 0.15 ≈ 7.5× step_cost — strong enough to shape behavior
# without overwhelming progress reward.
WALL_TOUCH_BUFFER = 0.8 # metres from wall where penalty starts ramping
ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
ALIGN_GATED = True # gate alignment on action magnitude
ENTRY_AWARE = True # progress reward targets PEN_ENTRY (entrance face), not
# PEN_CENTER. Stops the wall-corraling exploit: when a
# sheep is shoved south past y=-8 outside the pen x-range,
# distance to PEN_ENTRY grows (since target is at y=-8),
# so progress reward goes negative instead of positive.
ENTRY_AWARE = False # When True, targets PEN_ENTRY (entrance face) instead
# of PEN_CENTER for progress/obs. Intended to fix wall-
# corralling but collapsed n_sheep≥2 success rate.
# The wall-touch gradient penalty handles wall avoidance
# without breaking the core herding signal.
# Initial sheep spawn: first sheep placed anywhere; rest within CLUSTER_RADIUS
# of it. Set to None for legacy uniform-scatter behaviour.
@@ -406,16 +407,25 @@ class HerdingEnv(gym.Env):
else:
alignment = 0.0
# Wall-touch penalty: count active sheep pinned against outside W/E pen walls.
# Wall-touch penalty: distance-based gradient covering all 3 solid pen
# walls (west, east, south). Linearly ramps from 0 at buffer edge to
# W_WALL_TOUCH at the wall surface — gives the agent a smooth signal
# to avoid pinning sheep against walls.
if self.W_WALL_TOUCH and active.any():
pts = self.sheep_pos[:self.n_sheep][active]
px0, px1 = self.PEN_X
py0, py1 = self.PEN_Y
in_y = (pts[:, 1] > py0) & (pts[:, 1] < py1)
near_w = (pts[:, 0] < px0) & (pts[:, 0] > px0 - self.WALL_TOUCH_BUFFER)
near_e = (pts[:, 0] > px1) & (pts[:, 0] < px1 + self.WALL_TOUCH_BUFFER)
n_touch = int(((near_w | near_e) & in_y).sum())
r_wall_touch = -n_touch * self.W_WALL_TOUCH
buf = self.WALL_TOUCH_BUFFER
far = buf + 1.0
d_w = np.where((pts[:, 0] < px0) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
px0 - pts[:, 0], far)
d_e = np.where((pts[:, 0] > px1) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
pts[:, 0] - px1, far)
d_s = np.where((pts[:, 1] < py0) & (pts[:, 0] > px0) & (pts[:, 0] < px1),
py0 - pts[:, 1], far)
d_min = np.minimum(np.minimum(d_w, d_e), d_s)
penalties = np.maximum(0.0, 1.0 - d_min / buf) * self.W_WALL_TOUCH
r_wall_touch = -float(penalties.sum())
else:
r_wall_touch = 0.0
-172
View File
@@ -1,172 +0,0 @@
"""
Replay a reward config from the sweep with a longer training budget.
Tells you whether a promising sweep config was bottlenecked by training time
vs. structurally limited. If sr2/sr3 climb past their sweep numbers given more
budget, the issue was budget; if they plateau, the policy/obs needs work.
Usage
-----
python replay_config.py --config runs/sweep_<ts>/best.json
python replay_config.py --config runs/sweep_<ts>/trial_007/config.json \
--max-sheep 4 --steps-per-stage 1500000
Argument summary:
--config JSON file with the reward config (sweep best.json works)
--max-sheep Final curriculum stage (default 3)
--steps-per-stage Env steps per curriculum stage (default 1.5M)
--n-envs Parallel envs (default 8)
--eval-episodes Per-stage eval episodes (default 30)
--run-dir Output directory (default runs/replay_<ts>/)
"""
import argparse
import json
import os
import time
from copy import deepcopy
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv, VecNormalize
from herding_env import HerdingEnv
from sweep_reward import ProgressCallback, reward_cfg, evaluate, make_env
def _evaluate_and_report(model, vn, n, args, rcfg, label):
    """Evaluate the policy at ``n`` sheep, print one result line, return the record."""
    r = evaluate(model, vn, n, args.eval_episodes, args.max_steps, rcfg)
    print(f"{label} sr={r['sr']*100:.0f}% "
          f"mean_len={r['mean_len']:.0f} "
          f"mean_min_pen={r['mean_min_pen']:.1f}m "
          f"mean_act={r['mean_act']:.2f}")
    return {"n_sheep": n, **r}


def main():
    """Retrain one reward config with a longer budget and report per-size results.

    Runs either a curriculum (``--start-sheep`` up to ``--max-sheep``, one
    stage each, optionally followed by a mixed-size consolidation pass) or a
    single mixed-size run (``--mixed``). The model, normalisation statistics
    and per-size evaluation results are written to the run directory.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--config", type=str, required=True,
                   help="Reward config JSON (sweep best.json or trial config.json)")
    p.add_argument("--start-sheep", type=int, default=1)
    p.add_argument("--max-sheep", type=int, default=3)
    p.add_argument("--steps-per-stage", type=int, default=1_500_000)
    p.add_argument("--mixed", action="store_true",
                   help="Train with n_sheep randomized per episode (no curriculum). "
                        "Total train steps = steps-per-stage * max_sheep.")
    p.add_argument("--final-mixed-steps", type=int, default=0,
                   help="After the curriculum, train this many extra steps with "
                        "random_n_sheep ∈ [1, max_sheep] to consolidate the policy "
                        "across all flock sizes. Re-evaluates all n_sheep at the end.")
    p.add_argument("--n-envs", type=int, default=8)
    p.add_argument("--max-steps", type=int, default=2500)
    p.add_argument("--eval-episodes", type=int, default=30)
    p.add_argument("--run-dir", type=str, default=None)
    args = p.parse_args()

    # An inverted range would silently run zero stages and save an untrained
    # model, so reject it up front.
    if not args.mixed and args.start_sheep > args.max_sheep:
        p.error(f"--start-sheep ({args.start_sheep}) must not exceed "
                f"--max-sheep ({args.max_sheep})")

    with open(args.config, encoding="utf-8") as f:
        raw = json.load(f)
    # Accept both a bare config and a wrapper that nests it under "config".
    cfg = raw["config"] if "config" in raw and isinstance(raw["config"], dict) else raw
    rcfg = reward_cfg(cfg)
    print(f"Config: {cfg}")
    run_dir = args.run_dir or os.path.join(
        "runs", "replay_" + time.strftime("%Y%m%d_%H%M%S")
    )
    os.makedirs(run_dir, exist_ok=True)
    with open(os.path.join(run_dir, "config.json"), "w", encoding="utf-8") as f:
        json.dump(cfg, f, indent=2)
    print(f"Run dir: {run_dir}")
    if args.mixed:
        print(f"MIXED training: random n_sheep ∈ [1, {args.max_sheep}], "
              f"{args.steps_per_stage * args.max_sheep:,} total steps")
    else:
        # The separator was missing, which fused the two numbers ("13 sheep").
        print(f"Curriculum: {args.start_sheep} → {args.max_sheep} sheep, "
              f"{args.steps_per_stage:,} steps/stage")

    train_env = SubprocVecEnv([
        make_env(args.max_sheep if args.mixed else args.start_sheep,
                 seed=i, max_steps=args.max_steps, rcfg=rcfg,
                 random_n_sheep=args.mixed)
        for i in range(args.n_envs)
    ])
    vn = VecNormalize(train_env, norm_obs=True, norm_reward=True, clip_obs=10.0)
    model = PPO(
        "MlpPolicy", vn,
        learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10,
        gamma=0.995, gae_lambda=0.95, clip_range=0.2,
        ent_coef=cfg["ent_coef"], vf_coef=0.5, max_grad_norm=0.5,
        policy_kwargs=dict(net_arch=[256, 256]),
        verbose=0,
    )
    stage_results = []
    t0 = time.time()
    try:
        if args.mixed:
            total = args.steps_per_stage * args.max_sheep
            print(f"\n[Mixed] training {total:,} steps")
            model.learn(
                total_timesteps=total,
                reset_num_timesteps=True,
                callback=ProgressCallback(0, "mixed", freq=100_000),
            )
            for n in range(1, args.max_sheep + 1):
                print(f"[Mixed] evaluating n={n}, {args.eval_episodes} eps")
                stage_results.append(_evaluate_and_report(
                    model, vn, n, args, rcfg, f"[Mixed] n_sheep={n}"))
        else:
            for n in range(args.start_sheep, args.max_sheep + 1):
                if n > args.start_sheep:
                    vn.env_method("set_n_sheep", n)
                print(f"\n[Stage n_sheep={n}] training {args.steps_per_stage:,} steps")
                model.learn(
                    total_timesteps=args.steps_per_stage,
                    # Only the first stage restarts the global step counter.
                    reset_num_timesteps=(n == args.start_sheep),
                    callback=ProgressCallback(0, f"{n} sheep", freq=100_000),
                )
                print(f"[Stage n_sheep={n}] evaluating {args.eval_episodes} eps")
                stage_results.append(_evaluate_and_report(
                    model, vn, n, args, rcfg, f"[Stage n_sheep={n}]"))

        # Optional consolidation pass with mixed n_sheep — fixes specialization
        # imbalance from curriculum order (e.g. n=1 weakness after long n=10
        # training). Replaces stage_results with the post-consolidation eval.
        if args.final_mixed_steps > 0 and not args.mixed:
            print(f"\n[Consolidation] mixed n_sheep ∈ [1, {args.max_sheep}], "
                  f"{args.final_mixed_steps:,} steps")
            # NOTE(review): the flag is set on whatever object each worker
            # exposes; confirm that is the HerdingEnv itself, not a wrapper.
            vn.env_method("__setattr__", "random_n_sheep", True)
            model.learn(
                total_timesteps=args.final_mixed_steps,
                reset_num_timesteps=False,
                callback=ProgressCallback(0, "consolidate", freq=100_000),
            )
            print("[Consolidation] re-evaluating all sheep counts")
            stage_results = [
                _evaluate_and_report(model, vn, n, args, rcfg,
                                     f"[Consolidation] n_sheep={n}")
                for n in range(1, args.max_sheep + 1)
            ]

        model.save(os.path.join(run_dir, "final_model"))
        vn.save(os.path.join(run_dir, "vecnorm.pkl"))
        with open(os.path.join(run_dir, "stage_results.json"), "w",
                  encoding="utf-8") as f:
            json.dump(stage_results, f, indent=2)
    finally:
        # Best-effort shutdown of the worker processes: report a failure
        # rather than hide it, but never mask an error from the try body.
        try:
            vn.close()
        except Exception as exc:
            print(f"Warning: failed to close training env: {exc}")

    print("\n" + "=" * 60)
    print(" REPLAY SUMMARY")
    print("=" * 60)
    for r in stage_results:
        print(f" n_sheep={r['n_sheep']} sr={r['sr']*100:>3.0f}% "
              f"len={r['mean_len']:>5.0f} min_pen={r['mean_min_pen']:>5.1f}m "
              f"act={r['mean_act']:.2f}")
    print(f"\n Total time: {(time.time()-t0)/60:.1f} min")
    print(f" Artefacts: {run_dir}/")


if __name__ == "__main__":
    main()
-35
View File
@@ -1,35 +0,0 @@
Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
Run dir: runs/expA_fresh2
Curriculum: 2 → 2 sheep, 2,000,000 steps/stage
[Stage n_sheep=2] training 2,000,000 steps
... [trial 1 | 2 sheep | 100,000 steps | ret(last 50)=-13.44 sr=0%]
... [trial 1 | 2 sheep | 200,000 steps | ret(last 50)=-14.60 sr=0%]
... [trial 1 | 2 sheep | 300,000 steps | ret(last 50)=-17.36 sr=0%]
... [trial 1 | 2 sheep | 400,000 steps | ret(last 50)=-17.36 sr=0%]
... [trial 1 | 2 sheep | 500,000 steps | ret(last 50)=-17.92 sr=0%]
... [trial 1 | 2 sheep | 600,000 steps | ret(last 50)=-15.65 sr=0%]
... [trial 1 | 2 sheep | 700,000 steps | ret(last 50)=-17.69 sr=2%]
... [trial 1 | 2 sheep | 800,000 steps | ret(last 50)=-14.61 sr=2%]
... [trial 1 | 2 sheep | 900,000 steps | ret(last 50)=-17.36 sr=0%]
... [trial 1 | 2 sheep | 1,000,000 steps | ret(last 50)=-17.44 sr=0%]
... [trial 1 | 2 sheep | 1,100,000 steps | ret(last 50)=-15.91 sr=2%]
... [trial 1 | 2 sheep | 1,200,000 steps | ret(last 50)=-16.08 sr=0%]
... [trial 1 | 2 sheep | 1,300,000 steps | ret(last 50)=-14.34 sr=0%]
... [trial 1 | 2 sheep | 1,400,000 steps | ret(last 50)=-17.00 sr=2%]
... [trial 1 | 2 sheep | 1,500,000 steps | ret(last 50)=-18.52 sr=0%]
... [trial 1 | 2 sheep | 1,600,000 steps | ret(last 50)=-16.68 sr=0%]
... [trial 1 | 2 sheep | 1,700,000 steps | ret(last 50)=-17.52 sr=0%]
... [trial 1 | 2 sheep | 1,800,000 steps | ret(last 50)=-17.33 sr=0%]
... [trial 1 | 2 sheep | 1,900,000 steps | ret(last 50)=-14.96 sr=2%]
... [trial 1 | 2 sheep | 2,000,000 steps | ret(last 50)=-15.59 sr=0%]
[Stage n_sheep=2] evaluating 30 eps
[Stage n_sheep=2] sr=0% mean_len=1500 mean_min_pen=13.2m mean_act=0.96
============================================================
REPLAY SUMMARY
============================================================
n_sheep=2 sr= 0% len= 1500 min_pen= 13.2m act=0.96
Total time: 10.7 min
Artefacts: runs/expA_fresh2/
-11
View File
@@ -1,11 +0,0 @@
{
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
}
Binary file not shown.
@@ -1,9 +0,0 @@
[
{
"n_sheep": 2,
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 13.171057415008544,
"mean_act": 0.960968065615257
}
]
Binary file not shown.
-51
View File
@@ -1,51 +0,0 @@
Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
Run dir: runs/expB_mixed
MIXED training: random n_sheep ∈ [1, 3], 3,000,000 total steps
[Mixed] training 3,000,000 steps
... [trial 1 | mixed | 100,000 steps | ret(last 50)=-13.68 sr=2%]
... [trial 1 | mixed | 200,000 steps | ret(last 50)=-14.08 sr=0%]
... [trial 1 | mixed | 300,000 steps | ret(last 50)=-9.80 sr=0%]
... [trial 1 | mixed | 400,000 steps | ret(last 50)=-11.20 sr=0%]
... [trial 1 | mixed | 500,000 steps | ret(last 50)=-10.61 sr=0%]
... [trial 1 | mixed | 600,000 steps | ret(last 50)=-11.19 sr=0%]
... [trial 1 | mixed | 700,000 steps | ret(last 50)=-14.22 sr=0%]
... [trial 1 | mixed | 800,000 steps | ret(last 50)=-6.31 sr=0%]
... [trial 1 | mixed | 900,000 steps | ret(last 50)=-12.68 sr=0%]
... [trial 1 | mixed | 1,000,000 steps | ret(last 50)=-11.06 sr=0%]
... [trial 1 | mixed | 1,100,000 steps | ret(last 50)=-13.39 sr=0%]
... [trial 1 | mixed | 1,200,000 steps | ret(last 50)=-14.20 sr=0%]
... [trial 1 | mixed | 1,300,000 steps | ret(last 50)=-11.33 sr=0%]
... [trial 1 | mixed | 1,400,000 steps | ret(last 50)=-10.73 sr=0%]
... [trial 1 | mixed | 1,500,000 steps | ret(last 50)=-10.91 sr=0%]
... [trial 1 | mixed | 1,600,000 steps | ret(last 50)=-10.44 sr=0%]
... [trial 1 | mixed | 1,700,000 steps | ret(last 50)=-10.56 sr=0%]
... [trial 1 | mixed | 1,800,000 steps | ret(last 50)=-15.74 sr=0%]
... [trial 1 | mixed | 1,900,000 steps | ret(last 50)=-13.46 sr=0%]
... [trial 1 | mixed | 2,000,000 steps | ret(last 50)=-9.86 sr=0%]
... [trial 1 | mixed | 2,100,000 steps | ret(last 50)=-13.07 sr=0%]
... [trial 1 | mixed | 2,200,000 steps | ret(last 50)=-9.86 sr=0%]
... [trial 1 | mixed | 2,300,000 steps | ret(last 50)=-9.73 sr=2%]
... [trial 1 | mixed | 2,400,000 steps | ret(last 50)=-12.21 sr=0%]
... [trial 1 | mixed | 2,500,000 steps | ret(last 50)=-14.27 sr=0%]
... [trial 1 | mixed | 2,600,000 steps | ret(last 50)=-10.90 sr=2%]
... [trial 1 | mixed | 2,700,000 steps | ret(last 50)=-9.67 sr=0%]
... [trial 1 | mixed | 2,800,000 steps | ret(last 50)=-14.29 sr=0%]
... [trial 1 | mixed | 2,900,000 steps | ret(last 50)=-9.08 sr=0%]
... [trial 1 | mixed | 3,000,000 steps | ret(last 50)=-11.62 sr=6%]
[Mixed] evaluating n=1, 30 eps
[Mixed] n_sheep=1 sr=0% mean_len=1500 mean_min_pen=12.1m mean_act=0.64
[Mixed] evaluating n=2, 30 eps
[Mixed] n_sheep=2 sr=0% mean_len=1500 mean_min_pen=13.6m mean_act=1.12
[Mixed] evaluating n=3, 30 eps
[Mixed] n_sheep=3 sr=0% mean_len=1500 mean_min_pen=13.3m mean_act=1.02
============================================================
REPLAY SUMMARY
============================================================
n_sheep=1 sr= 0% len= 1500 min_pen= 12.1m act=0.64
n_sheep=2 sr= 0% len= 1500 min_pen= 13.6m act=1.12
n_sheep=3 sr= 0% len= 1500 min_pen= 13.3m act=1.02
Total time: 20.6 min
Artefacts: runs/expB_mixed/
-11
View File
@@ -1,11 +0,0 @@
{
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
}
Binary file not shown.
@@ -1,23 +0,0 @@
[
{
"n_sheep": 1,
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 12.136781152089437,
"mean_act": 0.6380681545449439
},
{
"n_sheep": 2,
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 13.609641806284587,
"mean_act": 1.1225489819858792
},
{
"n_sheep": 3,
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 13.337443319956462,
"mean_act": 1.0186407331574738
}
]
Binary file not shown.
-57
View File
@@ -1,57 +0,0 @@
Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
Run dir: runs/expC_clustered
Curriculum: 1 → 3 sheep, 1,000,000 steps/stage
[Stage n_sheep=1] training 1,000,000 steps
... [trial 1 | 1 sheep | 100,000 steps | ret(last 50)=-17.04 sr=6%]
... [trial 1 | 1 sheep | 200,000 steps | ret(last 50)=-17.39 sr=4%]
... [trial 1 | 1 sheep | 300,000 steps | ret(last 50)=-15.50 sr=4%]
... [trial 1 | 1 sheep | 400,000 steps | ret(last 50)=-2.07 sr=26%]
... [trial 1 | 1 sheep | 500,000 steps | ret(last 50)=+3.81 sr=52%]
... [trial 1 | 1 sheep | 600,000 steps | ret(last 50)=+8.03 sr=76%]
... [trial 1 | 1 sheep | 700,000 steps | ret(last 50)=+9.49 sr=86%]
... [trial 1 | 1 sheep | 800,000 steps | ret(last 50)=+9.42 sr=88%]
... [trial 1 | 1 sheep | 900,000 steps | ret(last 50)=+9.49 sr=88%]
... [trial 1 | 1 sheep | 1,000,000 steps | ret(last 50)=+10.34 sr=94%]
[Stage n_sheep=1] evaluating 30 eps
[Stage n_sheep=1] sr=83% mean_len=519 mean_min_pen=3.5m mean_act=0.25
[Stage n_sheep=2] training 1,000,000 steps
... [trial 1 | 2 sheep | 1,015,816 steps | ret(last 0)=+nan sr=nan%]
... [trial 1 | 2 sheep | 1,115,816 steps | ret(last 50)=-0.13 sr=10%]
... [trial 1 | 2 sheep | 1,215,816 steps | ret(last 50)=-1.23 sr=10%]
... [trial 1 | 2 sheep | 1,315,816 steps | ret(last 50)=-0.10 sr=6%]
... [trial 1 | 2 sheep | 1,415,816 steps | ret(last 50)=+4.10 sr=28%]
... [trial 1 | 2 sheep | 1,515,816 steps | ret(last 50)=+6.24 sr=32%]
... [trial 1 | 2 sheep | 1,615,816 steps | ret(last 50)=+8.48 sr=52%]
... [trial 1 | 2 sheep | 1,715,816 steps | ret(last 50)=+14.14 sr=98%]
... [trial 1 | 2 sheep | 1,815,816 steps | ret(last 50)=+14.33 sr=98%]
... [trial 1 | 2 sheep | 1,915,816 steps | ret(last 50)=+14.02 sr=100%]
... [trial 1 | 2 sheep | 2,015,816 steps | ret(last 50)=+14.05 sr=100%]
[Stage n_sheep=2] evaluating 30 eps
[Stage n_sheep=2] sr=100% mean_len=695 mean_min_pen=3.4m mean_act=0.58
[Stage n_sheep=3] training 1,000,000 steps
... [trial 1 | 3 sheep | 2,031,624 steps | ret(last 0)=+nan sr=nan%]
... [trial 1 | 3 sheep | 2,131,624 steps | ret(last 50)=+10.43 sr=56%]
... [trial 1 | 3 sheep | 2,231,624 steps | ret(last 50)=+13.91 sr=74%]
... [trial 1 | 3 sheep | 2,331,624 steps | ret(last 50)=+13.98 sr=76%]
... [trial 1 | 3 sheep | 2,431,624 steps | ret(last 50)=+12.67 sr=68%]
... [trial 1 | 3 sheep | 2,531,624 steps | ret(last 50)=+15.79 sr=90%]
... [trial 1 | 3 sheep | 2,631,624 steps | ret(last 50)=+16.29 sr=94%]
... [trial 1 | 3 sheep | 2,731,624 steps | ret(last 50)=+15.47 sr=90%]
... [trial 1 | 3 sheep | 2,831,624 steps | ret(last 50)=+16.67 sr=96%]
... [trial 1 | 3 sheep | 2,931,624 steps | ret(last 50)=+17.50 sr=100%]
... [trial 1 | 3 sheep | 3,031,624 steps | ret(last 50)=+16.49 sr=96%]
[Stage n_sheep=3] evaluating 30 eps
[Stage n_sheep=3] sr=90% mean_len=794 mean_min_pen=3.7m mean_act=0.47
============================================================
REPLAY SUMMARY
============================================================
n_sheep=1 sr= 83% len= 519 min_pen= 3.5m act=0.25
n_sheep=2 sr=100% len= 695 min_pen= 3.4m act=0.58
n_sheep=3 sr= 90% len= 794 min_pen= 3.7m act=0.47
Total time: 15.1 min
Artefacts: runs/expC_clustered/
-11
View File
@@ -1,11 +0,0 @@
{
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
}
Binary file not shown.
@@ -1,23 +0,0 @@
[
{
"n_sheep": 1,
"sr": 0.8333333333333334,
"mean_len": 518.5333333333333,
"mean_min_pen": 3.5244259238243103,
"mean_act": 0.25044742608759274
},
{
"n_sheep": 2,
"sr": 1.0,
"mean_len": 694.9,
"mean_min_pen": 3.4314632336298625,
"mean_act": 0.5796192060058971
},
{
"n_sheep": 3,
"sr": 0.9,
"mean_len": 794.1333333333333,
"mean_min_pen": 3.6645382324854534,
"mean_act": 0.46590614892287907
}
]
Binary file not shown.
-219
View File
@@ -1,219 +0,0 @@
Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
Run dir: runs/final_v2
Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
[Stage n_sheep=1] training 1,500,000 steps
... [trial 1 | 1 sheep | 100,000 steps | ret(last 41)=-38.49 win_sr=10% cum_sr=10%]
... [trial 1 | 1 sheep | 200,000 steps | ret(last 50)=-32.87 win_sr=8% cum_sr=9%]
... [trial 1 | 1 sheep | 300,000 steps | ret(last 50)=-33.60 win_sr=4% cum_sr=7%]
... [trial 1 | 1 sheep | 400,000 steps | ret(last 50)=-34.78 win_sr=8% cum_sr=7%]
... [trial 1 | 1 sheep | 500,000 steps | ret(last 50)=-31.25 win_sr=12% cum_sr=8%]
... [trial 1 | 1 sheep | 600,000 steps | ret(last 50)=-32.87 win_sr=2% cum_sr=7%]
... [trial 1 | 1 sheep | 700,000 steps | ret(last 50)=-33.25 win_sr=6% cum_sr=7%]
... [trial 1 | 1 sheep | 800,000 steps | ret(last 50)=-27.80 win_sr=16% cum_sr=8%]
... [trial 1 | 1 sheep | 900,000 steps | ret(last 50)=-27.44 win_sr=14% cum_sr=9%]
... [trial 1 | 1 sheep | 1,000,000 steps | ret(last 50)=-30.52 win_sr=6% cum_sr=9%]
... [trial 1 | 1 sheep | 1,100,000 steps | ret(last 50)=-24.75 win_sr=20% cum_sr=10%]
... [trial 1 | 1 sheep | 1,200,000 steps | ret(last 50)=-29.94 win_sr=4% cum_sr=10%]
... [trial 1 | 1 sheep | 1,300,000 steps | ret(last 50)=-22.72 win_sr=22% cum_sr=11%]
... [trial 1 | 1 sheep | 1,400,000 steps | ret(last 50)=-9.84 win_sr=46% cum_sr=14%]
... [trial 1 | 1 sheep | 1,500,000 steps | ret(last 50)=+10.01 win_sr=96% cum_sr=24%]
[Stage n_sheep=1] evaluating 30 eps
[Stage n_sheep=1] sr=97% mean_len=351 mean_min_pen=3.9m mean_act=0.28
[Stage n_sheep=2] training 1,500,000 steps
... [trial 1 | 2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 2 sheep | 1,607,336 steps | ret(last 43)=-4.11 win_sr=33% cum_sr=33%]
... [trial 1 | 2 sheep | 1,707,336 steps | ret(last 50)=-0.34 win_sr=36% cum_sr=34%]
... [trial 1 | 2 sheep | 1,807,336 steps | ret(last 50)=+14.73 win_sr=92% cum_sr=62%]
... [trial 1 | 2 sheep | 1,907,336 steps | ret(last 50)=+17.38 win_sr=100% cum_sr=76%]
... [trial 1 | 2 sheep | 2,007,336 steps | ret(last 50)=+16.80 win_sr=100% cum_sr=83%]
... [trial 1 | 2 sheep | 2,107,336 steps | ret(last 50)=+15.67 win_sr=100% cum_sr=87%]
... [trial 1 | 2 sheep | 2,207,336 steps | ret(last 50)=+15.39 win_sr=100% cum_sr=90%]
... [trial 1 | 2 sheep | 2,307,336 steps | ret(last 50)=+15.58 win_sr=100% cum_sr=92%]
... [trial 1 | 2 sheep | 2,407,336 steps | ret(last 50)=+15.01 win_sr=100% cum_sr=93%]
... [trial 1 | 2 sheep | 2,507,336 steps | ret(last 50)=+15.50 win_sr=100% cum_sr=94%]
... [trial 1 | 2 sheep | 2,607,336 steps | ret(last 50)=+15.21 win_sr=100% cum_sr=95%]
... [trial 1 | 2 sheep | 2,707,336 steps | ret(last 50)=+15.22 win_sr=100% cum_sr=95%]
... [trial 1 | 2 sheep | 2,807,336 steps | ret(last 50)=+15.05 win_sr=100% cum_sr=96%]
... [trial 1 | 2 sheep | 2,907,336 steps | ret(last 50)=+14.37 win_sr=100% cum_sr=96%]
... [trial 1 | 2 sheep | 3,007,336 steps | ret(last 50)=+14.70 win_sr=100% cum_sr=97%]
[Stage n_sheep=2] evaluating 30 eps
[Stage n_sheep=2] sr=100% mean_len=421 mean_min_pen=3.5m mean_act=1.01
[Stage n_sheep=3] training 1,500,000 steps
... [trial 1 | 3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 3 sheep | 3,114,664 steps | ret(last 50)=+16.52 win_sr=100% cum_sr=99%]
... [trial 1 | 3 sheep | 3,214,664 steps | ret(last 50)=+16.74 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,314,664 steps | ret(last 50)=+17.09 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,414,664 steps | ret(last 50)=+16.90 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,514,664 steps | ret(last 50)=+16.97 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,614,664 steps | ret(last 50)=+17.20 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,714,664 steps | ret(last 50)=+17.09 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,814,664 steps | ret(last 50)=+17.12 win_sr=98% cum_sr=100%]
... [trial 1 | 3 sheep | 3,914,664 steps | ret(last 50)=+17.17 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,014,664 steps | ret(last 50)=+16.25 win_sr=98% cum_sr=100%]
... [trial 1 | 3 sheep | 4,114,664 steps | ret(last 50)=+17.04 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,214,664 steps | ret(last 50)=+16.31 win_sr=98% cum_sr=100%]
... [trial 1 | 3 sheep | 4,314,664 steps | ret(last 50)=+16.82 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,414,664 steps | ret(last 50)=+16.49 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,514,664 steps | ret(last 50)=+16.54 win_sr=100% cum_sr=100%]
[Stage n_sheep=3] evaluating 30 eps
[Stage n_sheep=3] sr=100% mean_len=608 mean_min_pen=3.5m mean_act=1.06
[Stage n_sheep=4] training 1,500,000 steps
... [trial 1 | 4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 4 sheep | 4,621,992 steps | ret(last 50)=+18.55 win_sr=98% cum_sr=94%]
... [trial 1 | 4 sheep | 4,721,992 steps | ret(last 50)=+19.17 win_sr=100% cum_sr=97%]
... [trial 1 | 4 sheep | 4,821,992 steps | ret(last 50)=+18.64 win_sr=100% cum_sr=98%]
... [trial 1 | 4 sheep | 4,921,992 steps | ret(last 50)=+19.06 win_sr=100% cum_sr=99%]
... [trial 1 | 4 sheep | 5,021,992 steps | ret(last 50)=+19.01 win_sr=100% cum_sr=99%]
... [trial 1 | 4 sheep | 5,121,992 steps | ret(last 50)=+19.23 win_sr=100% cum_sr=99%]
... [trial 1 | 4 sheep | 5,221,992 steps | ret(last 50)=+18.71 win_sr=100% cum_sr=99%]
... [trial 1 | 4 sheep | 5,321,992 steps | ret(last 50)=+18.81 win_sr=100% cum_sr=99%]
... [trial 1 | 4 sheep | 5,421,992 steps | ret(last 50)=+19.51 win_sr=100% cum_sr=99%]
... [trial 1 | 4 sheep | 5,521,992 steps | ret(last 50)=+19.01 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,621,992 steps | ret(last 50)=+19.21 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,721,992 steps | ret(last 50)=+18.62 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,821,992 steps | ret(last 50)=+18.57 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,921,992 steps | ret(last 50)=+19.22 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 6,021,992 steps | ret(last 50)=+18.73 win_sr=100% cum_sr=100%]
[Stage n_sheep=4] evaluating 30 eps
[Stage n_sheep=4] sr=100% mean_len=874 mean_min_pen=3.3m mean_act=1.23
[Stage n_sheep=5] training 1,500,000 steps
... [trial 1 | 5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 5 sheep | 6,129,320 steps | ret(last 50)=+22.70 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,229,320 steps | ret(last 50)=+20.82 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,329,320 steps | ret(last 50)=+20.84 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,429,320 steps | ret(last 50)=+21.70 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,529,320 steps | ret(last 50)=+21.25 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,629,320 steps | ret(last 50)=+20.61 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,729,320 steps | ret(last 50)=+21.10 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,829,320 steps | ret(last 50)=+21.42 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,929,320 steps | ret(last 50)=+21.39 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,029,320 steps | ret(last 50)=+20.80 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,129,320 steps | ret(last 50)=+21.19 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,229,320 steps | ret(last 50)=+20.92 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,329,320 steps | ret(last 50)=+20.97 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,429,320 steps | ret(last 50)=+20.48 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,529,320 steps | ret(last 50)=+21.36 win_sr=100% cum_sr=100%]
[Stage n_sheep=5] evaluating 30 eps
[Stage n_sheep=5] sr=97% mean_len=945 mean_min_pen=3.4m mean_act=1.33
[Stage n_sheep=6] training 1,500,000 steps
... [trial 1 | 6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 6 sheep | 7,636,648 steps | ret(last 50)=+22.41 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 7,736,648 steps | ret(last 50)=+23.84 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 7,836,648 steps | ret(last 50)=+22.95 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 7,936,648 steps | ret(last 50)=+23.97 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,036,648 steps | ret(last 50)=+24.02 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,136,648 steps | ret(last 50)=+23.42 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,236,648 steps | ret(last 50)=+24.15 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,336,648 steps | ret(last 50)=+23.32 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,436,648 steps | ret(last 50)=+23.46 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,536,648 steps | ret(last 50)=+23.80 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,636,648 steps | ret(last 50)=+24.41 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,736,648 steps | ret(last 50)=+23.86 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,836,648 steps | ret(last 50)=+23.57 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,936,648 steps | ret(last 50)=+23.74 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 9,036,648 steps | ret(last 50)=+22.87 win_sr=100% cum_sr=100%]
[Stage n_sheep=6] evaluating 30 eps
[Stage n_sheep=6] sr=100% mean_len=1162 mean_min_pen=3.1m mean_act=1.36
[Stage n_sheep=7] training 1,500,000 steps
... [trial 1 | 7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 7 sheep | 9,143,976 steps | ret(last 50)=+24.46 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,243,976 steps | ret(last 50)=+25.47 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,343,976 steps | ret(last 50)=+25.10 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,443,976 steps | ret(last 50)=+24.85 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,543,976 steps | ret(last 50)=+26.01 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,643,976 steps | ret(last 50)=+26.26 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,743,976 steps | ret(last 50)=+26.44 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,843,976 steps | ret(last 50)=+26.08 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,943,976 steps | ret(last 50)=+25.00 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,043,976 steps | ret(last 50)=+26.22 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,143,976 steps | ret(last 50)=+24.79 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,243,976 steps | ret(last 50)=+26.33 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,343,976 steps | ret(last 50)=+26.36 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,443,976 steps | ret(last 50)=+25.68 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,543,976 steps | ret(last 50)=+26.75 win_sr=100% cum_sr=100%]
[Stage n_sheep=7] evaluating 30 eps
[Stage n_sheep=7] sr=100% mean_len=1253 mean_min_pen=2.7m mean_act=1.38
[Stage n_sheep=8] training 1,500,000 steps
... [trial 1 | 8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 8 sheep | 10,651,304 steps | ret(last 50)=+28.19 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 10,751,304 steps | ret(last 50)=+28.80 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 10,851,304 steps | ret(last 50)=+27.81 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 10,951,304 steps | ret(last 50)=+27.31 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,051,304 steps | ret(last 50)=+27.67 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,151,304 steps | ret(last 50)=+27.14 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,251,304 steps | ret(last 50)=+29.60 win_sr=98% cum_sr=100%]
... [trial 1 | 8 sheep | 11,351,304 steps | ret(last 50)=+28.81 win_sr=98% cum_sr=100%]
... [trial 1 | 8 sheep | 11,451,304 steps | ret(last 50)=+27.76 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,551,304 steps | ret(last 50)=+27.28 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,651,304 steps | ret(last 50)=+29.04 win_sr=98% cum_sr=99%]
... [trial 1 | 8 sheep | 11,751,304 steps | ret(last 50)=+28.75 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,851,304 steps | ret(last 50)=+29.04 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,951,304 steps | ret(last 50)=+28.27 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 12,051,304 steps | ret(last 50)=+27.90 win_sr=100% cum_sr=100%]
[Stage n_sheep=8] evaluating 30 eps
[Stage n_sheep=8] sr=93% mean_len=1495 mean_min_pen=2.6m mean_act=1.39
[Stage n_sheep=9] training 1,500,000 steps
... [trial 1 | 9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 9 sheep | 12,158,632 steps | ret(last 50)=+30.67 win_sr=98% cum_sr=98%]
... [trial 1 | 9 sheep | 12,258,632 steps | ret(last 50)=+28.78 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 12,358,632 steps | ret(last 50)=+30.08 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 12,458,632 steps | ret(last 50)=+29.61 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 12,558,632 steps | ret(last 50)=+30.34 win_sr=98% cum_sr=99%]
... [trial 1 | 9 sheep | 12,658,632 steps | ret(last 50)=+29.48 win_sr=98% cum_sr=99%]
... [trial 1 | 9 sheep | 12,758,632 steps | ret(last 50)=+29.92 win_sr=98% cum_sr=99%]
... [trial 1 | 9 sheep | 12,858,632 steps | ret(last 50)=+29.26 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 12,958,632 steps | ret(last 50)=+30.36 win_sr=96% cum_sr=98%]
... [trial 1 | 9 sheep | 13,058,632 steps | ret(last 50)=+30.19 win_sr=100% cum_sr=98%]
... [trial 1 | 9 sheep | 13,158,632 steps | ret(last 50)=+29.24 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 13,258,632 steps | ret(last 50)=+30.40 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 13,358,632 steps | ret(last 50)=+31.65 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 13,458,632 steps | ret(last 50)=+30.77 win_sr=98% cum_sr=99%]
... [trial 1 | 9 sheep | 13,558,632 steps | ret(last 50)=+30.21 win_sr=94% cum_sr=98%]
[Stage n_sheep=9] evaluating 30 eps
[Stage n_sheep=9] sr=97% mean_len=1625 mean_min_pen=2.1m mean_act=1.39
[Stage n_sheep=10] training 1,500,000 steps
... [trial 1 | 10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 10 sheep | 13,665,960 steps | ret(last 50)=+30.13 win_sr=90% cum_sr=92%]
... [trial 1 | 10 sheep | 13,765,960 steps | ret(last 50)=+31.84 win_sr=96% cum_sr=92%]
... [trial 1 | 10 sheep | 13,865,960 steps | ret(last 50)=+32.66 win_sr=88% cum_sr=91%]
... [trial 1 | 10 sheep | 13,965,960 steps | ret(last 50)=+32.56 win_sr=90% cum_sr=91%]
... [trial 1 | 10 sheep | 14,065,960 steps | ret(last 50)=+31.29 win_sr=98% cum_sr=93%]
... [trial 1 | 10 sheep | 14,165,960 steps | ret(last 50)=+32.72 win_sr=94% cum_sr=93%]
... [trial 1 | 10 sheep | 14,265,960 steps | ret(last 50)=+32.42 win_sr=96% cum_sr=93%]
... [trial 1 | 10 sheep | 14,365,960 steps | ret(last 50)=+33.96 win_sr=92% cum_sr=93%]
... [trial 1 | 10 sheep | 14,465,960 steps | ret(last 50)=+33.17 win_sr=98% cum_sr=94%]
... [trial 1 | 10 sheep | 14,565,960 steps | ret(last 50)=+31.48 win_sr=96% cum_sr=94%]
... [trial 1 | 10 sheep | 14,665,960 steps | ret(last 50)=+31.19 win_sr=90% cum_sr=94%]
... [trial 1 | 10 sheep | 14,765,960 steps | ret(last 50)=+32.87 win_sr=98% cum_sr=94%]
... [trial 1 | 10 sheep | 14,865,960 steps | ret(last 50)=+32.36 win_sr=94% cum_sr=94%]
... [trial 1 | 10 sheep | 14,965,960 steps | ret(last 50)=+31.14 win_sr=94% cum_sr=94%]
... [trial 1 | 10 sheep | 15,065,960 steps | ret(last 50)=+32.18 win_sr=96% cum_sr=94%]
[Stage n_sheep=10] evaluating 30 eps
[Stage n_sheep=10] sr=97% mean_len=1816 mean_min_pen=2.0m mean_act=1.39
============================================================
REPLAY SUMMARY
============================================================
n_sheep=1 sr= 97% len= 351 min_pen= 3.9m act=0.28
n_sheep=2 sr=100% len= 421 min_pen= 3.5m act=1.01
n_sheep=3 sr=100% len= 608 min_pen= 3.5m act=1.06
n_sheep=4 sr=100% len= 874 min_pen= 3.3m act=1.23
n_sheep=5 sr= 97% len= 945 min_pen= 3.4m act=1.33
n_sheep=6 sr=100% len= 1162 min_pen= 3.1m act=1.36
n_sheep=7 sr=100% len= 1253 min_pen= 2.7m act=1.38
n_sheep=8 sr= 93% len= 1495 min_pen= 2.6m act=1.39
n_sheep=9 sr= 97% len= 1625 min_pen= 2.1m act=1.39
n_sheep=10 sr= 97% len= 1816 min_pen= 2.0m act=1.39
Total time: 90.3 min
Artefacts: runs/final_v2/
-11
View File
@@ -1,11 +0,0 @@
{
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
}
Binary file not shown.
-72
View File
@@ -1,72 +0,0 @@
[
{
"n_sheep": 1,
"sr": 0.9666666666666667,
"mean_len": 350.96666666666664,
"mean_min_pen": 3.913520161310832,
"mean_act": 0.2797267940386975
},
{
"n_sheep": 2,
"sr": 1.0,
"mean_len": 421.46666666666664,
"mean_min_pen": 3.485754116376241,
"mean_act": 1.0053067604365706
},
{
"n_sheep": 3,
"sr": 1.0,
"mean_len": 608.5,
"mean_min_pen": 3.52824010848999,
"mean_act": 1.0576287743527575
},
{
"n_sheep": 4,
"sr": 1.0,
"mean_len": 874.1333333333333,
"mean_min_pen": 3.2648465514183043,
"mean_act": 1.2302308682249101
},
{
"n_sheep": 5,
"sr": 0.9666666666666667,
"mean_len": 945.1333333333333,
"mean_min_pen": 3.390091093381246,
"mean_act": 1.328577256075333
},
{
"n_sheep": 6,
"sr": 1.0,
"mean_len": 1162.1,
"mean_min_pen": 3.0996540347735086,
"mean_act": 1.3581346810990618
},
{
"n_sheep": 7,
"sr": 1.0,
"mean_len": 1252.6,
"mean_min_pen": 2.6753984689712524,
"mean_act": 1.3753795162019462
},
{
"n_sheep": 8,
"sr": 0.9333333333333333,
"mean_len": 1495.2333333333333,
"mean_min_pen": 2.560386610031128,
"mean_act": 1.3861974064434042
},
{
"n_sheep": 9,
"sr": 0.9666666666666667,
"mean_len": 1624.9,
"mean_min_pen": 2.130835851033529,
"mean_act": 1.387693840600181
},
{
"n_sheep": 10,
"sr": 0.9666666666666667,
"mean_len": 1816.5,
"mean_min_pen": 1.9940622925758362,
"mean_act": 1.3946097864970635
}
]
Binary file not shown.
-253
View File
@@ -1,253 +0,0 @@
Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
Run dir: runs/final_v3
Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
[Stage n_sheep=1] training 1,500,000 steps
... [trial 1 | 1 sheep | 100,000 steps | ret(last 40)=-28.61 win_sr=10% cum_sr=10%]
... [trial 1 | 1 sheep | 200,000 steps | ret(last 50)=-29.25 win_sr=12% cum_sr=11%]
... [trial 1 | 1 sheep | 300,000 steps | ret(last 50)=-31.55 win_sr=6% cum_sr=9%]
... [trial 1 | 1 sheep | 400,000 steps | ret(last 50)=-30.74 win_sr=10% cum_sr=9%]
... [trial 1 | 1 sheep | 500,000 steps | ret(last 50)=-32.89 win_sr=4% cum_sr=8%]
... [trial 1 | 1 sheep | 600,000 steps | ret(last 50)=-34.66 win_sr=4% cum_sr=7%]
... [trial 1 | 1 sheep | 700,000 steps | ret(last 50)=-31.44 win_sr=12% cum_sr=8%]
... [trial 1 | 1 sheep | 800,000 steps | ret(last 50)=-32.70 win_sr=6% cum_sr=8%]
... [trial 1 | 1 sheep | 900,000 steps | ret(last 50)=-35.48 win_sr=2% cum_sr=7%]
... [trial 1 | 1 sheep | 1,000,000 steps | ret(last 50)=-31.81 win_sr=10% cum_sr=8%]
... [trial 1 | 1 sheep | 1,100,000 steps | ret(last 50)=-28.53 win_sr=10% cum_sr=8%]
... [trial 1 | 1 sheep | 1,200,000 steps | ret(last 50)=-5.61 win_sr=62% cum_sr=13%]
... [trial 1 | 1 sheep | 1,300,000 steps | ret(last 50)=+11.97 win_sr=100% cum_sr=34%]
... [trial 1 | 1 sheep | 1,400,000 steps | ret(last 50)=+10.92 win_sr=96% cum_sr=50%]
... [trial 1 | 1 sheep | 1,500,000 steps | ret(last 50)=+11.97 win_sr=100% cum_sr=63%]
[Stage n_sheep=1] evaluating 30 eps
[Stage n_sheep=1] sr=100% mean_len=249 mean_min_pen=3.7m mean_act=0.41
[Stage n_sheep=2] training 1,500,000 steps
... [trial 1 | 2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 2 sheep | 1,607,336 steps | ret(last 47)=-1.11 win_sr=45% cum_sr=45%]
... [trial 1 | 2 sheep | 1,707,336 steps | ret(last 50)=-8.90 win_sr=8% cum_sr=27%]
... [trial 1 | 2 sheep | 1,807,336 steps | ret(last 50)=-5.28 win_sr=16% cum_sr=24%]
... [trial 1 | 2 sheep | 1,907,336 steps | ret(last 50)=+3.16 win_sr=58% cum_sr=33%]
... [trial 1 | 2 sheep | 2,007,336 steps | ret(last 50)=+10.26 win_sr=84% cum_sr=48%]
... [trial 1 | 2 sheep | 2,107,336 steps | ret(last 50)=+14.27 win_sr=100% cum_sr=64%]
... [trial 1 | 2 sheep | 2,207,336 steps | ret(last 50)=+14.08 win_sr=100% cum_sr=72%]
... [trial 1 | 2 sheep | 2,307,336 steps | ret(last 50)=+14.38 win_sr=100% cum_sr=77%]
... [trial 1 | 2 sheep | 2,407,336 steps | ret(last 50)=+14.27 win_sr=100% cum_sr=81%]
... [trial 1 | 2 sheep | 2,507,336 steps | ret(last 50)=+14.37 win_sr=100% cum_sr=84%]
... [trial 1 | 2 sheep | 2,607,336 steps | ret(last 50)=+14.33 win_sr=100% cum_sr=86%]
... [trial 1 | 2 sheep | 2,707,336 steps | ret(last 50)=+14.04 win_sr=100% cum_sr=87%]
... [trial 1 | 2 sheep | 2,807,336 steps | ret(last 50)=+14.25 win_sr=100% cum_sr=89%]
... [trial 1 | 2 sheep | 2,907,336 steps | ret(last 50)=+14.61 win_sr=100% cum_sr=90%]
... [trial 1 | 2 sheep | 3,007,336 steps | ret(last 50)=+13.98 win_sr=98% cum_sr=91%]
[Stage n_sheep=2] evaluating 30 eps
[Stage n_sheep=2] sr=100% mean_len=548 mean_min_pen=3.5m mean_act=0.92
[Stage n_sheep=3] training 1,500,000 steps
... [trial 1 | 3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 3 sheep | 3,114,664 steps | ret(last 50)=+16.10 win_sr=100% cum_sr=99%]
... [trial 1 | 3 sheep | 3,214,664 steps | ret(last 50)=+17.27 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,314,664 steps | ret(last 50)=+16.86 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,414,664 steps | ret(last 50)=+16.86 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,514,664 steps | ret(last 50)=+17.46 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,614,664 steps | ret(last 50)=+17.43 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,714,664 steps | ret(last 50)=+16.76 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,814,664 steps | ret(last 50)=+16.97 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,914,664 steps | ret(last 50)=+16.97 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,014,664 steps | ret(last 50)=+17.19 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,114,664 steps | ret(last 50)=+17.23 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,214,664 steps | ret(last 50)=+16.45 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,314,664 steps | ret(last 50)=+17.18 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,414,664 steps | ret(last 50)=+16.42 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,514,664 steps | ret(last 50)=+16.32 win_sr=100% cum_sr=100%]
[Stage n_sheep=3] evaluating 30 eps
[Stage n_sheep=3] sr=100% mean_len=640 mean_min_pen=3.5m mean_act=1.06
[Stage n_sheep=4] training 1,500,000 steps
... [trial 1 | 4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 4 sheep | 4,621,992 steps | ret(last 50)=+18.61 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 4,721,992 steps | ret(last 50)=+18.82 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 4,821,992 steps | ret(last 50)=+18.91 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 4,921,992 steps | ret(last 50)=+18.55 win_sr=98% cum_sr=100%]
... [trial 1 | 4 sheep | 5,021,992 steps | ret(last 50)=+18.99 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,121,992 steps | ret(last 50)=+18.76 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,221,992 steps | ret(last 50)=+18.46 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,321,992 steps | ret(last 50)=+19.21 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,421,992 steps | ret(last 50)=+17.86 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,521,992 steps | ret(last 50)=+19.19 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,621,992 steps | ret(last 50)=+18.83 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,721,992 steps | ret(last 50)=+18.51 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,821,992 steps | ret(last 50)=+18.38 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,921,992 steps | ret(last 50)=+18.56 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 6,021,992 steps | ret(last 50)=+18.82 win_sr=100% cum_sr=100%]
[Stage n_sheep=4] evaluating 30 eps
[Stage n_sheep=4] sr=100% mean_len=762 mean_min_pen=3.5m mean_act=1.26
[Stage n_sheep=5] training 1,500,000 steps
... [trial 1 | 5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 5 sheep | 6,129,320 steps | ret(last 50)=+20.46 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,229,320 steps | ret(last 50)=+20.41 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,329,320 steps | ret(last 50)=+20.58 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,429,320 steps | ret(last 50)=+21.10 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,529,320 steps | ret(last 50)=+20.48 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,629,320 steps | ret(last 50)=+20.56 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,729,320 steps | ret(last 50)=+20.51 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,829,320 steps | ret(last 50)=+20.70 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,929,320 steps | ret(last 50)=+20.83 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,029,320 steps | ret(last 50)=+21.52 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,129,320 steps | ret(last 50)=+21.62 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,229,320 steps | ret(last 50)=+21.22 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,329,320 steps | ret(last 50)=+21.17 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,429,320 steps | ret(last 50)=+21.00 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,529,320 steps | ret(last 50)=+20.48 win_sr=100% cum_sr=100%]
[Stage n_sheep=5] evaluating 30 eps
[Stage n_sheep=5] sr=100% mean_len=931 mean_min_pen=3.6m mean_act=1.31
[Stage n_sheep=6] training 1,500,000 steps
... [trial 1 | 6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 6 sheep | 7,636,648 steps | ret(last 50)=+21.89 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 7,736,648 steps | ret(last 50)=+22.98 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 7,836,648 steps | ret(last 50)=+22.66 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 7,936,648 steps | ret(last 50)=+23.23 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,036,648 steps | ret(last 50)=+22.83 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,136,648 steps | ret(last 50)=+22.65 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,236,648 steps | ret(last 50)=+22.22 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,336,648 steps | ret(last 50)=+22.45 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,436,648 steps | ret(last 50)=+22.55 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,536,648 steps | ret(last 50)=+22.99 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,636,648 steps | ret(last 50)=+21.99 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,736,648 steps | ret(last 50)=+22.30 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,836,648 steps | ret(last 50)=+23.06 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,936,648 steps | ret(last 50)=+23.32 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 9,036,648 steps | ret(last 50)=+21.80 win_sr=100% cum_sr=100%]
[Stage n_sheep=6] evaluating 30 eps
[Stage n_sheep=6] sr=100% mean_len=1082 mean_min_pen=3.6m mean_act=1.35
[Stage n_sheep=7] training 1,500,000 steps
... [trial 1 | 7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 7 sheep | 9,143,976 steps | ret(last 50)=+25.57 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,243,976 steps | ret(last 50)=+24.76 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,343,976 steps | ret(last 50)=+24.69 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,443,976 steps | ret(last 50)=+26.12 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,543,976 steps | ret(last 50)=+25.53 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,643,976 steps | ret(last 50)=+25.39 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,743,976 steps | ret(last 50)=+24.45 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,843,976 steps | ret(last 50)=+26.45 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,943,976 steps | ret(last 50)=+24.51 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,043,976 steps | ret(last 50)=+24.80 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,143,976 steps | ret(last 50)=+25.56 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,243,976 steps | ret(last 50)=+25.75 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,343,976 steps | ret(last 50)=+25.64 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,443,976 steps | ret(last 50)=+26.45 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,543,976 steps | ret(last 50)=+25.19 win_sr=100% cum_sr=100%]
[Stage n_sheep=7] evaluating 30 eps
[Stage n_sheep=7] sr=100% mean_len=1081 mean_min_pen=3.5m mean_act=1.37
[Stage n_sheep=8] training 1,500,000 steps
... [trial 1 | 8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 8 sheep | 10,651,304 steps | ret(last 50)=+26.63 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 10,751,304 steps | ret(last 50)=+27.63 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 10,851,304 steps | ret(last 50)=+27.53 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 10,951,304 steps | ret(last 50)=+27.43 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,051,304 steps | ret(last 50)=+27.70 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,151,304 steps | ret(last 50)=+26.53 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,251,304 steps | ret(last 50)=+27.24 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,351,304 steps | ret(last 50)=+27.14 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,451,304 steps | ret(last 50)=+27.43 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,551,304 steps | ret(last 50)=+27.25 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,651,304 steps | ret(last 50)=+27.40 win_sr=98% cum_sr=100%]
... [trial 1 | 8 sheep | 11,751,304 steps | ret(last 50)=+27.35 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,851,304 steps | ret(last 50)=+26.33 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,951,304 steps | ret(last 50)=+26.89 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 12,051,304 steps | ret(last 50)=+27.86 win_sr=100% cum_sr=100%]
[Stage n_sheep=8] evaluating 30 eps
[Stage n_sheep=8] sr=100% mean_len=1311 mean_min_pen=3.5m mean_act=1.38
[Stage n_sheep=9] training 1,500,000 steps
... [trial 1 | 9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 9 sheep | 12,158,632 steps | ret(last 50)=+29.62 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,258,632 steps | ret(last 50)=+31.32 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,358,632 steps | ret(last 50)=+30.30 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,458,632 steps | ret(last 50)=+29.33 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,558,632 steps | ret(last 50)=+28.83 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,658,632 steps | ret(last 50)=+29.02 win_sr=98% cum_sr=100%]
... [trial 1 | 9 sheep | 12,758,632 steps | ret(last 50)=+29.60 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,858,632 steps | ret(last 50)=+29.88 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,958,632 steps | ret(last 50)=+30.12 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 13,058,632 steps | ret(last 50)=+28.80 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 13,158,632 steps | ret(last 50)=+30.33 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 13,258,632 steps | ret(last 50)=+27.85 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 13,358,632 steps | ret(last 50)=+28.21 win_sr=96% cum_sr=100%]
... [trial 1 | 9 sheep | 13,458,632 steps | ret(last 50)=+29.88 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 13,558,632 steps | ret(last 50)=+29.06 win_sr=98% cum_sr=100%]
[Stage n_sheep=9] evaluating 30 eps
[Stage n_sheep=9] sr=100% mean_len=1435 mean_min_pen=3.6m mean_act=1.39
[Stage n_sheep=10] training 1,500,000 steps
... [trial 1 | 10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 10 sheep | 13,665,960 steps | ret(last 50)=+30.42 win_sr=96% cum_sr=96%]
... [trial 1 | 10 sheep | 13,765,960 steps | ret(last 50)=+29.97 win_sr=92% cum_sr=95%]
... [trial 1 | 10 sheep | 13,865,960 steps | ret(last 50)=+30.45 win_sr=82% cum_sr=90%]
... [trial 1 | 10 sheep | 13,965,960 steps | ret(last 50)=+29.82 win_sr=90% cum_sr=91%]
... [trial 1 | 10 sheep | 14,065,960 steps | ret(last 50)=+29.66 win_sr=90% cum_sr=91%]
... [trial 1 | 10 sheep | 14,165,960 steps | ret(last 50)=+31.57 win_sr=98% cum_sr=92%]
... [trial 1 | 10 sheep | 14,265,960 steps | ret(last 50)=+31.71 win_sr=96% cum_sr=93%]
... [trial 1 | 10 sheep | 14,365,960 steps | ret(last 50)=+31.75 win_sr=94% cum_sr=93%]
... [trial 1 | 10 sheep | 14,465,960 steps | ret(last 50)=+29.46 win_sr=88% cum_sr=93%]
... [trial 1 | 10 sheep | 14,565,960 steps | ret(last 50)=+29.62 win_sr=94% cum_sr=93%]
... [trial 1 | 10 sheep | 14,665,960 steps | ret(last 50)=+31.64 win_sr=98% cum_sr=93%]
... [trial 1 | 10 sheep | 14,765,960 steps | ret(last 50)=+30.86 win_sr=90% cum_sr=93%]
... [trial 1 | 10 sheep | 14,865,960 steps | ret(last 50)=+31.65 win_sr=90% cum_sr=93%]
... [trial 1 | 10 sheep | 14,965,960 steps | ret(last 50)=+31.75 win_sr=92% cum_sr=93%]
... [trial 1 | 10 sheep | 15,065,960 steps | ret(last 50)=+30.24 win_sr=100% cum_sr=93%]
[Stage n_sheep=10] evaluating 30 eps
[Stage n_sheep=10] sr=90% mean_len=1841 mean_min_pen=3.6m mean_act=1.39
[Consolidation] mixed n_sheep ∈ [1, 10], 2,000,000 steps
... [trial 1 | consolidate | 15,073,288 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | consolidate | 15,173,288 steps | ret(last 50)=+20.69 win_sr=94% cum_sr=95%]
... [trial 1 | consolidate | 15,273,288 steps | ret(last 50)=+20.62 win_sr=90% cum_sr=92%]
... [trial 1 | consolidate | 15,373,288 steps | ret(last 50)=+20.25 win_sr=94% cum_sr=93%]
... [trial 1 | consolidate | 15,473,288 steps | ret(last 50)=+19.82 win_sr=96% cum_sr=94%]
... [trial 1 | consolidate | 15,573,288 steps | ret(last 50)=+20.56 win_sr=94% cum_sr=94%]
... [trial 1 | consolidate | 15,673,288 steps | ret(last 50)=+20.56 win_sr=92% cum_sr=94%]
... [trial 1 | consolidate | 15,773,288 steps | ret(last 50)=+19.43 win_sr=94% cum_sr=95%]
... [trial 1 | consolidate | 15,873,288 steps | ret(last 50)=+21.85 win_sr=98% cum_sr=95%]
... [trial 1 | consolidate | 15,973,288 steps | ret(last 50)=+21.84 win_sr=94% cum_sr=95%]
... [trial 1 | consolidate | 16,073,288 steps | ret(last 50)=+22.13 win_sr=98% cum_sr=95%]
... [trial 1 | consolidate | 16,173,288 steps | ret(last 50)=+21.89 win_sr=94% cum_sr=95%]
... [trial 1 | consolidate | 16,273,288 steps | ret(last 50)=+21.88 win_sr=98% cum_sr=95%]
... [trial 1 | consolidate | 16,373,288 steps | ret(last 50)=+20.81 win_sr=94% cum_sr=95%]
... [trial 1 | consolidate | 16,473,288 steps | ret(last 50)=+20.91 win_sr=98% cum_sr=95%]
... [trial 1 | consolidate | 16,573,288 steps | ret(last 50)=+21.13 win_sr=98% cum_sr=95%]
... [trial 1 | consolidate | 16,673,288 steps | ret(last 50)=+19.85 win_sr=100% cum_sr=95%]
... [trial 1 | consolidate | 16,773,288 steps | ret(last 50)=+22.30 win_sr=92% cum_sr=95%]
... [trial 1 | consolidate | 16,873,288 steps | ret(last 50)=+20.61 win_sr=96% cum_sr=95%]
... [trial 1 | consolidate | 16,973,288 steps | ret(last 50)=+21.93 win_sr=98% cum_sr=96%]
... [trial 1 | consolidate | 17,073,288 steps | ret(last 50)=+21.86 win_sr=98% cum_sr=96%]
[Consolidation] re-evaluating all sheep counts
[Consolidation] n_sheep=1 sr=97% mean_len=377 mean_min_pen=3.5m mean_act=1.39
[Consolidation] n_sheep=2 sr=47% mean_len=1718 mean_min_pen=2.4m mean_act=1.39
[Consolidation] n_sheep=3 sr=93% mean_len=970 mean_min_pen=3.2m mean_act=1.39
[Consolidation] n_sheep=4 sr=97% mean_len=1008 mean_min_pen=3.3m mean_act=1.39
[Consolidation] n_sheep=5 sr=100% mean_len=1176 mean_min_pen=3.3m mean_act=1.39
[Consolidation] n_sheep=6 sr=100% mean_len=1305 mean_min_pen=3.3m mean_act=1.39
[Consolidation] n_sheep=7 sr=100% mean_len=1300 mean_min_pen=3.4m mean_act=1.39
[Consolidation] n_sheep=8 sr=100% mean_len=1461 mean_min_pen=3.5m mean_act=1.39
[Consolidation] n_sheep=9 sr=87% mean_len=1607 mean_min_pen=3.8m mean_act=1.39
[Consolidation] n_sheep=10 sr=80% mean_len=1801 mean_min_pen=3.7m mean_act=1.39
============================================================
REPLAY SUMMARY
============================================================
n_sheep=1 sr= 97% len= 377 min_pen= 3.5m act=1.39
n_sheep=2 sr= 47% len= 1718 min_pen= 2.4m act=1.39
n_sheep=3 sr= 93% len= 970 min_pen= 3.2m act=1.39
n_sheep=4 sr= 97% len= 1008 min_pen= 3.3m act=1.39
n_sheep=5 sr=100% len= 1176 min_pen= 3.3m act=1.39
n_sheep=6 sr=100% len= 1305 min_pen= 3.3m act=1.39
n_sheep=7 sr=100% len= 1300 min_pen= 3.4m act=1.39
n_sheep=8 sr=100% len= 1461 min_pen= 3.5m act=1.39
n_sheep=9 sr= 87% len= 1607 min_pen= 3.8m act=1.39
n_sheep=10 sr= 80% len= 1801 min_pen= 3.7m act=1.39
Total time: 110.1 min
Artefacts: runs/final_v3/
-11
View File
@@ -1,11 +0,0 @@
{
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
}
Binary file not shown.
-72
View File
@@ -1,72 +0,0 @@
[
{
"n_sheep": 1,
"sr": 0.9666666666666667,
"mean_len": 377.3666666666667,
"mean_min_pen": 3.5389957586924234,
"mean_act": 1.3908841227086732
},
{
"n_sheep": 2,
"sr": 0.4666666666666667,
"mean_len": 1717.6333333333334,
"mean_min_pen": 2.4164488633473713,
"mean_act": 1.3922284740020803
},
{
"n_sheep": 3,
"sr": 0.9333333333333333,
"mean_len": 970.2666666666667,
"mean_min_pen": 3.203955141703288,
"mean_act": 1.3945290882248416
},
{
"n_sheep": 4,
"sr": 0.9666666666666667,
"mean_len": 1008.0,
"mean_min_pen": 3.279213563601176,
"mean_act": 1.3918021049325862
},
{
"n_sheep": 5,
"sr": 1.0,
"mean_len": 1175.8666666666666,
"mean_min_pen": 3.3209743976593016,
"mean_act": 1.3925684957666513
},
{
"n_sheep": 6,
"sr": 1.0,
"mean_len": 1305.0,
"mean_min_pen": 3.312229561805725,
"mean_act": 1.391130207932886
},
{
"n_sheep": 7,
"sr": 1.0,
"mean_len": 1300.0,
"mean_min_pen": 3.363971138000488,
"mean_act": 1.392986050516367
},
{
"n_sheep": 8,
"sr": 1.0,
"mean_len": 1461.3666666666666,
"mean_min_pen": 3.4741388003031415,
"mean_act": 1.392040583461347
},
{
"n_sheep": 9,
"sr": 0.8666666666666667,
"mean_len": 1606.7333333333333,
"mean_min_pen": 3.835897175470988,
"mean_act": 1.3907199496534952
},
{
"n_sheep": 10,
"sr": 0.8,
"mean_len": 1800.9666666666667,
"mean_min_pen": 3.741190282503764,
"mean_act": 1.392501896076031
}
]
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,11 +0,0 @@
{
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
}
Binary file not shown.
@@ -1,23 +0,0 @@
[
{
"n_sheep": 1,
"sr": 1.0,
"mean_len": 267.6333333333333,
"mean_min_pen": 3.7235233147939044,
"mean_act": 0.3746675180125346
},
{
"n_sheep": 2,
"sr": 0.06666666666666667,
"mean_len": 1458.6666666666667,
"mean_min_pen": 14.14484707514445,
"mean_act": 0.284232099657656
},
{
"n_sheep": 3,
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 12.514182837804158,
"mean_act": 1.2590703022670828
}
]
Binary file not shown.
-72
View File
@@ -1,72 +0,0 @@
Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
Run dir: runs/replay_20260425_152857
Curriculum: 1 → 3 sheep, 1,500,000 steps/stage
[Stage n_sheep=1] training 1,500,000 steps
... [trial 1 | 1 sheep | 100,000 steps | ret(last 50)=-20.83 sr=6%]
... [trial 1 | 1 sheep | 200,000 steps | ret(last 50)=-21.40 sr=4%]
... [trial 1 | 1 sheep | 300,000 steps | ret(last 50)=-22.31 sr=0%]
... [trial 1 | 1 sheep | 400,000 steps | ret(last 50)=-19.13 sr=4%]
... [trial 1 | 1 sheep | 500,000 steps | ret(last 50)=-18.79 sr=8%]
... [trial 1 | 1 sheep | 600,000 steps | ret(last 50)=-10.15 sr=8%]
... [trial 1 | 1 sheep | 700,000 steps | ret(last 50)=+10.14 sr=82%]
... [trial 1 | 1 sheep | 800,000 steps | ret(last 50)=+11.90 sr=100%]
... [trial 1 | 1 sheep | 900,000 steps | ret(last 50)=+11.32 sr=100%]
... [trial 1 | 1 sheep | 1,000,000 steps | ret(last 50)=+11.36 sr=100%]
... [trial 1 | 1 sheep | 1,100,000 steps | ret(last 50)=+11.18 sr=100%]
... [trial 1 | 1 sheep | 1,200,000 steps | ret(last 50)=+11.08 sr=100%]
... [trial 1 | 1 sheep | 1,300,000 steps | ret(last 50)=+11.14 sr=100%]
... [trial 1 | 1 sheep | 1,400,000 steps | ret(last 50)=+11.10 sr=100%]
... [trial 1 | 1 sheep | 1,500,000 steps | ret(last 50)=+10.99 sr=100%]
[Stage n_sheep=1] evaluating 30 eps
[Stage n_sheep=1] sr=100% mean_len=268 mean_min_pen=3.7m mean_act=0.37
[Stage n_sheep=2] training 1,500,000 steps
... [trial 1 | 2 sheep | 1,507,336 steps | ret(last 0)=+nan sr=nan%]
... [trial 1 | 2 sheep | 1,607,336 steps | ret(last 50)=-3.10 sr=2%]
... [trial 1 | 2 sheep | 1,707,336 steps | ret(last 50)=-3.41 sr=2%]
... [trial 1 | 2 sheep | 1,807,336 steps | ret(last 50)=-3.11 sr=6%]
... [trial 1 | 2 sheep | 1,907,336 steps | ret(last 50)=-2.65 sr=8%]
... [trial 1 | 2 sheep | 2,007,336 steps | ret(last 50)=-4.11 sr=2%]
... [trial 1 | 2 sheep | 2,107,336 steps | ret(last 50)=-3.19 sr=6%]
... [trial 1 | 2 sheep | 2,207,336 steps | ret(last 50)=-3.45 sr=4%]
... [trial 1 | 2 sheep | 2,307,336 steps | ret(last 50)=-4.13 sr=0%]
... [trial 1 | 2 sheep | 2,407,336 steps | ret(last 50)=-3.47 sr=8%]
... [trial 1 | 2 sheep | 2,507,336 steps | ret(last 50)=-3.83 sr=4%]
... [trial 1 | 2 sheep | 2,607,336 steps | ret(last 50)=-4.58 sr=0%]
... [trial 1 | 2 sheep | 2,707,336 steps | ret(last 50)=-3.94 sr=2%]
... [trial 1 | 2 sheep | 2,807,336 steps | ret(last 50)=-4.15 sr=2%]
... [trial 1 | 2 sheep | 2,907,336 steps | ret(last 50)=-3.95 sr=4%]
... [trial 1 | 2 sheep | 3,007,336 steps | ret(last 50)=-4.44 sr=0%]
[Stage n_sheep=2] evaluating 30 eps
[Stage n_sheep=2] sr=7% mean_len=1459 mean_min_pen=14.1m mean_act=0.28
[Stage n_sheep=3] training 1,500,000 steps
... [trial 1 | 3 sheep | 3,014,664 steps | ret(last 0)=+nan sr=nan%]
... [trial 1 | 3 sheep | 3,114,664 steps | ret(last 50)=-4.16 sr=0%]
... [trial 1 | 3 sheep | 3,214,664 steps | ret(last 50)=-4.94 sr=0%]
... [trial 1 | 3 sheep | 3,314,664 steps | ret(last 50)=-4.42 sr=0%]
... [trial 1 | 3 sheep | 3,414,664 steps | ret(last 50)=-4.69 sr=0%]
... [trial 1 | 3 sheep | 3,514,664 steps | ret(last 50)=-3.72 sr=0%]
... [trial 1 | 3 sheep | 3,614,664 steps | ret(last 50)=-5.04 sr=0%]
... [trial 1 | 3 sheep | 3,714,664 steps | ret(last 50)=-4.26 sr=0%]
... [trial 1 | 3 sheep | 3,814,664 steps | ret(last 50)=-4.70 sr=0%]
... [trial 1 | 3 sheep | 3,914,664 steps | ret(last 50)=-4.61 sr=0%]
... [trial 1 | 3 sheep | 4,014,664 steps | ret(last 50)=-4.19 sr=0%]
... [trial 1 | 3 sheep | 4,114,664 steps | ret(last 50)=-4.35 sr=0%]
... [trial 1 | 3 sheep | 4,214,664 steps | ret(last 50)=-4.41 sr=0%]
... [trial 1 | 3 sheep | 4,314,664 steps | ret(last 50)=-4.42 sr=0%]
... [trial 1 | 3 sheep | 4,414,664 steps | ret(last 50)=-4.77 sr=0%]
... [trial 1 | 3 sheep | 4,514,664 steps | ret(last 50)=-4.49 sr=0%]
[Stage n_sheep=3] evaluating 30 eps
[Stage n_sheep=3] sr=0% mean_len=1500 mean_min_pen=12.5m mean_act=1.26
============================================================
REPLAY SUMMARY
============================================================
n_sheep=1 sr=100% len= 268 min_pen= 3.7m act=0.37
n_sheep=2 sr= 7% len= 1459 min_pen= 14.1m act=0.28
n_sheep=3 sr= 0% len= 1500 min_pen= 12.5m act=1.26
Total time: 26.9 min
Artefacts: runs/replay_20260425_152857/
Binary file not shown.
Binary file not shown.

Before

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.
Binary file not shown.
Binary file not shown.

Before

Width:  |  Height:  |  Size: 74 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.
Binary file not shown.
Binary file not shown.

Before

Width:  |  Height:  |  Size: 93 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.
@@ -1,41 +0,0 @@
{
"trial": 0,
"config": {
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.1,
"W_PEN_BONUS": 10.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 100.0,
"W_COMPACT": 3.0,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.005
},
"score": 0.06,
"sr": {
"1": 0.3,
"2": 0.0,
"3": 0.0
},
"details": {
"1": {
"sr": 0.3,
"mean_len": 1252.2,
"mean_min_pen": 2.1085331559181215,
"mean_act": 0.07743233270979732
},
"2": {
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 12.107558453083039,
"mean_act": 0.15608626089841424
},
"3": {
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 13.675278377532958,
"mean_act": 0.10535904271739319
}
},
"elapsed_s": 307.773992061615
}
@@ -1 +0,0 @@
{"trial": 0, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.02, "W_COMPLETE": 100.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.005}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1252.2, "mean_min_pen": 2.1085331559181215, "mean_act": 0.07743233270979732}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.107558453083039, "mean_act": 0.15608626089841424}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.675278377532958, "mean_act": 0.10535904271739319}}, "elapsed_s": 307.773992061615}
@@ -1,41 +0,0 @@
{
"trial": 13,
"config": {
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
},
"score": 0.35,
"sr": {
"1": 1.0,
"2": 0.3,
"3": 0.0
},
"details": {
"1": {
"sr": 1.0,
"mean_len": 428.9,
"mean_min_pen": 3.731236696243286,
"mean_act": 0.33429858573849425
},
"2": {
"sr": 0.3,
"mean_len": 1242.7,
"mean_min_pen": 8.937442195415496,
"mean_act": 0.3998076917437125
},
"3": {
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 14.061083602905274,
"mean_act": 0.5966902794524755
}
},
"elapsed_s": 313.8281009197235
}
@@ -1,25 +0,0 @@
{"trial": 0, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.02, "W_COMPLETE": 100.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.005}, "score": 0.15000000000000002, "sr": {"1": 0.5, "2": 0.1, "3": 0.0}, "details": {"1": {"sr": 0.5, "mean_len": 1051.6, "mean_min_pen": 3.0551586985588073, "mean_act": 0.0887192903536989}, "2": {"sr": 0.1, "mean_len": 1438.1, "mean_min_pen": 10.993862140178681, "mean_act": 0.1723056222816755}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 11.92835488319397, "mean_act": 0.15403316749989074}}, "elapsed_s": 316.9084241390228}
{"trial": 1, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.05, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.05, "W_COMPLETE": 200.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.005}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1153.8, "mean_min_pen": 3.8145030617713926, "mean_act": 0.15146865127462797}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.058024168014526, "mean_act": 0.10904584494279744}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.5988187789917, "mean_act": 0.09578829008591905}}, "elapsed_s": 310.8732409477234}
{"trial": 2, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.025, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.02, "W_COMPLETE": 50.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.01}, "score": 0.27, "sr": {"1": 0.7, "2": 0.2, "3": 0.1}, "details": {"1": {"sr": 0.7, "mean_len": 772.1, "mean_min_pen": 2.92204372882843, "mean_act": 0.1583604314471399}, "2": {"sr": 0.2, "mean_len": 1390.6, "mean_min_pen": 12.992859578132629, "mean_act": 0.16090679360424953}, "3": {"sr": 0.1, "mean_len": 1403.7, "mean_min_pen": 13.045468378067017, "mean_act": 0.07991531561051667}}, "elapsed_s": 303.7708294391632}
{"trial": 3, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.05, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.02, "W_COMPLETE": 50.0, "W_COMPACT": 0.0, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.005}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1137.5, "mean_min_pen": 2.1229824781417848, "mean_act": 0.08172097406143335}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 11.521494126319885, "mean_act": 0.16864279503144788}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.317158126831055, "mean_act": 0.05537428615499472}}, "elapsed_s": 301.6172459125519}
{"trial": 4, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.02, "W_COMPLETE": 50.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "near", "ALIGN_GATED": true, "ent_coef": 0.005}, "score": 0.2, "sr": {"1": 1.0, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 1.0, "mean_len": 567.0, "mean_min_pen": 3.2795117855072022, "mean_act": 0.1855437107780058}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 9.976170372962951, "mean_act": 0.2074074002778701}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.89306182861328, "mean_act": 0.21666522849385267}}, "elapsed_s": 313.525591135025}
{"trial": 5, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.025, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.05, "W_COMPLETE": 200.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ent_coef": 0.01}, "score": 0.16000000000000003, "sr": {"1": 0.8, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.8, "mean_len": 675.5, "mean_min_pen": 3.1338732481002807, "mean_act": 0.11691584614814514}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 9.693846690654755, "mean_act": 0.19984676872865814}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.684805488586425, "mean_act": 0.06430307933471292}}, "elapsed_s": 312.4476580619812}
{"trial": 6, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.0, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.005, "W_COMPLETE": 200.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.01}, "score": 0.08000000000000002, "sr": {"1": 0.4, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.4, "mean_len": 1343.9, "mean_min_pen": 4.092962062358856, "mean_act": 0.07675616785431166}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.157618689537049, "mean_act": 0.13906600509098352}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.079688358306885, "mean_act": 0.07073271389845953}}, "elapsed_s": 337.7615342140198}
{"trial": 7, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.025, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.05, "W_COMPLETE": 100.0, "W_COMPACT": 0.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.005}, "score": 0.11, "sr": {"1": 0.3, "2": 0.1, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1177.5, "mean_min_pen": 2.261639392375946, "mean_act": 0.11013885321646562}, "2": {"sr": 0.1, "mean_len": 1437.5, "mean_min_pen": 5.9263048529624935, "mean_act": 0.16420815230170227}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.130784749984741, "mean_act": 0.20303070502222206}}, "elapsed_s": 451.2424490451813}
{"trial": 8, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.0, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.02, "W_COMPLETE": 50.0, "W_COMPACT": 0.0, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.05}, "score": 0.19, "sr": {"1": 0.7, "2": 0.1, "3": 0.0}, "details": {"1": {"sr": 0.7, "mean_len": 874.2, "mean_min_pen": 4.152815592288971, "mean_act": 0.1303976929043709}, "2": {"sr": 0.1, "mean_len": 1381.4, "mean_min_pen": 12.115124177932739, "mean_act": 0.3749806733317197}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.596546864509582, "mean_act": 0.10082290474528718}}, "elapsed_s": 349.3926422595978}
{"trial": 9, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.0, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.02, "W_COMPLETE": 200.0, "W_COMPACT": 0.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.05}, "score": 0.0, "sr": {"1": 0.0, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 8.404254817962647, "mean_act": 0.6749623541596586}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 11.970247220993041, "mean_act": 0.45562502020561796}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.029277420043945, "mean_act": 0.1599790089856222}}, "elapsed_s": 319.38924622535706}
{"trial": 10, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.02, "W_COMPLETE": 200.0, "W_COMPACT": 0.5, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.02}, "score": 0.16000000000000003, "sr": {"1": 0.8, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.8, "mean_len": 690.7, "mean_min_pen": 3.1264367938041686, "mean_act": 0.13493279961414406}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.040377330780029, "mean_act": 0.20203861368317985}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.379706478118896, "mean_act": 0.05979441475490263}}, "elapsed_s": 310.1806254386902}
{"trial": 11, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.05, "W_COMPLETE": 50.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.02}, "score": 0.24, "sr": {"1": 0.7, "2": 0.2, "3": 0.0}, "details": {"1": {"sr": 0.7, "mean_len": 727.5, "mean_min_pen": 2.933144009113312, "mean_act": 0.11888058594495643}, "2": {"sr": 0.2, "mean_len": 1317.8, "mean_min_pen": 10.2599928855896, "mean_act": 0.14370172662258304}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.231103086471558, "mean_act": 0.0614644922383149}}, "elapsed_s": 330.0620620250702}
{"trial": 12, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.05, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.05, "W_COMPLETE": 100.0, "W_COMPACT": 0.5, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.005}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1244.8, "mean_min_pen": 2.1193889737129212, "mean_act": 0.08216679023110932}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 10.745809042453766, "mean_act": 0.16497857472260813}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.016976690292358, "mean_act": 0.09897869050660908}}, "elapsed_s": 323.27931213378906}
{"trial": 13, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.0, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.02, "W_COMPLETE": 200.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.02}, "score": 0.35, "sr": {"1": 1.0, "2": 0.3, "3": 0.0}, "details": {"1": {"sr": 1.0, "mean_len": 428.9, "mean_min_pen": 3.731236696243286, "mean_act": 0.33429858573849425}, "2": {"sr": 0.3, "mean_len": 1242.7, "mean_min_pen": 8.937442195415496, "mean_act": 0.3998076917437125}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.061083602905274, "mean_act": 0.5966902794524755}}, "elapsed_s": 313.8281009197235}
{"trial": 14, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.02, "W_COMPLETE": 100.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.05}, "score": 0.13999999999999999, "sr": {"1": 0.7, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.7, "mean_len": 912.4, "mean_min_pen": 2.940706562995911, "mean_act": 1.3471978399000248}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 9.901372599601746, "mean_act": 0.9463685217667609}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.291404342651367, "mean_act": 0.08601266834173493}}, "elapsed_s": 322.57220220565796}
{"trial": 15, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.05, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.02, "W_COMPLETE": 100.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ent_coef": 0.01}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1210.5, "mean_min_pen": 2.107759189605713, "mean_act": 0.08131515106917063}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 10.824185514450074, "mean_act": 0.20362997558291535}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.042323064804076, "mean_act": 0.17125511734669563}}, "elapsed_s": 312.3465087413788}
{"trial": 16, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.005, "W_COMPLETE": 200.0, "W_COMPACT": 0.0, "ALIGN_SHAPE": "near", "ALIGN_GATED": true, "ent_coef": 0.05}, "score": 0.24, "sr": {"1": 0.7, "2": 0.2, "3": 0.0}, "details": {"1": {"sr": 0.7, "mean_len": 650.1, "mean_min_pen": 2.981771671772003, "mean_act": 0.1621352170537764}, "2": {"sr": 0.2, "mean_len": 1435.5, "mean_min_pen": 8.686615812778474, "mean_act": 0.3279171284351484}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.374159717559815, "mean_act": 0.04937917392927017}}, "elapsed_s": 303.71519470214844}
{"trial": 17, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.025, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.005, "W_COMPLETE": 100.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.02}, "score": 0.16, "sr": {"1": 0.3, "2": 0.2, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1088.1, "mean_min_pen": 3.4793057322502134, "mean_act": 0.09515179877670824}, "2": {"sr": 0.2, "mean_len": 1428.5, "mean_min_pen": 10.024536824226379, "mean_act": 0.4135459636897354}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.302330660820008, "mean_act": 0.34973196326509737}}, "elapsed_s": 315.76633620262146}
{"trial": 18, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.025, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.05, "W_COMPLETE": 50.0, "W_COMPACT": 0.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ent_coef": 0.005}, "score": 0.16000000000000003, "sr": {"1": 0.8, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.8, "mean_len": 645.4, "mean_min_pen": 3.1326077818870544, "mean_act": 0.15081361126264722}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 10.723365247249603, "mean_act": 0.10806036127302399}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.303192138671875, "mean_act": 0.08246586098832388}}, "elapsed_s": 318.483638048172}
{"trial": 19, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.05, "W_COMPLETE": 100.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.02}, "score": 0.13, "sr": {"1": 0.4, "2": 0.1, "3": 0.0}, "details": {"1": {"sr": 0.4, "mean_len": 1231.4, "mean_min_pen": 2.6246669054031373, "mean_act": 0.07338090033141094}, "2": {"sr": 0.1, "mean_len": 1420.2, "mean_min_pen": 8.371916389465332, "mean_act": 0.16944798908643302}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.287557554244994, "mean_act": 0.09957915147298428}}, "elapsed_s": 315.07627868652344}
{"trial": 20, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.0, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.05, "W_COMPLETE": 100.0, "W_COMPACT": 0.5, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ent_coef": 0.005}, "score": 0.05, "sr": {"1": 0.0, "2": 0.1, "3": 0.0}, "details": {"1": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 1.5734932541847229, "mean_act": 0.08394606926547861}, "2": {"sr": 0.1, "mean_len": 1498.9, "mean_min_pen": 6.444609999656677, "mean_act": 0.2938110977638972}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 11.258054113388061, "mean_act": 0.16288984295733971}}, "elapsed_s": 309.5854580402374}
{"trial": 21, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.05, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.02, "W_COMPLETE": 100.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ent_coef": 0.005}, "score": 0.11, "sr": {"1": 0.3, "2": 0.1, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1324.6, "mean_min_pen": 3.3425565361976624, "mean_act": 0.1115106962044226}, "2": {"sr": 0.1, "mean_len": 1443.0, "mean_min_pen": 11.069470012187958, "mean_act": 0.17271345215252376}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.845431709289551, "mean_act": 0.13337391122176}}, "elapsed_s": 315.54923272132874}
{"trial": 22, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.05, "W_COMPLETE": 100.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ent_coef": 0.05}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1220.2, "mean_min_pen": 2.1276236534118653, "mean_act": 0.4312911105166665}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 8.770305395126343, "mean_act": 0.6047595652043354}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.12634140253067, "mean_act": 0.14348885283676113}}, "elapsed_s": 471.740927696228}
{"trial": 23, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.025, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.005, "W_COMPLETE": 200.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.01}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1209.4, "mean_min_pen": 3.811609184741974, "mean_act": 0.08888363576016632}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 10.143073177337646, "mean_act": 0.27062979487000655}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 15.135865116119385, "mean_act": 0.3670903712440903}}, "elapsed_s": 335.26912212371826}
{"trial": 24, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.0, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.05, "W_COMPLETE": 50.0, "W_COMPACT": 0.5, "ALIGN_SHAPE": "near", "ALIGN_GATED": true, "ent_coef": 0.02}, "score": 0.0, "sr": {"1": 0.0, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 10.014724779129029, "mean_act": 1.024556803444028}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.734652400016785, "mean_act": 1.0186923123559604}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.690151166915893, "mean_act": 1.000638129701217}}, "elapsed_s": 306.1110165119171}

Some files were not shown because too many files have changed in this diff Show More