Cleanup and new approach

This commit is contained in:
Johnny Fernandes
2026-04-26 01:50:01 +01:00
parent b031473758
commit 61f8a7db15
139 changed files with 510 additions and 16170 deletions
+14
View File
@@ -0,0 +1,14 @@
{
"W_PER_SHEEP": 2.0,
"W_ALIGN": 0.05,
"W_PEN_BONUS": 10.0,
"W_COMPLETE": 100.0,
"W_STEP_COST": 0.02,
"W_COMPACT": 0.0,
"W_WALL_TOUCH": 0.15,
"WALL_TOUCH_BUFFER": 0.8,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": true,
"ENTRY_AWARE": false,
"ent_coef": 0.02
}
Binary file not shown.

Before

Width:  |  Height:  |  Size: 70 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 71 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 58 KiB

-223
View File
@@ -1,223 +0,0 @@
"""
Episode-level diagnostics for the herding policy.
Runs N episodes and for each one tracks:
- flock radius over time
- COM-to-pen distance over time
- dog position over time
- when (if ever) the flock first became compact
- failure mode classification
Then produces:
1. Console summary of failure modes
2. Per-episode time-series plots (radius + com_dist)
3. Optional rendered playback of the worst episodes
Usage
-----
python diagnose.py --model runs/ppo_consolidation/final_model.zip \
--vecnorm runs/ppo_consolidation/vecnorm.pkl \
--n-sheep 5 --episodes 20
# Watch the policy live (first episode rendered):
python diagnose.py ... --render
# Save plots to a directory instead of showing interactively:
python diagnose.py ... --plot-dir debug_plots/
"""
import argparse
import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from herding_env import HerdingEnv
# ── failure mode constants ────────────────────────────────────────────────────
COMPACT_RADIUS = 5.0  # must match DRIVE_GATE_RADIUS in herding_env.py


def classify_failure(ep_radius, ep_com_dist, n_penned, n_sheep, success,
                     compact_radius=COMPACT_RADIUS, pen_close=3.0):
    """Label one finished episode with a coarse failure mode.

    Parameters
    ----------
    ep_radius : sequence of float
        Flock radius recorded at each step of the episode.
    ep_com_dist : sequence of float
        Flock-COM-to-pen distance recorded at each step (same indexing as
        ``ep_radius``).
    n_penned, n_sheep : int
        Sheep penned at the end of the episode / total sheep.
    success : bool
        True when the episode counts as solved.
    compact_radius : float, optional
        Radius at or below which the flock counts as "compact".
    pen_close : float, optional
        COM-to-pen distance at or below which the flock "got close".

    Returns
    -------
    str
        One of ``"SUCCESS"``, ``"NEVER_COMPACT"``, ``"COMPACT_CANT_DRIVE"``,
        ``"DROVE_NO_SHEEP"`` or ``"PARTIAL_<k>of<n>"``.
    """
    if success:
        return "SUCCESS"

    # Single scan: index of the first compact step, or None when the flock
    # never compacted (this also covers an empty series, which previously
    # raised ValueError from min()).
    first_compact = next(
        (i for i, r in enumerate(ep_radius) if r <= compact_radius), None)
    if first_compact is None:
        return "NEVER_COMPACT"  # flock was always too scattered

    # Only distances observed from the first compact step onwards count.
    com_after = ep_com_dist[first_compact:]
    if not com_after or min(com_after) > pen_close:
        return "COMPACT_CANT_DRIVE"  # compacted but never drove to pen

    if n_penned == 0:
        return "DROVE_NO_SHEEP"  # got near pen, nothing went in
    return f"PARTIAL_{n_penned}of{n_sheep}"  # some in, not all
# ── main ─────────────────────────────────────────────────────────────────────
def parse_args():
    """Parse the command-line options of the diagnostics script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", required=True)
    parser.add_argument("--vecnorm", default=None)
    # Plain integer options, declared in the order they appear in --help.
    for flag, fallback in (("--n-sheep", 5),
                           ("--episodes", 20),
                           ("--max-steps", 4000)):
        parser.add_argument(flag, type=int, default=fallback)
    parser.add_argument(
        "--render",
        action="store_true",
        help="Show matplotlib animation of the first episode",
    )
    parser.add_argument(
        "--plot-dir",
        default=None,
        help="Save time-series plots here (one per episode)",
    )
    parser.add_argument("--seed", type=int, default=0)
    return parser.parse_args()
def make_env(n_sheep, max_steps, render_mode=None):
    """Return a zero-argument factory that builds one ``HerdingEnv``.

    The environment is not created here; construction is deferred until the
    returned callable is invoked (the form ``DummyVecEnv`` is given in this
    script).
    """
    return lambda: HerdingEnv(
        n_sheep=n_sheep,
        max_steps=max_steps,
        render_mode=render_mode,
    )
def main():
    """Roll out the policy, classify each episode and print a diagnosis.

    For every episode this records the flock radius, COM-to-pen distance,
    dog position and penned count per step, classifies the outcome with
    ``classify_failure``, prints a one-line summary, and optionally plots
    the radius / COM-distance time series. A failure-mode histogram and a
    short textual diagnosis are printed at the end.
    """
    args = parse_args()
    # NOTE(review): --seed is parsed but nothing in this function consumes it.

    if args.plot_dir:
        os.makedirs(args.plot_dir, exist_ok=True)
        # Plots go to disk, so use a non-interactive backend.
        matplotlib.use("Agg")

    render_mode = "human" if args.render else None
    raw_env = DummyVecEnv([make_env(args.n_sheep, args.max_steps, render_mode)])
    if args.vecnorm:
        env = VecNormalize.load(args.vecnorm, raw_env)
        env.training = False
        env.norm_reward = False
    else:
        env = raw_env

    model = PPO.load(args.model, env=env)

    # The wrapped HerdingEnv instance is the same object for the whole run.
    inner = env.envs[0] if hasattr(env, "envs") else env.venv.envs[0]

    failure_counts = {}

    for ep in range(args.episodes):
        obs = env.reset()
        done = False
        step = 0
        ep_radius = []
        ep_com_dist = []
        ep_dog_x = []
        ep_dog_y = []
        ep_n_penned = []

        while not done:
            # Sample the state BEFORE stepping. A vectorised env resets the
            # underlying env inside step() as soon as the episode ends, so
            # reading `inner` after the final step would record the first
            # state of the *next* episode and could corrupt min-radius /
            # min-COM statistics and the failure classification.
            com, radius, _ = inner._flock_stats()
            com_dist = float(np.linalg.norm(com - inner.PEN_CENTER))
            ep_radius.append(radius)
            ep_com_dist.append(com_dist)
            ep_dog_x.append(float(inner.dog_pos[0]))
            ep_dog_y.append(float(inner.dog_pos[1]))
            ep_n_penned.append(int(inner.penned[:inner.n_sheep].sum()))

            action, _ = model.predict(obs, deterministic=True)
            obs, _, dones, infos = env.step(action)
            done = dones[0]
            step += 1

        # The info dict of the terminating step still describes the episode
        # that just finished, so final counts are taken from it.
        info = infos[0]
        n_pen = info.get("n_penned", 0)
        n_sheep = info.get("n_sheep", args.n_sheep)
        success = n_pen == n_sheep

        mode = classify_failure(ep_radius, ep_com_dist, n_pen, n_sheep, success)
        failure_counts[mode] = failure_counts.get(mode, 0) + 1

        compact_step = next((i for i, r in enumerate(ep_radius)
                             if r <= COMPACT_RADIUS), None)
        min_radius = min(ep_radius)
        min_com_dist = min(ep_com_dist)
        print(f" ep {ep+1:>3} steps={step:>5} penned={n_pen}/{n_sheep}"
              f" min_r={min_radius:.1f}m"
              f" min_com={min_com_dist:.1f}m"
              f" compact@step={compact_step if compact_step is not None else 'NEVER'}"
              f" [{mode}]")

        # ── per-episode time-series plot ──────────────────────────────────
        if args.plot_dir or (not args.render and ep < 5):
            fig, axes = plt.subplots(2, 1, figsize=(10, 6), sharex=True)
            t = np.arange(len(ep_radius))

            axes[0].plot(t, ep_radius, color="steelblue", label="flock radius (m)")
            axes[0].axhline(COMPACT_RADIUS, color="orange", linestyle="--",
                            label=f"compact threshold ({COMPACT_RADIUS}m)")
            if compact_step is not None:
                axes[0].axvline(compact_step, color="green", linestyle=":",
                                alpha=0.6, label=f"first compact (step {compact_step})")
            axes[0].set_ylabel("radius (m)")
            axes[0].legend(fontsize=8)
            axes[0].set_title(f"ep {ep+1} | n_sheep={n_sheep} | {mode}")

            axes[1].plot(t, ep_com_dist, color="tomato", label="COM-to-pen dist (m)")
            axes[1].set_ylabel("COM-to-pen (m)")
            axes[1].set_xlabel("step")
            axes[1].legend(fontsize=8)

            plt.tight_layout()
            if args.plot_dir:
                fig.savefig(os.path.join(args.plot_dir, f"ep{ep+1:03d}_{mode}.png"),
                            dpi=100)
                plt.close(fig)
            else:
                plt.show(block=False)
                plt.pause(0.5)

    env.close()

    # ── summary ──────────────────────────────────────────────────────────────
    print("\n" + "=" * 55)
    print(f" Model : {args.model}")
    print(f" n_sheep : {args.n_sheep} episodes : {args.episodes}")
    print("-" * 55)

    total = sum(failure_counts.values())
    if total == 0:
        # --episodes 0: avoid dividing by zero in the diagnosis below.
        print(" No episodes were run; nothing to diagnose.")
        print("=" * 55)
        return

    for mode, cnt in sorted(failure_counts.items(), key=lambda x: -x[1]):
        # One block glyph per episode; the literal was an empty string, which
        # made the histogram bar invisible.
        bar = "█" * cnt
        print(f" {mode:<26} {cnt:>3}/{total} {bar}")
    print("-" * 55)

    never_compact = failure_counts.get("NEVER_COMPACT", 0)
    cant_drive = failure_counts.get("COMPACT_CANT_DRIVE", 0)
    partial = sum(v for k, v in failure_counts.items() if k.startswith("PARTIAL"))
    successes = failure_counts.get("SUCCESS", 0)

    print("\n Diagnosis:")
    if never_compact / total > 0.5:
        print(" ► COLLECT problem: dog rarely compacts the flock.")
        print(" → Phase-gate W_DRIVE, increase W_COLLECT, check alignment reward.")
    if cant_drive / total > 0.3:
        print(" ► DRIVE problem: flock compacts but doesn't reach pen.")
        print(" → Check dog alignment, pen direction, W_DRIVE magnitude.")
    if partial / total > 0.3:
        print(" ► PARTIAL problem: some sheep penned, stragglers remain.")
        print(" → Flock splits; need better straggler-chasing behavior.")
    if successes / total > 0.5:
        print(" ► Mostly working! Fine-tune for consistency.")
    print("=" * 55)
if __name__ == "__main__":
main()
-109
View File
@@ -1,109 +0,0 @@
"""
Load a saved run and evaluate the policy at every n_sheep from 1..N.
Tells you exactly where the curriculum stopped working.
Usage:
python eval_per_sheep.py --run-dir runs/ppo_v3
python eval_per_sheep.py --run-dir runs/ppo_v3 --max-sheep 10 --episodes 20
python eval_per_sheep.py --model runs/ppo_v3/final_model.zip \
--vecnorm runs/ppo_v3/vecnorm.pkl
"""
import argparse
import os
from copy import deepcopy
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from herding_env import HerdingEnv
from train import _classify, COMPACT_RADIUS
def evaluate(model, vn_template, n_sheep, n_episodes, max_steps):
    """Run ``n_episodes`` deterministic rollouts at a fixed flock size.

    A fresh single-env ``VecNormalize`` is built for ``n_sheep`` and given
    copies of the running statistics of ``vn_template``, so the template is
    never mutated.

    Returns
    -------
    dict
        ``n_sheep``, ``success_rate``, ``failure`` (mode -> count),
        ``mean_action`` (mean action norm over all steps) and the
        per-episode means ``mean_min_radius``, ``mean_min_dog_com``,
        ``mean_min_pen``.
    """
    raw = DummyVecEnv([lambda: HerdingEnv(n_sheep=n_sheep, max_steps=max_steps)])
    vn = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
    vn.obs_rms = deepcopy(vn_template.obs_rms)
    vn.ret_rms = deepcopy(vn_template.ret_rms)

    failure = {}
    successes = 0
    act_mags, min_radii, min_dog_com, min_pen = [], [], [], []

    # try/finally so the evaluation env is closed even if a rollout raises.
    try:
        inner = vn.envs[0]
        for _ in range(n_episodes):
            obs = vn.reset()
            done = False
            ep_radius, ep_com_dist, ep_dog_com, ep_act = [], [], [], []

            while not done:
                # Sample the state BEFORE stepping: the vectorised env resets
                # the underlying env inside step() when the episode ends, so
                # a post-step read on the last step would belong to the next
                # episode and skew the per-episode minima.
                com, radius, _ = inner._flock_stats()
                ep_radius.append(radius)
                ep_com_dist.append(float(np.linalg.norm(com - inner.PEN_CENTER)))
                ep_dog_com.append(float(np.linalg.norm(inner.dog_pos - com)))

                action, _ = model.predict(obs, deterministic=True)
                ep_act.append(float(np.linalg.norm(action[0])))
                obs, _, dones, infos = vn.step(action)
                done = dones[0]

            # The terminating step's info still describes the finished episode.
            npen = infos[0].get("n_penned", 0)
            success = npen == n_sheep
            successes += int(success)

            mode = _classify(ep_radius, ep_com_dist, npen, n_sheep, success)
            failure[mode] = failure.get(mode, 0) + 1

            act_mags.extend(ep_act)
            min_radii.append(min(ep_radius))
            min_dog_com.append(min(ep_dog_com))
            min_pen.append(min(ep_com_dist))
    finally:
        vn.close()

    return {
        "n_sheep": n_sheep,
        "success_rate": successes / n_episodes,
        "failure": failure,
        "mean_action": float(np.mean(act_mags)),
        "mean_min_radius": float(np.mean(min_radii)),
        "mean_min_dog_com": float(np.mean(min_dog_com)),
        "mean_min_pen": float(np.mean(min_pen)),
    }
def main():
    """Evaluate a saved policy at every flock size from 1 to ``--max-sheep``.

    The model and normalisation statistics come either from ``--run-dir``
    (``final_model.zip``, falling back to ``best_model/best_model.zip``, plus
    ``vecnorm.pkl``) or from explicit ``--model`` and ``--vecnorm`` paths.
    One table row is printed per flock size.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--run-dir", type=str, default=None)
    p.add_argument("--model", type=str, default=None)
    p.add_argument("--vecnorm", type=str, default=None)
    p.add_argument("--max-sheep", type=int, default=10)
    p.add_argument("--episodes", type=int, default=10)
    p.add_argument("--max-steps", type=int, default=2000)
    args = p.parse_args()

    if args.run_dir:
        model_path = os.path.join(args.run_dir, "final_model.zip")
        if not os.path.exists(model_path):
            model_path = os.path.join(args.run_dir, "best_model", "best_model.zip")
        vn_path = os.path.join(args.run_dir, "vecnorm.pkl")
    else:
        # Without --run-dir both explicit paths are required; previously a
        # missing one reached the loaders as None and failed obscurely.
        if not args.model or not args.vecnorm:
            p.error("provide --run-dir, or both --model and --vecnorm")
        model_path = args.model
        vn_path = args.vecnorm

    print(f"Loading model: {model_path}")
    print(f"Loading vecnorm: {vn_path}\n")

    model = PPO.load(model_path, device="cpu")
    # Throwaway env: only needed so the saved statistics can be loaded.
    raw = DummyVecEnv([lambda: HerdingEnv(n_sheep=1, max_steps=args.max_steps)])
    vn_template = VecNormalize.load(vn_path, raw)

    try:
        print(f"{'n_sheep':>7} {'success':>8} {'act':>6} {'min_r':>7} "
              f"{'dog→com':>8} {'com→pen':>8} failure breakdown")
        print("-" * 90)

        for n in range(1, args.max_sheep + 1):
            r = evaluate(model, vn_template, n, args.episodes, args.max_steps)
            fb = " ".join(f"{m}={c}" for m, c in
                          sorted(r["failure"].items(), key=lambda x: -x[1]))
            print(f"{n:>7d} {r['success_rate']*100:>6.0f}% "
                  f"{r['mean_action']:>6.2f} "
                  f"{r['mean_min_radius']:>6.2f}m "
                  f"{r['mean_min_dog_com']:>7.2f}m "
                  f"{r['mean_min_pen']:>7.2f}m {fb}")
    finally:
        # Release the template env, which the original never closed.
        vn_template.close()
if __name__ == "__main__":
main()
-142
View File
@@ -1,142 +0,0 @@
"""
Evaluation script for a trained herding policy.
Runs N episodes and reports the three project metrics:
1. Success rate — fraction of episodes where all sheep are penned
2. Time-to-pen — mean steps across successful episodes (per sheep)
3. Flock dispersion — flock radius (max sheep-to-COM distance, as labelled in
the report) while sheep are still active, averaged over all timesteps (lower = tighter herding)
Usage
-----
python evaluate.py --model runs/ppo_herding/best_model/best_model.zip \
--vecnorm runs/ppo_herding/vecnorm.pkl \
--n-sheep 5 --episodes 100
Add --render to watch the first episode in a matplotlib window.
"""
import argparse
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from herding_env import HerdingEnv
def make_single_env(n_sheep: int, max_steps: int, render_mode: str = None):
    """Return a zero-argument factory that builds one ``HerdingEnv``.

    Construction is deferred until the returned callable is invoked, which
    is the form ``DummyVecEnv`` is given in this script.
    """
    return lambda: HerdingEnv(
        n_sheep=n_sheep,
        max_steps=max_steps,
        render_mode=render_mode,
    )
def pairwise_mean(positions: np.ndarray, n_active: int) -> float:
    """Average distance over every unordered pair of the first ``n_active`` rows.

    Returns 0.0 when fewer than two sheep are active, since no pair exists.
    """
    if n_active < 2:
        return 0.0

    flock = positions[:n_active]
    # Enumerate each unordered pair (a < b) exactly once.
    gaps = [
        float(np.linalg.norm(flock[a] - flock[b]))
        for a in range(n_active)
        for b in range(a + 1, n_active)
    ]
    return float(np.mean(gaps))
def parse_args():
    """Parse the command-line options of the evaluation script."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        required=True,
        help="Path to saved model .zip",
    )
    parser.add_argument(
        "--vecnorm",
        default=None,
        help="Path to VecNormalize stats .pkl (optional)",
    )
    # Plain integer options, declared in the order they appear in --help.
    for flag, fallback in (("--n-sheep", 1),
                           ("--episodes", 50),
                           ("--max-steps", 2000)):
        parser.add_argument(flag, type=int, default=fallback)
    parser.add_argument(
        "--render",
        action="store_true",
        help="Render first episode in matplotlib",
    )
    parser.add_argument("--seed", type=int, default=42)
    return parser.parse_args()
def main():
    """Evaluate a trained policy and print the summary metrics.

    Runs ``--episodes`` deterministic rollouts and reports the success rate,
    the mean steps-per-sheep over successful episodes, and the mean flock
    radius recorded while at least one sheep was still un-penned.
    """
    args = parse_args()
    # NOTE(review): --seed is parsed but nothing in this function consumes it.
    render_mode = "human" if args.render else None

    raw_env = DummyVecEnv([make_single_env(args.n_sheep, args.max_steps,
                                           render_mode)])
    if args.vecnorm:
        env = VecNormalize.load(args.vecnorm, raw_env)
        env.training = False
        env.norm_reward = False
    else:
        env = raw_env

    model = PPO.load(args.model, env=env)

    # Underlying HerdingEnv, needed for the dispersion measurement; the same
    # instance is reused for the whole run.
    inner = env.envs[0] if hasattr(env, "envs") else env.venv.envs[0]

    successes = []
    steps_to_pen = []  # steps for successful episodes
    dispersions = []   # per-episode mean flock dispersion

    for ep in range(args.episodes):
        obs = env.reset()
        done = False
        ep_steps = 0
        ep_dispersion = []

        while not done:
            # Sample BEFORE stepping: the vectorised env resets the underlying
            # env inside step() when the episode ends, so a post-step read on
            # the final step would measure the next episode's initial flock.
            if not inner.penned[:inner.n_sheep].all():
                _, radius, _ = inner._flock_stats()
                ep_dispersion.append(radius)

            # With render_mode="human", rendering happens inside step().
            action, _ = model.predict(obs, deterministic=True)
            obs, _, dones, infos = env.step(action)
            done = dones[0]
            ep_steps += 1

        # The terminating step's info still describes the finished episode.
        info = infos[0]
        n_penned = info.get("n_penned", 0)
        n_sheep = info.get("n_sheep", args.n_sheep)
        success = n_penned == n_sheep

        successes.append(int(success))
        if success:
            steps_to_pen.append(ep_steps / n_sheep)
        if ep_dispersion:
            dispersions.append(float(np.mean(ep_dispersion)))

        if (ep + 1) % 10 == 0:
            print(f" Episode {ep + 1:>4}/{args.episodes} "
                  f"success={int(success)} steps={ep_steps}")

    env.close()

    # -----------------------------------------------------------------------
    # Report
    # -----------------------------------------------------------------------
    # Each mean falls back to NaN when its list is empty (e.g. --episodes 0).
    success_rate = float(np.mean(successes)) if successes else float("nan")
    mean_ttp = float(np.mean(steps_to_pen)) if steps_to_pen else float("nan")
    mean_disp = float(np.mean(dispersions)) if dispersions else float("nan")

    print("\n" + "=" * 50)
    print(f" Model : {args.model}")
    print(f" Sheep : {args.n_sheep}")
    print(f" Episodes : {args.episodes}")
    print("-" * 50)
    print(f" Success rate : {success_rate * 100:.1f}%"
          f" ({sum(successes)}/{args.episodes})")
    print(f" Time-to-pen : {mean_ttp:.1f} steps/sheep"
          f" (successful episodes only)")
    print(f" Flock radius : {mean_disp:.2f} m"
          f" (max sheep-to-COM distance while active)")
    print("=" * 50)
if __name__ == "__main__":
main()
+26 -16
View File
@@ -61,18 +61,19 @@ class HerdingEnv(gym.Env):
W_COMPLETE = 100.0 # all sheep penned
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
W_WALL_TOUCH = 0.01 # per-sheep, per-step penalty when an active sheep is
# pinned against the outside of a pen W/E wall. Kept
# small (<step_cost) so the dog isn't incentivised to
# hover above the entrance to avoid the penalty.
WALL_TOUCH_BUFFER = 0.3 # metres outside the wall counted as "touching"
W_WALL_TOUCH = 0.15 # per-sheep max penalty at wall surface. Linear ramp
# within WALL_TOUCH_BUFFER gives the RL agent a gradient
# signal to avoid pinning sheep against pen walls.
# 0.15 ≈ 7.5× step_cost — strong enough to shape behavior
# without overwhelming progress reward.
WALL_TOUCH_BUFFER = 0.8 # metres from wall where penalty starts ramping
ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
ALIGN_GATED = True # gate alignment on action magnitude
ENTRY_AWARE = True # progress reward targets PEN_ENTRY (entrance face), not
# PEN_CENTER. Stops the wall-corraling exploit: when a
# sheep is shoved south past y=-8 outside the pen x-range,
# distance to PEN_ENTRY grows (since target is at y=-8),
# so progress reward goes negative instead of positive.
ENTRY_AWARE = False # When True, targets PEN_ENTRY (entrance face) instead
# of PEN_CENTER for progress/obs. Intended to fix wall-
# corralling but collapsed n_sheep≥2 success rate.
# The wall-touch gradient penalty handles wall avoidance
# without breaking the core herding signal.
# Initial sheep spawn: first sheep placed anywhere; rest within CLUSTER_RADIUS
# of it. Set to None for legacy uniform-scatter behaviour.
@@ -406,16 +407,25 @@ class HerdingEnv(gym.Env):
else:
alignment = 0.0
# Wall-touch penalty: count active sheep pinned against outside W/E pen walls.
# Wall-touch penalty: distance-based gradient covering all 3 solid pen
# walls (west, east, south). Linearly ramps from 0 at buffer edge to
# W_WALL_TOUCH at the wall surface — gives the agent a smooth signal
# to avoid pinning sheep against walls.
if self.W_WALL_TOUCH and active.any():
pts = self.sheep_pos[:self.n_sheep][active]
px0, px1 = self.PEN_X
py0, py1 = self.PEN_Y
in_y = (pts[:, 1] > py0) & (pts[:, 1] < py1)
near_w = (pts[:, 0] < px0) & (pts[:, 0] > px0 - self.WALL_TOUCH_BUFFER)
near_e = (pts[:, 0] > px1) & (pts[:, 0] < px1 + self.WALL_TOUCH_BUFFER)
n_touch = int(((near_w | near_e) & in_y).sum())
r_wall_touch = -n_touch * self.W_WALL_TOUCH
buf = self.WALL_TOUCH_BUFFER
far = buf + 1.0
d_w = np.where((pts[:, 0] < px0) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
px0 - pts[:, 0], far)
d_e = np.where((pts[:, 0] > px1) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
pts[:, 0] - px1, far)
d_s = np.where((pts[:, 1] < py0) & (pts[:, 0] > px0) & (pts[:, 0] < px1),
py0 - pts[:, 1], far)
d_min = np.minimum(np.minimum(d_w, d_e), d_s)
penalties = np.maximum(0.0, 1.0 - d_min / buf) * self.W_WALL_TOUCH
r_wall_touch = -float(penalties.sum())
else:
r_wall_touch = 0.0
-172
View File
@@ -1,172 +0,0 @@
"""
Replay a reward config from the sweep with a longer training budget.
Tells you whether a promising sweep config was bottlenecked by training time
vs. structurally limited. If sr2/sr3 climb past their sweep numbers given more
budget, the issue was budget; if they plateau, the policy/obs needs work.
Usage
-----
python replay_config.py --config runs/sweep_<ts>/best.json
python replay_config.py --config runs/sweep_<ts>/trial_007/config.json \
--max-sheep 4 --steps-per-stage 1500000
Argument summary:
--config JSON file with the reward config (sweep best.json works)
--max-sheep Final curriculum stage (default 3)
--steps-per-stage Env steps per curriculum stage (default 1.5M)
--n-envs Parallel envs (default 8)
--eval-episodes Per-stage eval episodes (default 30)
--run-dir Output directory (default runs/replay_<ts>/)
"""
import argparse
import json
import os
import time
from copy import deepcopy
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv, VecNormalize
from herding_env import HerdingEnv
from sweep_reward import ProgressCallback, reward_cfg, evaluate, make_env
def _evaluate_and_report(model, vn, n, args, rcfg, label):
    """Evaluate at ``n`` sheep, print one result line under ``label``, return the record."""
    r = evaluate(model, vn, n, args.eval_episodes, args.max_steps, rcfg)
    print(f"{label} sr={r['sr']*100:.0f}% "
          f"mean_len={r['mean_len']:.0f} "
          f"mean_min_pen={r['mean_min_pen']:.1f}m "
          f"mean_act={r['mean_act']:.2f}")
    return {"n_sheep": n, **r}


def main():
    """Retrain one reward config with a longer budget and report per-stage results.

    Loads the reward config from ``--config``, trains PPO either as a
    curriculum over flock sizes or (``--mixed``) with a random flock size per
    episode, optionally runs a mixed consolidation pass, and writes the
    model, the normalisation statistics and ``stage_results.json`` into the
    run directory.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--config", type=str, required=True,
                   help="Reward config JSON (sweep best.json or trial config.json)")
    p.add_argument("--start-sheep", type=int, default=1)
    p.add_argument("--max-sheep", type=int, default=3)
    p.add_argument("--steps-per-stage", type=int, default=1_500_000)
    p.add_argument("--mixed", action="store_true",
                   help="Train with n_sheep randomized per episode (no curriculum). "
                        "Total train steps = steps-per-stage * max_sheep.")
    p.add_argument("--final-mixed-steps", type=int, default=0,
                   help="After the curriculum, train this many extra steps with "
                        "random_n_sheep ∈ [1, max_sheep] to consolidate the policy "
                        "across all flock sizes. Re-evaluates all n_sheep at the end.")
    p.add_argument("--n-envs", type=int, default=8)
    p.add_argument("--max-steps", type=int, default=2500)
    p.add_argument("--eval-episodes", type=int, default=30)
    p.add_argument("--run-dir", type=str, default=None)
    args = p.parse_args()

    with open(args.config) as f:
        raw = json.load(f)
    # Accept either a bare config dict or a wrapper holding it under "config".
    cfg = raw["config"] if "config" in raw and isinstance(raw["config"], dict) else raw
    rcfg = reward_cfg(cfg)
    print(f"Config: {cfg}")

    run_dir = args.run_dir or os.path.join(
        "runs", "replay_" + time.strftime("%Y%m%d_%H%M%S")
    )
    os.makedirs(run_dir, exist_ok=True)
    with open(os.path.join(run_dir, "config.json"), "w") as f:
        json.dump(cfg, f, indent=2)
    print(f"Run dir: {run_dir}")

    if args.mixed:
        print(f"MIXED training: random n_sheep ∈ [1, {args.max_sheep}], "
              f"{args.steps_per_stage * args.max_sheep:,} total steps")
    else:
        # The separator between the two bounds had been lost, so "1" and "3"
        # printed as "13 sheep".
        print(f"Curriculum: {args.start_sheep} → {args.max_sheep} sheep, "
              f"{args.steps_per_stage:,} steps/stage")

    train_env = SubprocVecEnv([
        make_env(args.max_sheep if args.mixed else args.start_sheep,
                 seed=i, max_steps=args.max_steps, rcfg=rcfg,
                 random_n_sheep=args.mixed)
        for i in range(args.n_envs)
    ])
    vn = VecNormalize(train_env, norm_obs=True, norm_reward=True, clip_obs=10.0)

    stage_results = []
    try:
        model = PPO(
            "MlpPolicy", vn,
            learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10,
            gamma=0.995, gae_lambda=0.95, clip_range=0.2,
            ent_coef=cfg["ent_coef"], vf_coef=0.5, max_grad_norm=0.5,
            policy_kwargs=dict(net_arch=[256, 256]),
            verbose=0,
        )
        t0 = time.time()

        if args.mixed:
            total = args.steps_per_stage * args.max_sheep
            print(f"\n[Mixed] training {total:,} steps")
            model.learn(
                total_timesteps=total,
                reset_num_timesteps=True,
                callback=ProgressCallback(0, "mixed", freq=100_000),
            )
            for n in range(1, args.max_sheep + 1):
                print(f"[Mixed] evaluating n={n}, {args.eval_episodes} eps")
                stage_results.append(_evaluate_and_report(
                    model, vn, n, args, rcfg, f"[Mixed] n_sheep={n}"))
        else:
            for n in range(args.start_sheep, args.max_sheep + 1):
                if n > args.start_sheep:
                    vn.env_method("set_n_sheep", n)
                print(f"\n[Stage n_sheep={n}] training {args.steps_per_stage:,} steps")
                model.learn(
                    total_timesteps=args.steps_per_stage,
                    # Only the first stage restarts the timestep counter.
                    reset_num_timesteps=(n == args.start_sheep),
                    callback=ProgressCallback(0, f"{n} sheep", freq=100_000),
                )
                print(f"[Stage n_sheep={n}] evaluating {args.eval_episodes} eps")
                stage_results.append(_evaluate_and_report(
                    model, vn, n, args, rcfg, f"[Stage n_sheep={n}]"))

        # Optional consolidation pass with mixed n_sheep — fixes specialization
        # imbalance from curriculum order (e.g. n=1 weakness after long n=10
        # training). Replaces stage_results with the post-consolidation eval.
        if args.final_mixed_steps > 0 and not args.mixed:
            print(f"\n[Consolidation] mixed n_sheep ∈ [1, {args.max_sheep}], "
                  f"{args.final_mixed_steps:,} steps")
            vn.env_method("__setattr__", "random_n_sheep", True)
            model.learn(
                total_timesteps=args.final_mixed_steps,
                reset_num_timesteps=False,
                callback=ProgressCallback(0, "consolidate", freq=100_000),
            )
            print("[Consolidation] re-evaluating all sheep counts")
            stage_results = [
                _evaluate_and_report(model, vn, n, args, rcfg,
                                     f"[Consolidation] n_sheep={n}")
                for n in range(1, args.max_sheep + 1)
            ]

        model.save(os.path.join(run_dir, "final_model"))
        vn.save(os.path.join(run_dir, "vecnorm.pkl"))
        with open(os.path.join(run_dir, "stage_results.json"), "w") as f:
            json.dump(stage_results, f, indent=2)
    finally:
        # Best-effort cleanup of the worker processes: report a failure but
        # never let it mask an exception raised by training itself.
        try:
            vn.close()
        except Exception as exc:
            print(f"warning: failed to close training envs: {exc}")

    print("\n" + "=" * 60)
    print(" REPLAY SUMMARY")
    print("=" * 60)
    for r in stage_results:
        print(f" n_sheep={r['n_sheep']} sr={r['sr']*100:>3.0f}% "
              f"len={r['mean_len']:>5.0f} min_pen={r['mean_min_pen']:>5.1f}m "
              f"act={r['mean_act']:.2f}")
    print(f"\n Total time: {(time.time()-t0)/60:.1f} min")
    print(f" Artefacts: {run_dir}/")
if __name__ == "__main__":
main()
-35
View File
@@ -1,35 +0,0 @@
Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
Run dir: runs/expA_fresh2
Curriculum: 2 → 2 sheep, 2,000,000 steps/stage
[Stage n_sheep=2] training 2,000,000 steps
... [trial 1 | 2 sheep | 100,000 steps | ret(last 50)=-13.44 sr=0%]
... [trial 1 | 2 sheep | 200,000 steps | ret(last 50)=-14.60 sr=0%]
... [trial 1 | 2 sheep | 300,000 steps | ret(last 50)=-17.36 sr=0%]
... [trial 1 | 2 sheep | 400,000 steps | ret(last 50)=-17.36 sr=0%]
... [trial 1 | 2 sheep | 500,000 steps | ret(last 50)=-17.92 sr=0%]
... [trial 1 | 2 sheep | 600,000 steps | ret(last 50)=-15.65 sr=0%]
... [trial 1 | 2 sheep | 700,000 steps | ret(last 50)=-17.69 sr=2%]
... [trial 1 | 2 sheep | 800,000 steps | ret(last 50)=-14.61 sr=2%]
... [trial 1 | 2 sheep | 900,000 steps | ret(last 50)=-17.36 sr=0%]
... [trial 1 | 2 sheep | 1,000,000 steps | ret(last 50)=-17.44 sr=0%]
... [trial 1 | 2 sheep | 1,100,000 steps | ret(last 50)=-15.91 sr=2%]
... [trial 1 | 2 sheep | 1,200,000 steps | ret(last 50)=-16.08 sr=0%]
... [trial 1 | 2 sheep | 1,300,000 steps | ret(last 50)=-14.34 sr=0%]
... [trial 1 | 2 sheep | 1,400,000 steps | ret(last 50)=-17.00 sr=2%]
... [trial 1 | 2 sheep | 1,500,000 steps | ret(last 50)=-18.52 sr=0%]
... [trial 1 | 2 sheep | 1,600,000 steps | ret(last 50)=-16.68 sr=0%]
... [trial 1 | 2 sheep | 1,700,000 steps | ret(last 50)=-17.52 sr=0%]
... [trial 1 | 2 sheep | 1,800,000 steps | ret(last 50)=-17.33 sr=0%]
... [trial 1 | 2 sheep | 1,900,000 steps | ret(last 50)=-14.96 sr=2%]
... [trial 1 | 2 sheep | 2,000,000 steps | ret(last 50)=-15.59 sr=0%]
[Stage n_sheep=2] evaluating 30 eps
[Stage n_sheep=2] sr=0% mean_len=1500 mean_min_pen=13.2m mean_act=0.96
============================================================
REPLAY SUMMARY
============================================================
n_sheep=2 sr= 0% len= 1500 min_pen= 13.2m act=0.96
Total time: 10.7 min
Artefacts: runs/expA_fresh2/
-11
View File
@@ -1,11 +0,0 @@
{
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
}
Binary file not shown.
@@ -1,9 +0,0 @@
[
{
"n_sheep": 2,
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 13.171057415008544,
"mean_act": 0.960968065615257
}
]
Binary file not shown.
-51
View File
@@ -1,51 +0,0 @@
Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
Run dir: runs/expB_mixed
MIXED training: random n_sheep ∈ [1, 3], 3,000,000 total steps
[Mixed] training 3,000,000 steps
... [trial 1 | mixed | 100,000 steps | ret(last 50)=-13.68 sr=2%]
... [trial 1 | mixed | 200,000 steps | ret(last 50)=-14.08 sr=0%]
... [trial 1 | mixed | 300,000 steps | ret(last 50)=-9.80 sr=0%]
... [trial 1 | mixed | 400,000 steps | ret(last 50)=-11.20 sr=0%]
... [trial 1 | mixed | 500,000 steps | ret(last 50)=-10.61 sr=0%]
... [trial 1 | mixed | 600,000 steps | ret(last 50)=-11.19 sr=0%]
... [trial 1 | mixed | 700,000 steps | ret(last 50)=-14.22 sr=0%]
... [trial 1 | mixed | 800,000 steps | ret(last 50)=-6.31 sr=0%]
... [trial 1 | mixed | 900,000 steps | ret(last 50)=-12.68 sr=0%]
... [trial 1 | mixed | 1,000,000 steps | ret(last 50)=-11.06 sr=0%]
... [trial 1 | mixed | 1,100,000 steps | ret(last 50)=-13.39 sr=0%]
... [trial 1 | mixed | 1,200,000 steps | ret(last 50)=-14.20 sr=0%]
... [trial 1 | mixed | 1,300,000 steps | ret(last 50)=-11.33 sr=0%]
... [trial 1 | mixed | 1,400,000 steps | ret(last 50)=-10.73 sr=0%]
... [trial 1 | mixed | 1,500,000 steps | ret(last 50)=-10.91 sr=0%]
... [trial 1 | mixed | 1,600,000 steps | ret(last 50)=-10.44 sr=0%]
... [trial 1 | mixed | 1,700,000 steps | ret(last 50)=-10.56 sr=0%]
... [trial 1 | mixed | 1,800,000 steps | ret(last 50)=-15.74 sr=0%]
... [trial 1 | mixed | 1,900,000 steps | ret(last 50)=-13.46 sr=0%]
... [trial 1 | mixed | 2,000,000 steps | ret(last 50)=-9.86 sr=0%]
... [trial 1 | mixed | 2,100,000 steps | ret(last 50)=-13.07 sr=0%]
... [trial 1 | mixed | 2,200,000 steps | ret(last 50)=-9.86 sr=0%]
... [trial 1 | mixed | 2,300,000 steps | ret(last 50)=-9.73 sr=2%]
... [trial 1 | mixed | 2,400,000 steps | ret(last 50)=-12.21 sr=0%]
... [trial 1 | mixed | 2,500,000 steps | ret(last 50)=-14.27 sr=0%]
... [trial 1 | mixed | 2,600,000 steps | ret(last 50)=-10.90 sr=2%]
... [trial 1 | mixed | 2,700,000 steps | ret(last 50)=-9.67 sr=0%]
... [trial 1 | mixed | 2,800,000 steps | ret(last 50)=-14.29 sr=0%]
... [trial 1 | mixed | 2,900,000 steps | ret(last 50)=-9.08 sr=0%]
... [trial 1 | mixed | 3,000,000 steps | ret(last 50)=-11.62 sr=6%]
[Mixed] evaluating n=1, 30 eps
[Mixed] n_sheep=1 sr=0% mean_len=1500 mean_min_pen=12.1m mean_act=0.64
[Mixed] evaluating n=2, 30 eps
[Mixed] n_sheep=2 sr=0% mean_len=1500 mean_min_pen=13.6m mean_act=1.12
[Mixed] evaluating n=3, 30 eps
[Mixed] n_sheep=3 sr=0% mean_len=1500 mean_min_pen=13.3m mean_act=1.02
============================================================
REPLAY SUMMARY
============================================================
n_sheep=1 sr= 0% len= 1500 min_pen= 12.1m act=0.64
n_sheep=2 sr= 0% len= 1500 min_pen= 13.6m act=1.12
n_sheep=3 sr= 0% len= 1500 min_pen= 13.3m act=1.02
Total time: 20.6 min
Artefacts: runs/expB_mixed/
-11
View File
@@ -1,11 +0,0 @@
{
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
}
Binary file not shown.
@@ -1,23 +0,0 @@
[
{
"n_sheep": 1,
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 12.136781152089437,
"mean_act": 0.6380681545449439
},
{
"n_sheep": 2,
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 13.609641806284587,
"mean_act": 1.1225489819858792
},
{
"n_sheep": 3,
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 13.337443319956462,
"mean_act": 1.0186407331574738
}
]
Binary file not shown.
-57
View File
@@ -1,57 +0,0 @@
Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
Run dir: runs/expC_clustered
Curriculum: 1 → 3 sheep, 1,000,000 steps/stage
[Stage n_sheep=1] training 1,000,000 steps
... [trial 1 | 1 sheep | 100,000 steps | ret(last 50)=-17.04 sr=6%]
... [trial 1 | 1 sheep | 200,000 steps | ret(last 50)=-17.39 sr=4%]
... [trial 1 | 1 sheep | 300,000 steps | ret(last 50)=-15.50 sr=4%]
... [trial 1 | 1 sheep | 400,000 steps | ret(last 50)=-2.07 sr=26%]
... [trial 1 | 1 sheep | 500,000 steps | ret(last 50)=+3.81 sr=52%]
... [trial 1 | 1 sheep | 600,000 steps | ret(last 50)=+8.03 sr=76%]
... [trial 1 | 1 sheep | 700,000 steps | ret(last 50)=+9.49 sr=86%]
... [trial 1 | 1 sheep | 800,000 steps | ret(last 50)=+9.42 sr=88%]
... [trial 1 | 1 sheep | 900,000 steps | ret(last 50)=+9.49 sr=88%]
... [trial 1 | 1 sheep | 1,000,000 steps | ret(last 50)=+10.34 sr=94%]
[Stage n_sheep=1] evaluating 30 eps
[Stage n_sheep=1] sr=83% mean_len=519 mean_min_pen=3.5m mean_act=0.25
[Stage n_sheep=2] training 1,000,000 steps
... [trial 1 | 2 sheep | 1,015,816 steps | ret(last 0)=+nan sr=nan%]
... [trial 1 | 2 sheep | 1,115,816 steps | ret(last 50)=-0.13 sr=10%]
... [trial 1 | 2 sheep | 1,215,816 steps | ret(last 50)=-1.23 sr=10%]
... [trial 1 | 2 sheep | 1,315,816 steps | ret(last 50)=-0.10 sr=6%]
... [trial 1 | 2 sheep | 1,415,816 steps | ret(last 50)=+4.10 sr=28%]
... [trial 1 | 2 sheep | 1,515,816 steps | ret(last 50)=+6.24 sr=32%]
... [trial 1 | 2 sheep | 1,615,816 steps | ret(last 50)=+8.48 sr=52%]
... [trial 1 | 2 sheep | 1,715,816 steps | ret(last 50)=+14.14 sr=98%]
... [trial 1 | 2 sheep | 1,815,816 steps | ret(last 50)=+14.33 sr=98%]
... [trial 1 | 2 sheep | 1,915,816 steps | ret(last 50)=+14.02 sr=100%]
... [trial 1 | 2 sheep | 2,015,816 steps | ret(last 50)=+14.05 sr=100%]
[Stage n_sheep=2] evaluating 30 eps
[Stage n_sheep=2] sr=100% mean_len=695 mean_min_pen=3.4m mean_act=0.58
[Stage n_sheep=3] training 1,000,000 steps
... [trial 1 | 3 sheep | 2,031,624 steps | ret(last 0)=+nan sr=nan%]
... [trial 1 | 3 sheep | 2,131,624 steps | ret(last 50)=+10.43 sr=56%]
... [trial 1 | 3 sheep | 2,231,624 steps | ret(last 50)=+13.91 sr=74%]
... [trial 1 | 3 sheep | 2,331,624 steps | ret(last 50)=+13.98 sr=76%]
... [trial 1 | 3 sheep | 2,431,624 steps | ret(last 50)=+12.67 sr=68%]
... [trial 1 | 3 sheep | 2,531,624 steps | ret(last 50)=+15.79 sr=90%]
... [trial 1 | 3 sheep | 2,631,624 steps | ret(last 50)=+16.29 sr=94%]
... [trial 1 | 3 sheep | 2,731,624 steps | ret(last 50)=+15.47 sr=90%]
... [trial 1 | 3 sheep | 2,831,624 steps | ret(last 50)=+16.67 sr=96%]
... [trial 1 | 3 sheep | 2,931,624 steps | ret(last 50)=+17.50 sr=100%]
... [trial 1 | 3 sheep | 3,031,624 steps | ret(last 50)=+16.49 sr=96%]
[Stage n_sheep=3] evaluating 30 eps
[Stage n_sheep=3] sr=90% mean_len=794 mean_min_pen=3.7m mean_act=0.47
============================================================
REPLAY SUMMARY
============================================================
n_sheep=1 sr= 83% len= 519 min_pen= 3.5m act=0.25
n_sheep=2 sr=100% len= 695 min_pen= 3.4m act=0.58
n_sheep=3 sr= 90% len= 794 min_pen= 3.7m act=0.47
Total time: 15.1 min
Artefacts: runs/expC_clustered/
-11
View File
@@ -1,11 +0,0 @@
{
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
}
Binary file not shown.
@@ -1,23 +0,0 @@
[
{
"n_sheep": 1,
"sr": 0.8333333333333334,
"mean_len": 518.5333333333333,
"mean_min_pen": 3.5244259238243103,
"mean_act": 0.25044742608759274
},
{
"n_sheep": 2,
"sr": 1.0,
"mean_len": 694.9,
"mean_min_pen": 3.4314632336298625,
"mean_act": 0.5796192060058971
},
{
"n_sheep": 3,
"sr": 0.9,
"mean_len": 794.1333333333333,
"mean_min_pen": 3.6645382324854534,
"mean_act": 0.46590614892287907
}
]
Binary file not shown.
-219
View File
@@ -1,219 +0,0 @@
Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
Run dir: runs/final_v2
Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
[Stage n_sheep=1] training 1,500,000 steps
... [trial 1 | 1 sheep | 100,000 steps | ret(last 41)=-38.49 win_sr=10% cum_sr=10%]
... [trial 1 | 1 sheep | 200,000 steps | ret(last 50)=-32.87 win_sr=8% cum_sr=9%]
... [trial 1 | 1 sheep | 300,000 steps | ret(last 50)=-33.60 win_sr=4% cum_sr=7%]
... [trial 1 | 1 sheep | 400,000 steps | ret(last 50)=-34.78 win_sr=8% cum_sr=7%]
... [trial 1 | 1 sheep | 500,000 steps | ret(last 50)=-31.25 win_sr=12% cum_sr=8%]
... [trial 1 | 1 sheep | 600,000 steps | ret(last 50)=-32.87 win_sr=2% cum_sr=7%]
... [trial 1 | 1 sheep | 700,000 steps | ret(last 50)=-33.25 win_sr=6% cum_sr=7%]
... [trial 1 | 1 sheep | 800,000 steps | ret(last 50)=-27.80 win_sr=16% cum_sr=8%]
... [trial 1 | 1 sheep | 900,000 steps | ret(last 50)=-27.44 win_sr=14% cum_sr=9%]
... [trial 1 | 1 sheep | 1,000,000 steps | ret(last 50)=-30.52 win_sr=6% cum_sr=9%]
... [trial 1 | 1 sheep | 1,100,000 steps | ret(last 50)=-24.75 win_sr=20% cum_sr=10%]
... [trial 1 | 1 sheep | 1,200,000 steps | ret(last 50)=-29.94 win_sr=4% cum_sr=10%]
... [trial 1 | 1 sheep | 1,300,000 steps | ret(last 50)=-22.72 win_sr=22% cum_sr=11%]
... [trial 1 | 1 sheep | 1,400,000 steps | ret(last 50)=-9.84 win_sr=46% cum_sr=14%]
... [trial 1 | 1 sheep | 1,500,000 steps | ret(last 50)=+10.01 win_sr=96% cum_sr=24%]
[Stage n_sheep=1] evaluating 30 eps
[Stage n_sheep=1] sr=97% mean_len=351 mean_min_pen=3.9m mean_act=0.28
[Stage n_sheep=2] training 1,500,000 steps
... [trial 1 | 2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 2 sheep | 1,607,336 steps | ret(last 43)=-4.11 win_sr=33% cum_sr=33%]
... [trial 1 | 2 sheep | 1,707,336 steps | ret(last 50)=-0.34 win_sr=36% cum_sr=34%]
... [trial 1 | 2 sheep | 1,807,336 steps | ret(last 50)=+14.73 win_sr=92% cum_sr=62%]
... [trial 1 | 2 sheep | 1,907,336 steps | ret(last 50)=+17.38 win_sr=100% cum_sr=76%]
... [trial 1 | 2 sheep | 2,007,336 steps | ret(last 50)=+16.80 win_sr=100% cum_sr=83%]
... [trial 1 | 2 sheep | 2,107,336 steps | ret(last 50)=+15.67 win_sr=100% cum_sr=87%]
... [trial 1 | 2 sheep | 2,207,336 steps | ret(last 50)=+15.39 win_sr=100% cum_sr=90%]
... [trial 1 | 2 sheep | 2,307,336 steps | ret(last 50)=+15.58 win_sr=100% cum_sr=92%]
... [trial 1 | 2 sheep | 2,407,336 steps | ret(last 50)=+15.01 win_sr=100% cum_sr=93%]
... [trial 1 | 2 sheep | 2,507,336 steps | ret(last 50)=+15.50 win_sr=100% cum_sr=94%]
... [trial 1 | 2 sheep | 2,607,336 steps | ret(last 50)=+15.21 win_sr=100% cum_sr=95%]
... [trial 1 | 2 sheep | 2,707,336 steps | ret(last 50)=+15.22 win_sr=100% cum_sr=95%]
... [trial 1 | 2 sheep | 2,807,336 steps | ret(last 50)=+15.05 win_sr=100% cum_sr=96%]
... [trial 1 | 2 sheep | 2,907,336 steps | ret(last 50)=+14.37 win_sr=100% cum_sr=96%]
... [trial 1 | 2 sheep | 3,007,336 steps | ret(last 50)=+14.70 win_sr=100% cum_sr=97%]
[Stage n_sheep=2] evaluating 30 eps
[Stage n_sheep=2] sr=100% mean_len=421 mean_min_pen=3.5m mean_act=1.01
[Stage n_sheep=3] training 1,500,000 steps
... [trial 1 | 3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 3 sheep | 3,114,664 steps | ret(last 50)=+16.52 win_sr=100% cum_sr=99%]
... [trial 1 | 3 sheep | 3,214,664 steps | ret(last 50)=+16.74 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,314,664 steps | ret(last 50)=+17.09 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,414,664 steps | ret(last 50)=+16.90 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,514,664 steps | ret(last 50)=+16.97 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,614,664 steps | ret(last 50)=+17.20 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,714,664 steps | ret(last 50)=+17.09 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,814,664 steps | ret(last 50)=+17.12 win_sr=98% cum_sr=100%]
... [trial 1 | 3 sheep | 3,914,664 steps | ret(last 50)=+17.17 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,014,664 steps | ret(last 50)=+16.25 win_sr=98% cum_sr=100%]
... [trial 1 | 3 sheep | 4,114,664 steps | ret(last 50)=+17.04 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,214,664 steps | ret(last 50)=+16.31 win_sr=98% cum_sr=100%]
... [trial 1 | 3 sheep | 4,314,664 steps | ret(last 50)=+16.82 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,414,664 steps | ret(last 50)=+16.49 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,514,664 steps | ret(last 50)=+16.54 win_sr=100% cum_sr=100%]
[Stage n_sheep=3] evaluating 30 eps
[Stage n_sheep=3] sr=100% mean_len=608 mean_min_pen=3.5m mean_act=1.06
[Stage n_sheep=4] training 1,500,000 steps
... [trial 1 | 4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 4 sheep | 4,621,992 steps | ret(last 50)=+18.55 win_sr=98% cum_sr=94%]
... [trial 1 | 4 sheep | 4,721,992 steps | ret(last 50)=+19.17 win_sr=100% cum_sr=97%]
... [trial 1 | 4 sheep | 4,821,992 steps | ret(last 50)=+18.64 win_sr=100% cum_sr=98%]
... [trial 1 | 4 sheep | 4,921,992 steps | ret(last 50)=+19.06 win_sr=100% cum_sr=99%]
... [trial 1 | 4 sheep | 5,021,992 steps | ret(last 50)=+19.01 win_sr=100% cum_sr=99%]
... [trial 1 | 4 sheep | 5,121,992 steps | ret(last 50)=+19.23 win_sr=100% cum_sr=99%]
... [trial 1 | 4 sheep | 5,221,992 steps | ret(last 50)=+18.71 win_sr=100% cum_sr=99%]
... [trial 1 | 4 sheep | 5,321,992 steps | ret(last 50)=+18.81 win_sr=100% cum_sr=99%]
... [trial 1 | 4 sheep | 5,421,992 steps | ret(last 50)=+19.51 win_sr=100% cum_sr=99%]
... [trial 1 | 4 sheep | 5,521,992 steps | ret(last 50)=+19.01 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,621,992 steps | ret(last 50)=+19.21 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,721,992 steps | ret(last 50)=+18.62 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,821,992 steps | ret(last 50)=+18.57 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,921,992 steps | ret(last 50)=+19.22 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 6,021,992 steps | ret(last 50)=+18.73 win_sr=100% cum_sr=100%]
[Stage n_sheep=4] evaluating 30 eps
[Stage n_sheep=4] sr=100% mean_len=874 mean_min_pen=3.3m mean_act=1.23
[Stage n_sheep=5] training 1,500,000 steps
... [trial 1 | 5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 5 sheep | 6,129,320 steps | ret(last 50)=+22.70 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,229,320 steps | ret(last 50)=+20.82 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,329,320 steps | ret(last 50)=+20.84 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,429,320 steps | ret(last 50)=+21.70 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,529,320 steps | ret(last 50)=+21.25 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,629,320 steps | ret(last 50)=+20.61 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,729,320 steps | ret(last 50)=+21.10 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,829,320 steps | ret(last 50)=+21.42 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,929,320 steps | ret(last 50)=+21.39 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,029,320 steps | ret(last 50)=+20.80 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,129,320 steps | ret(last 50)=+21.19 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,229,320 steps | ret(last 50)=+20.92 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,329,320 steps | ret(last 50)=+20.97 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,429,320 steps | ret(last 50)=+20.48 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,529,320 steps | ret(last 50)=+21.36 win_sr=100% cum_sr=100%]
[Stage n_sheep=5] evaluating 30 eps
[Stage n_sheep=5] sr=97% mean_len=945 mean_min_pen=3.4m mean_act=1.33
[Stage n_sheep=6] training 1,500,000 steps
... [trial 1 | 6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 6 sheep | 7,636,648 steps | ret(last 50)=+22.41 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 7,736,648 steps | ret(last 50)=+23.84 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 7,836,648 steps | ret(last 50)=+22.95 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 7,936,648 steps | ret(last 50)=+23.97 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,036,648 steps | ret(last 50)=+24.02 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,136,648 steps | ret(last 50)=+23.42 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,236,648 steps | ret(last 50)=+24.15 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,336,648 steps | ret(last 50)=+23.32 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,436,648 steps | ret(last 50)=+23.46 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,536,648 steps | ret(last 50)=+23.80 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,636,648 steps | ret(last 50)=+24.41 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,736,648 steps | ret(last 50)=+23.86 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,836,648 steps | ret(last 50)=+23.57 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,936,648 steps | ret(last 50)=+23.74 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 9,036,648 steps | ret(last 50)=+22.87 win_sr=100% cum_sr=100%]
[Stage n_sheep=6] evaluating 30 eps
[Stage n_sheep=6] sr=100% mean_len=1162 mean_min_pen=3.1m mean_act=1.36
[Stage n_sheep=7] training 1,500,000 steps
... [trial 1 | 7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 7 sheep | 9,143,976 steps | ret(last 50)=+24.46 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,243,976 steps | ret(last 50)=+25.47 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,343,976 steps | ret(last 50)=+25.10 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,443,976 steps | ret(last 50)=+24.85 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,543,976 steps | ret(last 50)=+26.01 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,643,976 steps | ret(last 50)=+26.26 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,743,976 steps | ret(last 50)=+26.44 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,843,976 steps | ret(last 50)=+26.08 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,943,976 steps | ret(last 50)=+25.00 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,043,976 steps | ret(last 50)=+26.22 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,143,976 steps | ret(last 50)=+24.79 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,243,976 steps | ret(last 50)=+26.33 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,343,976 steps | ret(last 50)=+26.36 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,443,976 steps | ret(last 50)=+25.68 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,543,976 steps | ret(last 50)=+26.75 win_sr=100% cum_sr=100%]
[Stage n_sheep=7] evaluating 30 eps
[Stage n_sheep=7] sr=100% mean_len=1253 mean_min_pen=2.7m mean_act=1.38
[Stage n_sheep=8] training 1,500,000 steps
... [trial 1 | 8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 8 sheep | 10,651,304 steps | ret(last 50)=+28.19 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 10,751,304 steps | ret(last 50)=+28.80 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 10,851,304 steps | ret(last 50)=+27.81 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 10,951,304 steps | ret(last 50)=+27.31 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,051,304 steps | ret(last 50)=+27.67 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,151,304 steps | ret(last 50)=+27.14 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,251,304 steps | ret(last 50)=+29.60 win_sr=98% cum_sr=100%]
... [trial 1 | 8 sheep | 11,351,304 steps | ret(last 50)=+28.81 win_sr=98% cum_sr=100%]
... [trial 1 | 8 sheep | 11,451,304 steps | ret(last 50)=+27.76 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,551,304 steps | ret(last 50)=+27.28 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,651,304 steps | ret(last 50)=+29.04 win_sr=98% cum_sr=99%]
... [trial 1 | 8 sheep | 11,751,304 steps | ret(last 50)=+28.75 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,851,304 steps | ret(last 50)=+29.04 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,951,304 steps | ret(last 50)=+28.27 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 12,051,304 steps | ret(last 50)=+27.90 win_sr=100% cum_sr=100%]
[Stage n_sheep=8] evaluating 30 eps
[Stage n_sheep=8] sr=93% mean_len=1495 mean_min_pen=2.6m mean_act=1.39
[Stage n_sheep=9] training 1,500,000 steps
... [trial 1 | 9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 9 sheep | 12,158,632 steps | ret(last 50)=+30.67 win_sr=98% cum_sr=98%]
... [trial 1 | 9 sheep | 12,258,632 steps | ret(last 50)=+28.78 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 12,358,632 steps | ret(last 50)=+30.08 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 12,458,632 steps | ret(last 50)=+29.61 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 12,558,632 steps | ret(last 50)=+30.34 win_sr=98% cum_sr=99%]
... [trial 1 | 9 sheep | 12,658,632 steps | ret(last 50)=+29.48 win_sr=98% cum_sr=99%]
... [trial 1 | 9 sheep | 12,758,632 steps | ret(last 50)=+29.92 win_sr=98% cum_sr=99%]
... [trial 1 | 9 sheep | 12,858,632 steps | ret(last 50)=+29.26 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 12,958,632 steps | ret(last 50)=+30.36 win_sr=96% cum_sr=98%]
... [trial 1 | 9 sheep | 13,058,632 steps | ret(last 50)=+30.19 win_sr=100% cum_sr=98%]
... [trial 1 | 9 sheep | 13,158,632 steps | ret(last 50)=+29.24 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 13,258,632 steps | ret(last 50)=+30.40 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 13,358,632 steps | ret(last 50)=+31.65 win_sr=100% cum_sr=99%]
... [trial 1 | 9 sheep | 13,458,632 steps | ret(last 50)=+30.77 win_sr=98% cum_sr=99%]
... [trial 1 | 9 sheep | 13,558,632 steps | ret(last 50)=+30.21 win_sr=94% cum_sr=98%]
[Stage n_sheep=9] evaluating 30 eps
[Stage n_sheep=9] sr=97% mean_len=1625 mean_min_pen=2.1m mean_act=1.39
[Stage n_sheep=10] training 1,500,000 steps
... [trial 1 | 10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 10 sheep | 13,665,960 steps | ret(last 50)=+30.13 win_sr=90% cum_sr=92%]
... [trial 1 | 10 sheep | 13,765,960 steps | ret(last 50)=+31.84 win_sr=96% cum_sr=92%]
... [trial 1 | 10 sheep | 13,865,960 steps | ret(last 50)=+32.66 win_sr=88% cum_sr=91%]
... [trial 1 | 10 sheep | 13,965,960 steps | ret(last 50)=+32.56 win_sr=90% cum_sr=91%]
... [trial 1 | 10 sheep | 14,065,960 steps | ret(last 50)=+31.29 win_sr=98% cum_sr=93%]
... [trial 1 | 10 sheep | 14,165,960 steps | ret(last 50)=+32.72 win_sr=94% cum_sr=93%]
... [trial 1 | 10 sheep | 14,265,960 steps | ret(last 50)=+32.42 win_sr=96% cum_sr=93%]
... [trial 1 | 10 sheep | 14,365,960 steps | ret(last 50)=+33.96 win_sr=92% cum_sr=93%]
... [trial 1 | 10 sheep | 14,465,960 steps | ret(last 50)=+33.17 win_sr=98% cum_sr=94%]
... [trial 1 | 10 sheep | 14,565,960 steps | ret(last 50)=+31.48 win_sr=96% cum_sr=94%]
... [trial 1 | 10 sheep | 14,665,960 steps | ret(last 50)=+31.19 win_sr=90% cum_sr=94%]
... [trial 1 | 10 sheep | 14,765,960 steps | ret(last 50)=+32.87 win_sr=98% cum_sr=94%]
... [trial 1 | 10 sheep | 14,865,960 steps | ret(last 50)=+32.36 win_sr=94% cum_sr=94%]
... [trial 1 | 10 sheep | 14,965,960 steps | ret(last 50)=+31.14 win_sr=94% cum_sr=94%]
... [trial 1 | 10 sheep | 15,065,960 steps | ret(last 50)=+32.18 win_sr=96% cum_sr=94%]
[Stage n_sheep=10] evaluating 30 eps
[Stage n_sheep=10] sr=97% mean_len=1816 mean_min_pen=2.0m mean_act=1.39
============================================================
REPLAY SUMMARY
============================================================
n_sheep=1 sr= 97% len= 351 min_pen= 3.9m act=0.28
n_sheep=2 sr=100% len= 421 min_pen= 3.5m act=1.01
n_sheep=3 sr=100% len= 608 min_pen= 3.5m act=1.06
n_sheep=4 sr=100% len= 874 min_pen= 3.3m act=1.23
n_sheep=5 sr= 97% len= 945 min_pen= 3.4m act=1.33
n_sheep=6 sr=100% len= 1162 min_pen= 3.1m act=1.36
n_sheep=7 sr=100% len= 1253 min_pen= 2.7m act=1.38
n_sheep=8 sr= 93% len= 1495 min_pen= 2.6m act=1.39
n_sheep=9 sr= 97% len= 1625 min_pen= 2.1m act=1.39
n_sheep=10 sr= 97% len= 1816 min_pen= 2.0m act=1.39
Total time: 90.3 min
Artefacts: runs/final_v2/
-11
View File
@@ -1,11 +0,0 @@
{
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
}
Binary file not shown.
-72
View File
@@ -1,72 +0,0 @@
[
{
"n_sheep": 1,
"sr": 0.9666666666666667,
"mean_len": 350.96666666666664,
"mean_min_pen": 3.913520161310832,
"mean_act": 0.2797267940386975
},
{
"n_sheep": 2,
"sr": 1.0,
"mean_len": 421.46666666666664,
"mean_min_pen": 3.485754116376241,
"mean_act": 1.0053067604365706
},
{
"n_sheep": 3,
"sr": 1.0,
"mean_len": 608.5,
"mean_min_pen": 3.52824010848999,
"mean_act": 1.0576287743527575
},
{
"n_sheep": 4,
"sr": 1.0,
"mean_len": 874.1333333333333,
"mean_min_pen": 3.2648465514183043,
"mean_act": 1.2302308682249101
},
{
"n_sheep": 5,
"sr": 0.9666666666666667,
"mean_len": 945.1333333333333,
"mean_min_pen": 3.390091093381246,
"mean_act": 1.328577256075333
},
{
"n_sheep": 6,
"sr": 1.0,
"mean_len": 1162.1,
"mean_min_pen": 3.0996540347735086,
"mean_act": 1.3581346810990618
},
{
"n_sheep": 7,
"sr": 1.0,
"mean_len": 1252.6,
"mean_min_pen": 2.6753984689712524,
"mean_act": 1.3753795162019462
},
{
"n_sheep": 8,
"sr": 0.9333333333333333,
"mean_len": 1495.2333333333333,
"mean_min_pen": 2.560386610031128,
"mean_act": 1.3861974064434042
},
{
"n_sheep": 9,
"sr": 0.9666666666666667,
"mean_len": 1624.9,
"mean_min_pen": 2.130835851033529,
"mean_act": 1.387693840600181
},
{
"n_sheep": 10,
"sr": 0.9666666666666667,
"mean_len": 1816.5,
"mean_min_pen": 1.9940622925758362,
"mean_act": 1.3946097864970635
}
]
Binary file not shown.
-253
View File
@@ -1,253 +0,0 @@
Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
Run dir: runs/final_v3
Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
[Stage n_sheep=1] training 1,500,000 steps
... [trial 1 | 1 sheep | 100,000 steps | ret(last 40)=-28.61 win_sr=10% cum_sr=10%]
... [trial 1 | 1 sheep | 200,000 steps | ret(last 50)=-29.25 win_sr=12% cum_sr=11%]
... [trial 1 | 1 sheep | 300,000 steps | ret(last 50)=-31.55 win_sr=6% cum_sr=9%]
... [trial 1 | 1 sheep | 400,000 steps | ret(last 50)=-30.74 win_sr=10% cum_sr=9%]
... [trial 1 | 1 sheep | 500,000 steps | ret(last 50)=-32.89 win_sr=4% cum_sr=8%]
... [trial 1 | 1 sheep | 600,000 steps | ret(last 50)=-34.66 win_sr=4% cum_sr=7%]
... [trial 1 | 1 sheep | 700,000 steps | ret(last 50)=-31.44 win_sr=12% cum_sr=8%]
... [trial 1 | 1 sheep | 800,000 steps | ret(last 50)=-32.70 win_sr=6% cum_sr=8%]
... [trial 1 | 1 sheep | 900,000 steps | ret(last 50)=-35.48 win_sr=2% cum_sr=7%]
... [trial 1 | 1 sheep | 1,000,000 steps | ret(last 50)=-31.81 win_sr=10% cum_sr=8%]
... [trial 1 | 1 sheep | 1,100,000 steps | ret(last 50)=-28.53 win_sr=10% cum_sr=8%]
... [trial 1 | 1 sheep | 1,200,000 steps | ret(last 50)=-5.61 win_sr=62% cum_sr=13%]
... [trial 1 | 1 sheep | 1,300,000 steps | ret(last 50)=+11.97 win_sr=100% cum_sr=34%]
... [trial 1 | 1 sheep | 1,400,000 steps | ret(last 50)=+10.92 win_sr=96% cum_sr=50%]
... [trial 1 | 1 sheep | 1,500,000 steps | ret(last 50)=+11.97 win_sr=100% cum_sr=63%]
[Stage n_sheep=1] evaluating 30 eps
[Stage n_sheep=1] sr=100% mean_len=249 mean_min_pen=3.7m mean_act=0.41
[Stage n_sheep=2] training 1,500,000 steps
... [trial 1 | 2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 2 sheep | 1,607,336 steps | ret(last 47)=-1.11 win_sr=45% cum_sr=45%]
... [trial 1 | 2 sheep | 1,707,336 steps | ret(last 50)=-8.90 win_sr=8% cum_sr=27%]
... [trial 1 | 2 sheep | 1,807,336 steps | ret(last 50)=-5.28 win_sr=16% cum_sr=24%]
... [trial 1 | 2 sheep | 1,907,336 steps | ret(last 50)=+3.16 win_sr=58% cum_sr=33%]
... [trial 1 | 2 sheep | 2,007,336 steps | ret(last 50)=+10.26 win_sr=84% cum_sr=48%]
... [trial 1 | 2 sheep | 2,107,336 steps | ret(last 50)=+14.27 win_sr=100% cum_sr=64%]
... [trial 1 | 2 sheep | 2,207,336 steps | ret(last 50)=+14.08 win_sr=100% cum_sr=72%]
... [trial 1 | 2 sheep | 2,307,336 steps | ret(last 50)=+14.38 win_sr=100% cum_sr=77%]
... [trial 1 | 2 sheep | 2,407,336 steps | ret(last 50)=+14.27 win_sr=100% cum_sr=81%]
... [trial 1 | 2 sheep | 2,507,336 steps | ret(last 50)=+14.37 win_sr=100% cum_sr=84%]
... [trial 1 | 2 sheep | 2,607,336 steps | ret(last 50)=+14.33 win_sr=100% cum_sr=86%]
... [trial 1 | 2 sheep | 2,707,336 steps | ret(last 50)=+14.04 win_sr=100% cum_sr=87%]
... [trial 1 | 2 sheep | 2,807,336 steps | ret(last 50)=+14.25 win_sr=100% cum_sr=89%]
... [trial 1 | 2 sheep | 2,907,336 steps | ret(last 50)=+14.61 win_sr=100% cum_sr=90%]
... [trial 1 | 2 sheep | 3,007,336 steps | ret(last 50)=+13.98 win_sr=98% cum_sr=91%]
[Stage n_sheep=2] evaluating 30 eps
[Stage n_sheep=2] sr=100% mean_len=548 mean_min_pen=3.5m mean_act=0.92
[Stage n_sheep=3] training 1,500,000 steps
... [trial 1 | 3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 3 sheep | 3,114,664 steps | ret(last 50)=+16.10 win_sr=100% cum_sr=99%]
... [trial 1 | 3 sheep | 3,214,664 steps | ret(last 50)=+17.27 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,314,664 steps | ret(last 50)=+16.86 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,414,664 steps | ret(last 50)=+16.86 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,514,664 steps | ret(last 50)=+17.46 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,614,664 steps | ret(last 50)=+17.43 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,714,664 steps | ret(last 50)=+16.76 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,814,664 steps | ret(last 50)=+16.97 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 3,914,664 steps | ret(last 50)=+16.97 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,014,664 steps | ret(last 50)=+17.19 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,114,664 steps | ret(last 50)=+17.23 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,214,664 steps | ret(last 50)=+16.45 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,314,664 steps | ret(last 50)=+17.18 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,414,664 steps | ret(last 50)=+16.42 win_sr=100% cum_sr=100%]
... [trial 1 | 3 sheep | 4,514,664 steps | ret(last 50)=+16.32 win_sr=100% cum_sr=100%]
[Stage n_sheep=3] evaluating 30 eps
[Stage n_sheep=3] sr=100% mean_len=640 mean_min_pen=3.5m mean_act=1.06
[Stage n_sheep=4] training 1,500,000 steps
... [trial 1 | 4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 4 sheep | 4,621,992 steps | ret(last 50)=+18.61 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 4,721,992 steps | ret(last 50)=+18.82 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 4,821,992 steps | ret(last 50)=+18.91 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 4,921,992 steps | ret(last 50)=+18.55 win_sr=98% cum_sr=100%]
... [trial 1 | 4 sheep | 5,021,992 steps | ret(last 50)=+18.99 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,121,992 steps | ret(last 50)=+18.76 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,221,992 steps | ret(last 50)=+18.46 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,321,992 steps | ret(last 50)=+19.21 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,421,992 steps | ret(last 50)=+17.86 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,521,992 steps | ret(last 50)=+19.19 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,621,992 steps | ret(last 50)=+18.83 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,721,992 steps | ret(last 50)=+18.51 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,821,992 steps | ret(last 50)=+18.38 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 5,921,992 steps | ret(last 50)=+18.56 win_sr=100% cum_sr=100%]
... [trial 1 | 4 sheep | 6,021,992 steps | ret(last 50)=+18.82 win_sr=100% cum_sr=100%]
[Stage n_sheep=4] evaluating 30 eps
[Stage n_sheep=4] sr=100% mean_len=762 mean_min_pen=3.5m mean_act=1.26
[Stage n_sheep=5] training 1,500,000 steps
... [trial 1 | 5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 5 sheep | 6,129,320 steps | ret(last 50)=+20.46 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,229,320 steps | ret(last 50)=+20.41 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,329,320 steps | ret(last 50)=+20.58 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,429,320 steps | ret(last 50)=+21.10 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,529,320 steps | ret(last 50)=+20.48 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,629,320 steps | ret(last 50)=+20.56 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,729,320 steps | ret(last 50)=+20.51 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,829,320 steps | ret(last 50)=+20.70 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 6,929,320 steps | ret(last 50)=+20.83 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,029,320 steps | ret(last 50)=+21.52 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,129,320 steps | ret(last 50)=+21.62 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,229,320 steps | ret(last 50)=+21.22 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,329,320 steps | ret(last 50)=+21.17 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,429,320 steps | ret(last 50)=+21.00 win_sr=100% cum_sr=100%]
... [trial 1 | 5 sheep | 7,529,320 steps | ret(last 50)=+20.48 win_sr=100% cum_sr=100%]
[Stage n_sheep=5] evaluating 30 eps
[Stage n_sheep=5] sr=100% mean_len=931 mean_min_pen=3.6m mean_act=1.31
[Stage n_sheep=6] training 1,500,000 steps
... [trial 1 | 6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 6 sheep | 7,636,648 steps | ret(last 50)=+21.89 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 7,736,648 steps | ret(last 50)=+22.98 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 7,836,648 steps | ret(last 50)=+22.66 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 7,936,648 steps | ret(last 50)=+23.23 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,036,648 steps | ret(last 50)=+22.83 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,136,648 steps | ret(last 50)=+22.65 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,236,648 steps | ret(last 50)=+22.22 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,336,648 steps | ret(last 50)=+22.45 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,436,648 steps | ret(last 50)=+22.55 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,536,648 steps | ret(last 50)=+22.99 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,636,648 steps | ret(last 50)=+21.99 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,736,648 steps | ret(last 50)=+22.30 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,836,648 steps | ret(last 50)=+23.06 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 8,936,648 steps | ret(last 50)=+23.32 win_sr=100% cum_sr=100%]
... [trial 1 | 6 sheep | 9,036,648 steps | ret(last 50)=+21.80 win_sr=100% cum_sr=100%]
[Stage n_sheep=6] evaluating 30 eps
[Stage n_sheep=6] sr=100% mean_len=1082 mean_min_pen=3.6m mean_act=1.35
[Stage n_sheep=7] training 1,500,000 steps
... [trial 1 | 7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 7 sheep | 9,143,976 steps | ret(last 50)=+25.57 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,243,976 steps | ret(last 50)=+24.76 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,343,976 steps | ret(last 50)=+24.69 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,443,976 steps | ret(last 50)=+26.12 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,543,976 steps | ret(last 50)=+25.53 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,643,976 steps | ret(last 50)=+25.39 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,743,976 steps | ret(last 50)=+24.45 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,843,976 steps | ret(last 50)=+26.45 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 9,943,976 steps | ret(last 50)=+24.51 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,043,976 steps | ret(last 50)=+24.80 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,143,976 steps | ret(last 50)=+25.56 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,243,976 steps | ret(last 50)=+25.75 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,343,976 steps | ret(last 50)=+25.64 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,443,976 steps | ret(last 50)=+26.45 win_sr=100% cum_sr=100%]
... [trial 1 | 7 sheep | 10,543,976 steps | ret(last 50)=+25.19 win_sr=100% cum_sr=100%]
[Stage n_sheep=7] evaluating 30 eps
[Stage n_sheep=7] sr=100% mean_len=1081 mean_min_pen=3.5m mean_act=1.37
[Stage n_sheep=8] training 1,500,000 steps
... [trial 1 | 8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 8 sheep | 10,651,304 steps | ret(last 50)=+26.63 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 10,751,304 steps | ret(last 50)=+27.63 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 10,851,304 steps | ret(last 50)=+27.53 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 10,951,304 steps | ret(last 50)=+27.43 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,051,304 steps | ret(last 50)=+27.70 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,151,304 steps | ret(last 50)=+26.53 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,251,304 steps | ret(last 50)=+27.24 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,351,304 steps | ret(last 50)=+27.14 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,451,304 steps | ret(last 50)=+27.43 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,551,304 steps | ret(last 50)=+27.25 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,651,304 steps | ret(last 50)=+27.40 win_sr=98% cum_sr=100%]
... [trial 1 | 8 sheep | 11,751,304 steps | ret(last 50)=+27.35 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,851,304 steps | ret(last 50)=+26.33 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 11,951,304 steps | ret(last 50)=+26.89 win_sr=100% cum_sr=100%]
... [trial 1 | 8 sheep | 12,051,304 steps | ret(last 50)=+27.86 win_sr=100% cum_sr=100%]
[Stage n_sheep=8] evaluating 30 eps
[Stage n_sheep=8] sr=100% mean_len=1311 mean_min_pen=3.5m mean_act=1.38
[Stage n_sheep=9] training 1,500,000 steps
... [trial 1 | 9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 9 sheep | 12,158,632 steps | ret(last 50)=+29.62 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,258,632 steps | ret(last 50)=+31.32 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,358,632 steps | ret(last 50)=+30.30 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,458,632 steps | ret(last 50)=+29.33 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,558,632 steps | ret(last 50)=+28.83 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,658,632 steps | ret(last 50)=+29.02 win_sr=98% cum_sr=100%]
... [trial 1 | 9 sheep | 12,758,632 steps | ret(last 50)=+29.60 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,858,632 steps | ret(last 50)=+29.88 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 12,958,632 steps | ret(last 50)=+30.12 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 13,058,632 steps | ret(last 50)=+28.80 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 13,158,632 steps | ret(last 50)=+30.33 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 13,258,632 steps | ret(last 50)=+27.85 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 13,358,632 steps | ret(last 50)=+28.21 win_sr=96% cum_sr=100%]
... [trial 1 | 9 sheep | 13,458,632 steps | ret(last 50)=+29.88 win_sr=100% cum_sr=100%]
... [trial 1 | 9 sheep | 13,558,632 steps | ret(last 50)=+29.06 win_sr=98% cum_sr=100%]
[Stage n_sheep=9] evaluating 30 eps
[Stage n_sheep=9] sr=100% mean_len=1435 mean_min_pen=3.6m mean_act=1.39
[Stage n_sheep=10] training 1,500,000 steps
... [trial 1 | 10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | 10 sheep | 13,665,960 steps | ret(last 50)=+30.42 win_sr=96% cum_sr=96%]
... [trial 1 | 10 sheep | 13,765,960 steps | ret(last 50)=+29.97 win_sr=92% cum_sr=95%]
... [trial 1 | 10 sheep | 13,865,960 steps | ret(last 50)=+30.45 win_sr=82% cum_sr=90%]
... [trial 1 | 10 sheep | 13,965,960 steps | ret(last 50)=+29.82 win_sr=90% cum_sr=91%]
... [trial 1 | 10 sheep | 14,065,960 steps | ret(last 50)=+29.66 win_sr=90% cum_sr=91%]
... [trial 1 | 10 sheep | 14,165,960 steps | ret(last 50)=+31.57 win_sr=98% cum_sr=92%]
... [trial 1 | 10 sheep | 14,265,960 steps | ret(last 50)=+31.71 win_sr=96% cum_sr=93%]
... [trial 1 | 10 sheep | 14,365,960 steps | ret(last 50)=+31.75 win_sr=94% cum_sr=93%]
... [trial 1 | 10 sheep | 14,465,960 steps | ret(last 50)=+29.46 win_sr=88% cum_sr=93%]
... [trial 1 | 10 sheep | 14,565,960 steps | ret(last 50)=+29.62 win_sr=94% cum_sr=93%]
... [trial 1 | 10 sheep | 14,665,960 steps | ret(last 50)=+31.64 win_sr=98% cum_sr=93%]
... [trial 1 | 10 sheep | 14,765,960 steps | ret(last 50)=+30.86 win_sr=90% cum_sr=93%]
... [trial 1 | 10 sheep | 14,865,960 steps | ret(last 50)=+31.65 win_sr=90% cum_sr=93%]
... [trial 1 | 10 sheep | 14,965,960 steps | ret(last 50)=+31.75 win_sr=92% cum_sr=93%]
... [trial 1 | 10 sheep | 15,065,960 steps | ret(last 50)=+30.24 win_sr=100% cum_sr=93%]
[Stage n_sheep=10] evaluating 30 eps
[Stage n_sheep=10] sr=90% mean_len=1841 mean_min_pen=3.6m mean_act=1.39
[Consolidation] mixed n_sheep ∈ [1, 10], 2,000,000 steps
... [trial 1 | consolidate | 15,073,288 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [trial 1 | consolidate | 15,173,288 steps | ret(last 50)=+20.69 win_sr=94% cum_sr=95%]
... [trial 1 | consolidate | 15,273,288 steps | ret(last 50)=+20.62 win_sr=90% cum_sr=92%]
... [trial 1 | consolidate | 15,373,288 steps | ret(last 50)=+20.25 win_sr=94% cum_sr=93%]
... [trial 1 | consolidate | 15,473,288 steps | ret(last 50)=+19.82 win_sr=96% cum_sr=94%]
... [trial 1 | consolidate | 15,573,288 steps | ret(last 50)=+20.56 win_sr=94% cum_sr=94%]
... [trial 1 | consolidate | 15,673,288 steps | ret(last 50)=+20.56 win_sr=92% cum_sr=94%]
... [trial 1 | consolidate | 15,773,288 steps | ret(last 50)=+19.43 win_sr=94% cum_sr=95%]
... [trial 1 | consolidate | 15,873,288 steps | ret(last 50)=+21.85 win_sr=98% cum_sr=95%]
... [trial 1 | consolidate | 15,973,288 steps | ret(last 50)=+21.84 win_sr=94% cum_sr=95%]
... [trial 1 | consolidate | 16,073,288 steps | ret(last 50)=+22.13 win_sr=98% cum_sr=95%]
... [trial 1 | consolidate | 16,173,288 steps | ret(last 50)=+21.89 win_sr=94% cum_sr=95%]
... [trial 1 | consolidate | 16,273,288 steps | ret(last 50)=+21.88 win_sr=98% cum_sr=95%]
... [trial 1 | consolidate | 16,373,288 steps | ret(last 50)=+20.81 win_sr=94% cum_sr=95%]
... [trial 1 | consolidate | 16,473,288 steps | ret(last 50)=+20.91 win_sr=98% cum_sr=95%]
... [trial 1 | consolidate | 16,573,288 steps | ret(last 50)=+21.13 win_sr=98% cum_sr=95%]
... [trial 1 | consolidate | 16,673,288 steps | ret(last 50)=+19.85 win_sr=100% cum_sr=95%]
... [trial 1 | consolidate | 16,773,288 steps | ret(last 50)=+22.30 win_sr=92% cum_sr=95%]
... [trial 1 | consolidate | 16,873,288 steps | ret(last 50)=+20.61 win_sr=96% cum_sr=95%]
... [trial 1 | consolidate | 16,973,288 steps | ret(last 50)=+21.93 win_sr=98% cum_sr=96%]
... [trial 1 | consolidate | 17,073,288 steps | ret(last 50)=+21.86 win_sr=98% cum_sr=96%]
[Consolidation] re-evaluating all sheep counts
[Consolidation] n_sheep=1 sr=97% mean_len=377 mean_min_pen=3.5m mean_act=1.39
[Consolidation] n_sheep=2 sr=47% mean_len=1718 mean_min_pen=2.4m mean_act=1.39
[Consolidation] n_sheep=3 sr=93% mean_len=970 mean_min_pen=3.2m mean_act=1.39
[Consolidation] n_sheep=4 sr=97% mean_len=1008 mean_min_pen=3.3m mean_act=1.39
[Consolidation] n_sheep=5 sr=100% mean_len=1176 mean_min_pen=3.3m mean_act=1.39
[Consolidation] n_sheep=6 sr=100% mean_len=1305 mean_min_pen=3.3m mean_act=1.39
[Consolidation] n_sheep=7 sr=100% mean_len=1300 mean_min_pen=3.4m mean_act=1.39
[Consolidation] n_sheep=8 sr=100% mean_len=1461 mean_min_pen=3.5m mean_act=1.39
[Consolidation] n_sheep=9 sr=87% mean_len=1607 mean_min_pen=3.8m mean_act=1.39
[Consolidation] n_sheep=10 sr=80% mean_len=1801 mean_min_pen=3.7m mean_act=1.39
============================================================
REPLAY SUMMARY
============================================================
n_sheep=1 sr= 97% len= 377 min_pen= 3.5m act=1.39
n_sheep=2 sr= 47% len= 1718 min_pen= 2.4m act=1.39
n_sheep=3 sr= 93% len= 970 min_pen= 3.2m act=1.39
n_sheep=4 sr= 97% len= 1008 min_pen= 3.3m act=1.39
n_sheep=5 sr=100% len= 1176 min_pen= 3.3m act=1.39
n_sheep=6 sr=100% len= 1305 min_pen= 3.3m act=1.39
n_sheep=7 sr=100% len= 1300 min_pen= 3.4m act=1.39
n_sheep=8 sr=100% len= 1461 min_pen= 3.5m act=1.39
n_sheep=9 sr= 87% len= 1607 min_pen= 3.8m act=1.39
n_sheep=10 sr= 80% len= 1801 min_pen= 3.7m act=1.39
Total time: 110.1 min
Artefacts: runs/final_v3/
-11
View File
@@ -1,11 +0,0 @@
{
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
}
Binary file not shown.
-72
View File
@@ -1,72 +0,0 @@
[
{
"n_sheep": 1,
"sr": 0.9666666666666667,
"mean_len": 377.3666666666667,
"mean_min_pen": 3.5389957586924234,
"mean_act": 1.3908841227086732
},
{
"n_sheep": 2,
"sr": 0.4666666666666667,
"mean_len": 1717.6333333333334,
"mean_min_pen": 2.4164488633473713,
"mean_act": 1.3922284740020803
},
{
"n_sheep": 3,
"sr": 0.9333333333333333,
"mean_len": 970.2666666666667,
"mean_min_pen": 3.203955141703288,
"mean_act": 1.3945290882248416
},
{
"n_sheep": 4,
"sr": 0.9666666666666667,
"mean_len": 1008.0,
"mean_min_pen": 3.279213563601176,
"mean_act": 1.3918021049325862
},
{
"n_sheep": 5,
"sr": 1.0,
"mean_len": 1175.8666666666666,
"mean_min_pen": 3.3209743976593016,
"mean_act": 1.3925684957666513
},
{
"n_sheep": 6,
"sr": 1.0,
"mean_len": 1305.0,
"mean_min_pen": 3.312229561805725,
"mean_act": 1.391130207932886
},
{
"n_sheep": 7,
"sr": 1.0,
"mean_len": 1300.0,
"mean_min_pen": 3.363971138000488,
"mean_act": 1.392986050516367
},
{
"n_sheep": 8,
"sr": 1.0,
"mean_len": 1461.3666666666666,
"mean_min_pen": 3.4741388003031415,
"mean_act": 1.392040583461347
},
{
"n_sheep": 9,
"sr": 0.8666666666666667,
"mean_len": 1606.7333333333333,
"mean_min_pen": 3.835897175470988,
"mean_act": 1.3907199496534952
},
{
"n_sheep": 10,
"sr": 0.8,
"mean_len": 1800.9666666666667,
"mean_min_pen": 3.741190282503764,
"mean_act": 1.392501896076031
}
]
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,11 +0,0 @@
{
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
}
Binary file not shown.
@@ -1,23 +0,0 @@
[
{
"n_sheep": 1,
"sr": 1.0,
"mean_len": 267.6333333333333,
"mean_min_pen": 3.7235233147939044,
"mean_act": 0.3746675180125346
},
{
"n_sheep": 2,
"sr": 0.06666666666666667,
"mean_len": 1458.6666666666667,
"mean_min_pen": 14.14484707514445,
"mean_act": 0.284232099657656
},
{
"n_sheep": 3,
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 12.514182837804158,
"mean_act": 1.2590703022670828
}
]
Binary file not shown.
-72
View File
@@ -1,72 +0,0 @@
Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
Run dir: runs/replay_20260425_152857
Curriculum: 1 → 3 sheep, 1,500,000 steps/stage
[Stage n_sheep=1] training 1,500,000 steps
... [trial 1 | 1 sheep | 100,000 steps | ret(last 50)=-20.83 sr=6%]
... [trial 1 | 1 sheep | 200,000 steps | ret(last 50)=-21.40 sr=4%]
... [trial 1 | 1 sheep | 300,000 steps | ret(last 50)=-22.31 sr=0%]
... [trial 1 | 1 sheep | 400,000 steps | ret(last 50)=-19.13 sr=4%]
... [trial 1 | 1 sheep | 500,000 steps | ret(last 50)=-18.79 sr=8%]
... [trial 1 | 1 sheep | 600,000 steps | ret(last 50)=-10.15 sr=8%]
... [trial 1 | 1 sheep | 700,000 steps | ret(last 50)=+10.14 sr=82%]
... [trial 1 | 1 sheep | 800,000 steps | ret(last 50)=+11.90 sr=100%]
... [trial 1 | 1 sheep | 900,000 steps | ret(last 50)=+11.32 sr=100%]
... [trial 1 | 1 sheep | 1,000,000 steps | ret(last 50)=+11.36 sr=100%]
... [trial 1 | 1 sheep | 1,100,000 steps | ret(last 50)=+11.18 sr=100%]
... [trial 1 | 1 sheep | 1,200,000 steps | ret(last 50)=+11.08 sr=100%]
... [trial 1 | 1 sheep | 1,300,000 steps | ret(last 50)=+11.14 sr=100%]
... [trial 1 | 1 sheep | 1,400,000 steps | ret(last 50)=+11.10 sr=100%]
... [trial 1 | 1 sheep | 1,500,000 steps | ret(last 50)=+10.99 sr=100%]
[Stage n_sheep=1] evaluating 30 eps
[Stage n_sheep=1] sr=100% mean_len=268 mean_min_pen=3.7m mean_act=0.37
[Stage n_sheep=2] training 1,500,000 steps
... [trial 1 | 2 sheep | 1,507,336 steps | ret(last 0)=+nan sr=nan%]
... [trial 1 | 2 sheep | 1,607,336 steps | ret(last 50)=-3.10 sr=2%]
... [trial 1 | 2 sheep | 1,707,336 steps | ret(last 50)=-3.41 sr=2%]
... [trial 1 | 2 sheep | 1,807,336 steps | ret(last 50)=-3.11 sr=6%]
... [trial 1 | 2 sheep | 1,907,336 steps | ret(last 50)=-2.65 sr=8%]
... [trial 1 | 2 sheep | 2,007,336 steps | ret(last 50)=-4.11 sr=2%]
... [trial 1 | 2 sheep | 2,107,336 steps | ret(last 50)=-3.19 sr=6%]
... [trial 1 | 2 sheep | 2,207,336 steps | ret(last 50)=-3.45 sr=4%]
... [trial 1 | 2 sheep | 2,307,336 steps | ret(last 50)=-4.13 sr=0%]
... [trial 1 | 2 sheep | 2,407,336 steps | ret(last 50)=-3.47 sr=8%]
... [trial 1 | 2 sheep | 2,507,336 steps | ret(last 50)=-3.83 sr=4%]
... [trial 1 | 2 sheep | 2,607,336 steps | ret(last 50)=-4.58 sr=0%]
... [trial 1 | 2 sheep | 2,707,336 steps | ret(last 50)=-3.94 sr=2%]
... [trial 1 | 2 sheep | 2,807,336 steps | ret(last 50)=-4.15 sr=2%]
... [trial 1 | 2 sheep | 2,907,336 steps | ret(last 50)=-3.95 sr=4%]
... [trial 1 | 2 sheep | 3,007,336 steps | ret(last 50)=-4.44 sr=0%]
[Stage n_sheep=2] evaluating 30 eps
[Stage n_sheep=2] sr=7% mean_len=1459 mean_min_pen=14.1m mean_act=0.28
[Stage n_sheep=3] training 1,500,000 steps
... [trial 1 | 3 sheep | 3,014,664 steps | ret(last 0)=+nan sr=nan%]
... [trial 1 | 3 sheep | 3,114,664 steps | ret(last 50)=-4.16 sr=0%]
... [trial 1 | 3 sheep | 3,214,664 steps | ret(last 50)=-4.94 sr=0%]
... [trial 1 | 3 sheep | 3,314,664 steps | ret(last 50)=-4.42 sr=0%]
... [trial 1 | 3 sheep | 3,414,664 steps | ret(last 50)=-4.69 sr=0%]
... [trial 1 | 3 sheep | 3,514,664 steps | ret(last 50)=-3.72 sr=0%]
... [trial 1 | 3 sheep | 3,614,664 steps | ret(last 50)=-5.04 sr=0%]
... [trial 1 | 3 sheep | 3,714,664 steps | ret(last 50)=-4.26 sr=0%]
... [trial 1 | 3 sheep | 3,814,664 steps | ret(last 50)=-4.70 sr=0%]
... [trial 1 | 3 sheep | 3,914,664 steps | ret(last 50)=-4.61 sr=0%]
... [trial 1 | 3 sheep | 4,014,664 steps | ret(last 50)=-4.19 sr=0%]
... [trial 1 | 3 sheep | 4,114,664 steps | ret(last 50)=-4.35 sr=0%]
... [trial 1 | 3 sheep | 4,214,664 steps | ret(last 50)=-4.41 sr=0%]
... [trial 1 | 3 sheep | 4,314,664 steps | ret(last 50)=-4.42 sr=0%]
... [trial 1 | 3 sheep | 4,414,664 steps | ret(last 50)=-4.77 sr=0%]
... [trial 1 | 3 sheep | 4,514,664 steps | ret(last 50)=-4.49 sr=0%]
[Stage n_sheep=3] evaluating 30 eps
[Stage n_sheep=3] sr=0% mean_len=1500 mean_min_pen=12.5m mean_act=1.26
============================================================
REPLAY SUMMARY
============================================================
n_sheep=1 sr=100% len= 268 min_pen= 3.7m act=0.37
n_sheep=2 sr= 7% len= 1459 min_pen= 14.1m act=0.28
n_sheep=3 sr= 0% len= 1500 min_pen= 12.5m act=1.26
Total time: 26.9 min
Artefacts: runs/replay_20260425_152857/
Binary file not shown.
Binary file not shown.

Before

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.
Binary file not shown.
Binary file not shown.

Before

Width:  |  Height:  |  Size: 74 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.
Binary file not shown.
Binary file not shown.

Before

Width:  |  Height:  |  Size: 93 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.
@@ -1,41 +0,0 @@
{
"trial": 0,
"config": {
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.1,
"W_PEN_BONUS": 10.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 100.0,
"W_COMPACT": 3.0,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.005
},
"score": 0.06,
"sr": {
"1": 0.3,
"2": 0.0,
"3": 0.0
},
"details": {
"1": {
"sr": 0.3,
"mean_len": 1252.2,
"mean_min_pen": 2.1085331559181215,
"mean_act": 0.07743233270979732
},
"2": {
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 12.107558453083039,
"mean_act": 0.15608626089841424
},
"3": {
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 13.675278377532958,
"mean_act": 0.10535904271739319
}
},
"elapsed_s": 307.773992061615
}
@@ -1 +0,0 @@
{"trial": 0, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.02, "W_COMPLETE": 100.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.005}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1252.2, "mean_min_pen": 2.1085331559181215, "mean_act": 0.07743233270979732}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.107558453083039, "mean_act": 0.15608626089841424}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.675278377532958, "mean_act": 0.10535904271739319}}, "elapsed_s": 307.773992061615}
@@ -1,41 +0,0 @@
{
"trial": 13,
"config": {
"W_PER_SHEEP": 1.0,
"W_ALIGN": 0.0,
"W_PEN_BONUS": 5.0,
"W_STEP_COST": 0.02,
"W_COMPLETE": 200.0,
"W_COMPACT": 1.5,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": false,
"ent_coef": 0.02
},
"score": 0.35,
"sr": {
"1": 1.0,
"2": 0.3,
"3": 0.0
},
"details": {
"1": {
"sr": 1.0,
"mean_len": 428.9,
"mean_min_pen": 3.731236696243286,
"mean_act": 0.33429858573849425
},
"2": {
"sr": 0.3,
"mean_len": 1242.7,
"mean_min_pen": 8.937442195415496,
"mean_act": 0.3998076917437125
},
"3": {
"sr": 0.0,
"mean_len": 1500.0,
"mean_min_pen": 14.061083602905274,
"mean_act": 0.5966902794524755
}
},
"elapsed_s": 313.8281009197235
}
@@ -1,25 +0,0 @@
{"trial": 0, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.02, "W_COMPLETE": 100.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.005}, "score": 0.15000000000000002, "sr": {"1": 0.5, "2": 0.1, "3": 0.0}, "details": {"1": {"sr": 0.5, "mean_len": 1051.6, "mean_min_pen": 3.0551586985588073, "mean_act": 0.0887192903536989}, "2": {"sr": 0.1, "mean_len": 1438.1, "mean_min_pen": 10.993862140178681, "mean_act": 0.1723056222816755}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 11.92835488319397, "mean_act": 0.15403316749989074}}, "elapsed_s": 316.9084241390228}
{"trial": 1, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.05, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.05, "W_COMPLETE": 200.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.005}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1153.8, "mean_min_pen": 3.8145030617713926, "mean_act": 0.15146865127462797}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.058024168014526, "mean_act": 0.10904584494279744}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.5988187789917, "mean_act": 0.09578829008591905}}, "elapsed_s": 310.8732409477234}
{"trial": 2, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.025, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.02, "W_COMPLETE": 50.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.01}, "score": 0.27, "sr": {"1": 0.7, "2": 0.2, "3": 0.1}, "details": {"1": {"sr": 0.7, "mean_len": 772.1, "mean_min_pen": 2.92204372882843, "mean_act": 0.1583604314471399}, "2": {"sr": 0.2, "mean_len": 1390.6, "mean_min_pen": 12.992859578132629, "mean_act": 0.16090679360424953}, "3": {"sr": 0.1, "mean_len": 1403.7, "mean_min_pen": 13.045468378067017, "mean_act": 0.07991531561051667}}, "elapsed_s": 303.7708294391632}
{"trial": 3, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.05, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.02, "W_COMPLETE": 50.0, "W_COMPACT": 0.0, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.005}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1137.5, "mean_min_pen": 2.1229824781417848, "mean_act": 0.08172097406143335}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 11.521494126319885, "mean_act": 0.16864279503144788}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.317158126831055, "mean_act": 0.05537428615499472}}, "elapsed_s": 301.6172459125519}
{"trial": 4, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.02, "W_COMPLETE": 50.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "near", "ALIGN_GATED": true, "ent_coef": 0.005}, "score": 0.2, "sr": {"1": 1.0, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 1.0, "mean_len": 567.0, "mean_min_pen": 3.2795117855072022, "mean_act": 0.1855437107780058}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 9.976170372962951, "mean_act": 0.2074074002778701}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.89306182861328, "mean_act": 0.21666522849385267}}, "elapsed_s": 313.525591135025}
{"trial": 5, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.025, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.05, "W_COMPLETE": 200.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ent_coef": 0.01}, "score": 0.16000000000000003, "sr": {"1": 0.8, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.8, "mean_len": 675.5, "mean_min_pen": 3.1338732481002807, "mean_act": 0.11691584614814514}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 9.693846690654755, "mean_act": 0.19984676872865814}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.684805488586425, "mean_act": 0.06430307933471292}}, "elapsed_s": 312.4476580619812}
{"trial": 6, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.0, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.005, "W_COMPLETE": 200.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.01}, "score": 0.08000000000000002, "sr": {"1": 0.4, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.4, "mean_len": 1343.9, "mean_min_pen": 4.092962062358856, "mean_act": 0.07675616785431166}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.157618689537049, "mean_act": 0.13906600509098352}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.079688358306885, "mean_act": 0.07073271389845953}}, "elapsed_s": 337.7615342140198}
{"trial": 7, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.025, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.05, "W_COMPLETE": 100.0, "W_COMPACT": 0.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.005}, "score": 0.11, "sr": {"1": 0.3, "2": 0.1, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1177.5, "mean_min_pen": 2.261639392375946, "mean_act": 0.11013885321646562}, "2": {"sr": 0.1, "mean_len": 1437.5, "mean_min_pen": 5.9263048529624935, "mean_act": 0.16420815230170227}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.130784749984741, "mean_act": 0.20303070502222206}}, "elapsed_s": 451.2424490451813}
{"trial": 8, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.0, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.02, "W_COMPLETE": 50.0, "W_COMPACT": 0.0, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.05}, "score": 0.19, "sr": {"1": 0.7, "2": 0.1, "3": 0.0}, "details": {"1": {"sr": 0.7, "mean_len": 874.2, "mean_min_pen": 4.152815592288971, "mean_act": 0.1303976929043709}, "2": {"sr": 0.1, "mean_len": 1381.4, "mean_min_pen": 12.115124177932739, "mean_act": 0.3749806733317197}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.596546864509582, "mean_act": 0.10082290474528718}}, "elapsed_s": 349.3926422595978}
{"trial": 9, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.0, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.02, "W_COMPLETE": 200.0, "W_COMPACT": 0.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.05}, "score": 0.0, "sr": {"1": 0.0, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 8.404254817962647, "mean_act": 0.6749623541596586}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 11.970247220993041, "mean_act": 0.45562502020561796}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.029277420043945, "mean_act": 0.1599790089856222}}, "elapsed_s": 319.38924622535706}
{"trial": 10, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.02, "W_COMPLETE": 200.0, "W_COMPACT": 0.5, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.02}, "score": 0.16000000000000003, "sr": {"1": 0.8, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.8, "mean_len": 690.7, "mean_min_pen": 3.1264367938041686, "mean_act": 0.13493279961414406}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.040377330780029, "mean_act": 0.20203861368317985}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.379706478118896, "mean_act": 0.05979441475490263}}, "elapsed_s": 310.1806254386902}
{"trial": 11, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.05, "W_COMPLETE": 50.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.02}, "score": 0.24, "sr": {"1": 0.7, "2": 0.2, "3": 0.0}, "details": {"1": {"sr": 0.7, "mean_len": 727.5, "mean_min_pen": 2.933144009113312, "mean_act": 0.11888058594495643}, "2": {"sr": 0.2, "mean_len": 1317.8, "mean_min_pen": 10.2599928855896, "mean_act": 0.14370172662258304}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.231103086471558, "mean_act": 0.0614644922383149}}, "elapsed_s": 330.0620620250702}
{"trial": 12, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.05, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.05, "W_COMPLETE": 100.0, "W_COMPACT": 0.5, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.005}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1244.8, "mean_min_pen": 2.1193889737129212, "mean_act": 0.08216679023110932}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 10.745809042453766, "mean_act": 0.16497857472260813}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.016976690292358, "mean_act": 0.09897869050660908}}, "elapsed_s": 323.27931213378906}
{"trial": 13, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.0, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.02, "W_COMPLETE": 200.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.02}, "score": 0.35, "sr": {"1": 1.0, "2": 0.3, "3": 0.0}, "details": {"1": {"sr": 1.0, "mean_len": 428.9, "mean_min_pen": 3.731236696243286, "mean_act": 0.33429858573849425}, "2": {"sr": 0.3, "mean_len": 1242.7, "mean_min_pen": 8.937442195415496, "mean_act": 0.3998076917437125}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.061083602905274, "mean_act": 0.5966902794524755}}, "elapsed_s": 313.8281009197235}
{"trial": 14, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.02, "W_COMPLETE": 100.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.05}, "score": 0.13999999999999999, "sr": {"1": 0.7, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.7, "mean_len": 912.4, "mean_min_pen": 2.940706562995911, "mean_act": 1.3471978399000248}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 9.901372599601746, "mean_act": 0.9463685217667609}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.291404342651367, "mean_act": 0.08601266834173493}}, "elapsed_s": 322.57220220565796}
{"trial": 15, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.05, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.02, "W_COMPLETE": 100.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ent_coef": 0.01}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1210.5, "mean_min_pen": 2.107759189605713, "mean_act": 0.08131515106917063}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 10.824185514450074, "mean_act": 0.20362997558291535}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.042323064804076, "mean_act": 0.17125511734669563}}, "elapsed_s": 312.3465087413788}
{"trial": 16, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 5.0, "W_STEP_COST": 0.005, "W_COMPLETE": 200.0, "W_COMPACT": 0.0, "ALIGN_SHAPE": "near", "ALIGN_GATED": true, "ent_coef": 0.05}, "score": 0.24, "sr": {"1": 0.7, "2": 0.2, "3": 0.0}, "details": {"1": {"sr": 0.7, "mean_len": 650.1, "mean_min_pen": 2.981771671772003, "mean_act": 0.1621352170537764}, "2": {"sr": 0.2, "mean_len": 1435.5, "mean_min_pen": 8.686615812778474, "mean_act": 0.3279171284351484}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.374159717559815, "mean_act": 0.04937917392927017}}, "elapsed_s": 303.71519470214844}
{"trial": 17, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.025, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.005, "W_COMPLETE": 100.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "near", "ALIGN_GATED": false, "ent_coef": 0.02}, "score": 0.16, "sr": {"1": 0.3, "2": 0.2, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1088.1, "mean_min_pen": 3.4793057322502134, "mean_act": 0.09515179877670824}, "2": {"sr": 0.2, "mean_len": 1428.5, "mean_min_pen": 10.024536824226379, "mean_act": 0.4135459636897354}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.302330660820008, "mean_act": 0.34973196326509737}}, "elapsed_s": 315.76633620262146}
{"trial": 18, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.025, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.05, "W_COMPLETE": 50.0, "W_COMPACT": 0.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ent_coef": 0.005}, "score": 0.16000000000000003, "sr": {"1": 0.8, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.8, "mean_len": 645.4, "mean_min_pen": 3.1326077818870544, "mean_act": 0.15081361126264722}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 10.723365247249603, "mean_act": 0.10806036127302399}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.303192138671875, "mean_act": 0.08246586098832388}}, "elapsed_s": 318.483638048172}
{"trial": 19, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.05, "W_COMPLETE": 100.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.02}, "score": 0.13, "sr": {"1": 0.4, "2": 0.1, "3": 0.0}, "details": {"1": {"sr": 0.4, "mean_len": 1231.4, "mean_min_pen": 2.6246669054031373, "mean_act": 0.07338090033141094}, "2": {"sr": 0.1, "mean_len": 1420.2, "mean_min_pen": 8.371916389465332, "mean_act": 0.16944798908643302}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 14.287557554244994, "mean_act": 0.09957915147298428}}, "elapsed_s": 315.07627868652344}
{"trial": 20, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.0, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.05, "W_COMPLETE": 100.0, "W_COMPACT": 0.5, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ent_coef": 0.005}, "score": 0.05, "sr": {"1": 0.0, "2": 0.1, "3": 0.0}, "details": {"1": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 1.5734932541847229, "mean_act": 0.08394606926547861}, "2": {"sr": 0.1, "mean_len": 1498.9, "mean_min_pen": 6.444609999656677, "mean_act": 0.2938110977638972}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 11.258054113388061, "mean_act": 0.16288984295733971}}, "elapsed_s": 309.5854580402374}
{"trial": 21, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.05, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.02, "W_COMPLETE": 100.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ent_coef": 0.005}, "score": 0.11, "sr": {"1": 0.3, "2": 0.1, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1324.6, "mean_min_pen": 3.3425565361976624, "mean_act": 0.1115106962044226}, "2": {"sr": 0.1, "mean_len": 1443.0, "mean_min_pen": 11.069470012187958, "mean_act": 0.17271345215252376}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.845431709289551, "mean_act": 0.13337391122176}}, "elapsed_s": 315.54923272132874}
{"trial": 22, "config": {"W_PER_SHEEP": 2.0, "W_ALIGN": 0.1, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.05, "W_COMPLETE": 100.0, "W_COMPACT": 1.5, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ent_coef": 0.05}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1220.2, "mean_min_pen": 2.1276236534118653, "mean_act": 0.4312911105166665}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 8.770305395126343, "mean_act": 0.6047595652043354}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.12634140253067, "mean_act": 0.14348885283676113}}, "elapsed_s": 471.740927696228}
{"trial": 23, "config": {"W_PER_SHEEP": 6.0, "W_ALIGN": 0.025, "W_PEN_BONUS": 20.0, "W_STEP_COST": 0.005, "W_COMPLETE": 200.0, "W_COMPACT": 3.0, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": false, "ent_coef": 0.01}, "score": 0.06, "sr": {"1": 0.3, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.3, "mean_len": 1209.4, "mean_min_pen": 3.811609184741974, "mean_act": 0.08888363576016632}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 10.143073177337646, "mean_act": 0.27062979487000655}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 15.135865116119385, "mean_act": 0.3670903712440903}}, "elapsed_s": 335.26912212371826}
{"trial": 24, "config": {"W_PER_SHEEP": 1.0, "W_ALIGN": 0.0, "W_PEN_BONUS": 10.0, "W_STEP_COST": 0.05, "W_COMPLETE": 50.0, "W_COMPACT": 0.5, "ALIGN_SHAPE": "near", "ALIGN_GATED": true, "ent_coef": 0.02}, "score": 0.0, "sr": {"1": 0.0, "2": 0.0, "3": 0.0}, "details": {"1": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 10.014724779129029, "mean_act": 1.024556803444028}, "2": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 12.734652400016785, "mean_act": 1.0186923123559604}, "3": {"sr": 0.0, "mean_len": 1500.0, "mean_min_pen": 13.690151166915893, "mean_act": 1.000638129701217}}, "elapsed_s": 306.1110165119171}
-681
View File
@@ -1,681 +0,0 @@
Sweep dir: runs/sweep_20260425_124630
Search space: ['W_PER_SHEEP', 'W_ALIGN', 'W_PEN_BONUS', 'W_STEP_COST', 'W_COMPLETE', 'W_COMPACT', 'ALIGN_SHAPE', 'ALIGN_GATED', 'ent_coef']
Per-trial: 1,000,000 steps train + 30 eval eps
Time budget: 7.5h
[Trial 1] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 100.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.005}
... [trial 1 | 1 sheep | 50,000 steps | ret(last 33)=-7.72 sr=6%]
... [trial 1 | 1 sheep | 100,000 steps | ret(last 50)=-10.07 sr=2%]
... [trial 1 | 1 sheep | 150,000 steps | ret(last 50)=-9.89 sr=2%]
... [trial 1 | 1 sheep | 200,000 steps | ret(last 50)=-7.94 sr=4%]
... [trial 1 | 1 sheep | 250,000 steps | ret(last 50)=+2.69 sr=2%]
... [trial 1 | 1 sheep | 300,000 steps | ret(last 50)=+18.25 sr=24%]
... [trial 1 | 1 sheep | 350,000 steps | ret(last 50)=+24.63 sr=20%]
... [trial 1 | 1 sheep | 400,000 steps | ret(last 50)=+24.83 sr=26%]
... [trial 1 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 1 | 2 sheep | 459,608 steps | ret(last 32)=+10.08 sr=0%]
... [trial 1 | 2 sheep | 509,608 steps | ret(last 50)=+11.51 sr=0%]
... [trial 1 | 2 sheep | 559,608 steps | ret(last 50)=+12.82 sr=0%]
... [trial 1 | 2 sheep | 609,608 steps | ret(last 50)=+14.39 sr=0%]
... [trial 1 | 2 sheep | 659,608 steps | ret(last 50)=+14.14 sr=0%]
... [trial 1 | 2 sheep | 709,608 steps | ret(last 50)=+12.36 sr=2%]
... [trial 1 | 2 sheep | 759,608 steps | ret(last 50)=+13.08 sr=0%]
... [trial 1 | 2 sheep | 809,608 steps | ret(last 50)=+13.24 sr=0%]
... [trial 1 | 2 sheep | 859,608 steps | ret(last 50)=+13.23 sr=0%]
... [trial 1 | 2 sheep | 909,608 steps | ret(last 50)=+14.23 sr=2%]
... [trial 1 | 2 sheep | 959,608 steps | ret(last 50)=+14.69 sr=0%]
... [trial 1 | 2 sheep | 1,009,608 steps | ret(last 50)=+20.23 sr=0%]
... [trial 1 | eval n=1]
... [trial 1 | eval n=2]
... [trial 1 | eval n=3]
→ score=0.150 sr1=0.50 sr2=0.10 sr3=0.00 [317s]
[Trial 2] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.005}
... [trial 2 | 1 sheep | 50,000 steps | ret(last 34)=-24.61 sr=9%]
... [trial 2 | 1 sheep | 100,000 steps | ret(last 50)=-28.20 sr=10%]
... [trial 2 | 1 sheep | 150,000 steps | ret(last 50)=-28.14 sr=8%]
... [trial 2 | 1 sheep | 200,000 steps | ret(last 50)=-31.36 sr=2%]
... [trial 2 | 1 sheep | 250,000 steps | ret(last 50)=-31.38 sr=6%]
... [trial 2 | 1 sheep | 300,000 steps | ret(last 50)=-32.89 sr=4%]
... [trial 2 | 1 sheep | 350,000 steps | ret(last 50)=-29.11 sr=8%]
... [trial 2 | 1 sheep | 400,000 steps | ret(last 50)=-19.16 sr=30%]
... [trial 2 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 2 | 2 sheep | 459,608 steps | ret(last 34)=-17.61 sr=9%]
... [trial 2 | 2 sheep | 509,608 steps | ret(last 50)=-18.59 sr=2%]
... [trial 2 | 2 sheep | 559,608 steps | ret(last 50)=-16.92 sr=0%]
... [trial 2 | 2 sheep | 609,608 steps | ret(last 50)=-17.40 sr=0%]
... [trial 2 | 2 sheep | 659,608 steps | ret(last 50)=-18.13 sr=0%]
... [trial 2 | 2 sheep | 709,608 steps | ret(last 50)=-17.45 sr=0%]
... [trial 2 | 2 sheep | 759,608 steps | ret(last 50)=-16.06 sr=0%]
... [trial 2 | 2 sheep | 809,608 steps | ret(last 50)=-15.35 sr=0%]
... [trial 2 | 2 sheep | 859,608 steps | ret(last 50)=-12.63 sr=0%]
... [trial 2 | 2 sheep | 909,608 steps | ret(last 50)=-12.41 sr=0%]
... [trial 2 | 2 sheep | 959,608 steps | ret(last 50)=-12.91 sr=0%]
... [trial 2 | 2 sheep | 1,009,608 steps | ret(last 50)=-10.94 sr=0%]
... [trial 2 | eval n=1]
... [trial 2 | eval n=2]
... [trial 2 | eval n=3]
→ score=0.060 sr1=0.30 sr2=0.00 sr3=0.00 [311s]
[Trial 3] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.025, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 50.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.01}
... [trial 3 | 1 sheep | 50,000 steps | ret(last 32)=-1.75 sr=0%]
... [trial 3 | 1 sheep | 100,000 steps | ret(last 50)=-3.70 sr=0%]
... [trial 3 | 1 sheep | 150,000 steps | ret(last 50)=-6.09 sr=2%]
... [trial 3 | 1 sheep | 200,000 steps | ret(last 50)=-3.44 sr=4%]
... [trial 3 | 1 sheep | 250,000 steps | ret(last 50)=+6.68 sr=8%]
... [trial 3 | 1 sheep | 300,000 steps | ret(last 50)=+14.58 sr=22%]
... [trial 3 | 1 sheep | 350,000 steps | ret(last 50)=+15.28 sr=64%]
... [trial 3 | 1 sheep | 400,000 steps | ret(last 50)=+14.70 sr=74%]
... [trial 3 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 3 | 2 sheep | 459,608 steps | ret(last 35)=+0.82 sr=9%]
... [trial 3 | 2 sheep | 509,608 steps | ret(last 50)=-0.66 sr=2%]
... [trial 3 | 2 sheep | 559,608 steps | ret(last 50)=-0.02 sr=0%]
... [trial 3 | 2 sheep | 609,608 steps | ret(last 50)=-0.02 sr=0%]
... [trial 3 | 2 sheep | 659,608 steps | ret(last 50)=+1.37 sr=4%]
... [trial 3 | 2 sheep | 709,608 steps | ret(last 50)=+2.75 sr=8%]
... [trial 3 | 2 sheep | 759,608 steps | ret(last 50)=+1.25 sr=6%]
... [trial 3 | 2 sheep | 809,608 steps | ret(last 50)=+4.20 sr=10%]
... [trial 3 | 2 sheep | 859,608 steps | ret(last 50)=+2.14 sr=4%]
... [trial 3 | 2 sheep | 909,608 steps | ret(last 50)=+3.13 sr=8%]
... [trial 3 | 2 sheep | 959,608 steps | ret(last 50)=+5.16 sr=6%]
... [trial 3 | 2 sheep | 1,009,608 steps | ret(last 50)=+5.95 sr=8%]
... [trial 3 | eval n=1]
... [trial 3 | eval n=2]
... [trial 3 | eval n=3]
→ score=0.270 sr1=0.70 sr2=0.20 sr3=0.10 [304s]
[Trial 4] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 50.0, 'W_COMPACT': 0.0, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.005}
... [trial 4 | 1 sheep | 50,000 steps | ret(last 33)=-2.86 sr=9%]
... [trial 4 | 1 sheep | 100,000 steps | ret(last 50)=-3.54 sr=6%]
... [trial 4 | 1 sheep | 150,000 steps | ret(last 50)=-2.76 sr=8%]
... [trial 4 | 1 sheep | 200,000 steps | ret(last 50)=-1.56 sr=8%]
... [trial 4 | 1 sheep | 250,000 steps | ret(last 50)=+9.18 sr=24%]
... [trial 4 | 1 sheep | 300,000 steps | ret(last 50)=+18.46 sr=46%]
... [trial 4 | 1 sheep | 350,000 steps | ret(last 50)=+15.01 sr=34%]
... [trial 4 | 1 sheep | 400,000 steps | ret(last 50)=+14.44 sr=42%]
... [trial 4 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 4 | 2 sheep | 459,608 steps | ret(last 35)=+6.77 sr=9%]
... [trial 4 | 2 sheep | 509,608 steps | ret(last 50)=+5.50 sr=6%]
... [trial 4 | 2 sheep | 559,608 steps | ret(last 50)=+4.39 sr=0%]
... [trial 4 | 2 sheep | 609,608 steps | ret(last 50)=+4.54 sr=0%]
... [trial 4 | 2 sheep | 659,608 steps | ret(last 50)=+6.97 sr=0%]
... [trial 4 | 2 sheep | 709,608 steps | ret(last 50)=+4.28 sr=4%]
... [trial 4 | 2 sheep | 759,608 steps | ret(last 50)=+4.30 sr=2%]
... [trial 4 | 2 sheep | 809,608 steps | ret(last 50)=+6.34 sr=4%]
... [trial 4 | 2 sheep | 859,608 steps | ret(last 50)=+7.27 sr=2%]
... [trial 4 | 2 sheep | 909,608 steps | ret(last 50)=+8.22 sr=4%]
... [trial 4 | 2 sheep | 959,608 steps | ret(last 50)=+7.23 sr=6%]
... [trial 4 | 2 sheep | 1,009,608 steps | ret(last 50)=+7.24 sr=2%]
... [trial 4 | eval n=1]
... [trial 4 | eval n=2]
... [trial 4 | eval n=3]
→ score=0.060 sr1=0.30 sr2=0.00 sr3=0.00 [302s]
[Trial 5] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 50.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': True, 'ent_coef': 0.005}
... [trial 5 | 1 sheep | 50,000 steps | ret(last 33)=+3.70 sr=6%]
... [trial 5 | 1 sheep | 100,000 steps | ret(last 50)=-2.32 sr=0%]
... [trial 5 | 1 sheep | 150,000 steps | ret(last 50)=-4.36 sr=4%]
... [trial 5 | 1 sheep | 200,000 steps | ret(last 50)=-4.30 sr=6%]
... [trial 5 | 1 sheep | 250,000 steps | ret(last 50)=-0.15 sr=14%]
... [trial 5 | 1 sheep | 300,000 steps | ret(last 50)=+1.39 sr=8%]
... [trial 5 | 1 sheep | 350,000 steps | ret(last 50)=+11.40 sr=36%]
... [trial 5 | 1 sheep | 400,000 steps | ret(last 50)=+11.08 sr=24%]
... [trial 5 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 5 | 2 sheep | 459,608 steps | ret(last 34)=+6.85 sr=6%]
... [trial 5 | 2 sheep | 509,608 steps | ret(last 50)=+7.35 sr=8%]
... [trial 5 | 2 sheep | 559,608 steps | ret(last 50)=+7.57 sr=4%]
... [trial 5 | 2 sheep | 609,608 steps | ret(last 50)=+6.64 sr=2%]
... [trial 5 | 2 sheep | 659,608 steps | ret(last 50)=+9.15 sr=10%]
... [trial 5 | 2 sheep | 709,608 steps | ret(last 50)=+14.27 sr=10%]
... [trial 5 | 2 sheep | 759,608 steps | ret(last 50)=+10.93 sr=6%]
... [trial 5 | 2 sheep | 809,608 steps | ret(last 50)=+10.17 sr=12%]
... [trial 5 | 2 sheep | 859,608 steps | ret(last 50)=+8.20 sr=8%]
... [trial 5 | 2 sheep | 909,608 steps | ret(last 50)=+9.61 sr=14%]
... [trial 5 | 2 sheep | 959,608 steps | ret(last 50)=+11.14 sr=10%]
... [trial 5 | 2 sheep | 1,009,608 steps | ret(last 50)=+10.75 sr=12%]
... [trial 5 | eval n=1]
... [trial 5 | eval n=2]
... [trial 5 | eval n=3]
→ score=0.200 sr1=1.00 sr2=0.00 sr3=0.00 [314s]
[Trial 6] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.025, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 200.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ent_coef': 0.01}
... [trial 6 | 1 sheep | 50,000 steps | ret(last 32)=-13.18 sr=9%]
... [trial 6 | 1 sheep | 100,000 steps | ret(last 50)=-10.28 sr=16%]
... [trial 6 | 1 sheep | 150,000 steps | ret(last 50)=+5.28 sr=44%]
... [trial 6 | 1 sheep | 200,000 steps | ret(last 50)=+9.40 sr=38%]
... [trial 6 | 1 sheep | 250,000 steps | ret(last 50)=+8.62 sr=32%]
... [trial 6 | 1 sheep | 300,000 steps | ret(last 50)=+9.14 sr=34%]
... [trial 6 | 1 sheep | 350,000 steps | ret(last 50)=+12.59 sr=60%]
... [trial 6 | 1 sheep | 400,000 steps | ret(last 50)=+14.10 sr=72%]
... [trial 6 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 6 | 2 sheep | 459,608 steps | ret(last 34)=+0.12 sr=9%]
... [trial 6 | 2 sheep | 509,608 steps | ret(last 50)=-2.84 sr=4%]
... [trial 6 | 2 sheep | 559,608 steps | ret(last 50)=-2.11 sr=10%]
... [trial 6 | 2 sheep | 609,608 steps | ret(last 50)=-1.91 sr=14%]
... [trial 6 | 2 sheep | 659,608 steps | ret(last 50)=-2.14 sr=14%]
... [trial 6 | 2 sheep | 709,608 steps | ret(last 50)=-4.30 sr=6%]
... [trial 6 | 2 sheep | 759,608 steps | ret(last 50)=-1.89 sr=10%]
... [trial 6 | 2 sheep | 809,608 steps | ret(last 50)=-3.47 sr=8%]
... [trial 6 | 2 sheep | 859,608 steps | ret(last 50)=-1.45 sr=8%]
... [trial 6 | 2 sheep | 909,608 steps | ret(last 50)=-3.55 sr=2%]
... [trial 6 | 2 sheep | 959,608 steps | ret(last 50)=-2.93 sr=4%]
... [trial 6 | 2 sheep | 1,009,608 steps | ret(last 50)=-1.45 sr=10%]
... [trial 6 | eval n=1]
... [trial 6 | eval n=2]
... [trial 6 | eval n=3]
→ score=0.160 sr1=0.80 sr2=0.00 sr3=0.00 [312s]
[Trial 7] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.005, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.01}
... [trial 7 | 1 sheep | 50,000 steps | ret(last 32)=-8.47 sr=0%]
... [trial 7 | 1 sheep | 100,000 steps | ret(last 50)=-5.40 sr=4%]
... [trial 7 | 1 sheep | 150,000 steps | ret(last 50)=-2.72 sr=10%]
... [trial 7 | 1 sheep | 200,000 steps | ret(last 50)=-1.59 sr=10%]
... [trial 7 | 1 sheep | 250,000 steps | ret(last 50)=-1.58 sr=6%]
... [trial 7 | 1 sheep | 300,000 steps | ret(last 50)=-3.68 sr=2%]
... [trial 7 | 1 sheep | 350,000 steps | ret(last 50)=+4.82 sr=10%]
... [trial 7 | 1 sheep | 400,000 steps | ret(last 50)=+15.81 sr=54%]
... [trial 7 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 7 | 2 sheep | 459,608 steps | ret(last 32)=-2.50 sr=6%]
... [trial 7 | 2 sheep | 509,608 steps | ret(last 50)=-2.32 sr=2%]
... [trial 7 | 2 sheep | 559,608 steps | ret(last 50)=+0.76 sr=4%]
... [trial 7 | 2 sheep | 609,608 steps | ret(last 50)=+0.45 sr=0%]
... [trial 7 | 2 sheep | 659,608 steps | ret(last 50)=+1.03 sr=8%]
... [trial 7 | 2 sheep | 709,608 steps | ret(last 50)=+0.62 sr=6%]
... [trial 7 | 2 sheep | 759,608 steps | ret(last 50)=+0.36 sr=8%]
... [trial 7 | 2 sheep | 809,608 steps | ret(last 50)=+2.27 sr=10%]
... [trial 7 | 2 sheep | 859,608 steps | ret(last 50)=+2.31 sr=6%]
... [trial 7 | 2 sheep | 909,608 steps | ret(last 50)=+3.78 sr=4%]
... [trial 7 | 2 sheep | 959,608 steps | ret(last 50)=+2.21 sr=10%]
... [trial 7 | 2 sheep | 1,009,608 steps | ret(last 50)=+2.66 sr=4%]
... [trial 7 | eval n=1]
... [trial 7 | eval n=2]
... [trial 7 | eval n=3]
→ score=0.080 sr1=0.40 sr2=0.00 sr3=0.00 [338s]
[Trial 8] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.025, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 100.0, 'W_COMPACT': 0.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.005}
... [trial 8 | 1 sheep | 50,000 steps | ret(last 32)=-7.73 sr=6%]
... [trial 8 | 1 sheep | 100,000 steps | ret(last 50)=-9.58 sr=8%]
... [trial 8 | 1 sheep | 150,000 steps | ret(last 50)=-10.87 sr=8%]
... [trial 8 | 1 sheep | 200,000 steps | ret(last 50)=-9.79 sr=6%]
... [trial 8 | 1 sheep | 250,000 steps | ret(last 50)=-7.19 sr=8%]
... [trial 8 | 1 sheep | 300,000 steps | ret(last 50)=-3.84 sr=18%]
... [trial 8 | 1 sheep | 350,000 steps | ret(last 50)=-0.03 sr=26%]
... [trial 8 | 1 sheep | 400,000 steps | ret(last 50)=+6.80 sr=44%]
... [trial 8 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 8 | 2 sheep | 459,608 steps | ret(last 35)=-3.00 sr=9%]
... [trial 8 | 2 sheep | 509,608 steps | ret(last 50)=-4.26 sr=4%]
... [trial 8 | 2 sheep | 559,608 steps | ret(last 50)=+1.91 sr=14%]
... [trial 8 | 2 sheep | 609,608 steps | ret(last 50)=-0.57 sr=16%]
... [trial 8 | 2 sheep | 659,608 steps | ret(last 50)=+1.65 sr=14%]
... [trial 8 | 2 sheep | 709,608 steps | ret(last 50)=+2.90 sr=8%]
... [trial 8 | 2 sheep | 759,608 steps | ret(last 50)=+0.98 sr=2%]
... [trial 8 | 2 sheep | 809,608 steps | ret(last 50)=-2.52 sr=4%]
... [trial 8 | 2 sheep | 859,608 steps | ret(last 50)=-1.11 sr=2%]
... [trial 8 | 2 sheep | 909,608 steps | ret(last 50)=+2.74 sr=2%]
... [trial 8 | 2 sheep | 959,608 steps | ret(last 50)=+2.94 sr=0%]
... [trial 8 | 2 sheep | 1,009,608 steps | ret(last 50)=+5.13 sr=0%]
... [trial 8 | eval n=1]
... [trial 8 | eval n=2]
... [trial 8 | eval n=3]
→ score=0.110 sr1=0.30 sr2=0.10 sr3=0.00 [451s]
[Trial 9] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 50.0, 'W_COMPACT': 0.0, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.05}
... [trial 9 | 1 sheep | 50,000 steps | ret(last 34)=-11.25 sr=15%]
... [trial 9 | 1 sheep | 100,000 steps | ret(last 50)=-11.98 sr=8%]
... [trial 9 | 1 sheep | 150,000 steps | ret(last 50)=-10.46 sr=14%]
... [trial 9 | 1 sheep | 200,000 steps | ret(last 50)=-2.86 sr=14%]
... [trial 9 | 1 sheep | 250,000 steps | ret(last 50)=+8.65 sr=60%]
... [trial 9 | 1 sheep | 300,000 steps | ret(last 50)=+10.48 sr=58%]
... [trial 9 | 1 sheep | 350,000 steps | ret(last 50)=+8.65 sr=56%]
... [trial 9 | 1 sheep | 400,000 steps | ret(last 50)=+10.25 sr=68%]
... [trial 9 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 9 | 2 sheep | 459,608 steps | ret(last 35)=-0.75 sr=20%]
... [trial 9 | 2 sheep | 509,608 steps | ret(last 50)=-6.64 sr=2%]
... [trial 9 | 2 sheep | 559,608 steps | ret(last 50)=-7.43 sr=4%]
... [trial 9 | 2 sheep | 609,608 steps | ret(last 50)=-4.32 sr=6%]
... [trial 9 | 2 sheep | 659,608 steps | ret(last 50)=-3.64 sr=6%]
... [trial 9 | 2 sheep | 709,608 steps | ret(last 50)=-7.09 sr=0%]
... [trial 9 | 2 sheep | 759,608 steps | ret(last 50)=-5.60 sr=4%]
... [trial 9 | 2 sheep | 809,608 steps | ret(last 50)=-5.70 sr=6%]
... [trial 9 | 2 sheep | 859,608 steps | ret(last 50)=-4.99 sr=4%]
... [trial 9 | 2 sheep | 909,608 steps | ret(last 50)=-4.60 sr=6%]
... [trial 9 | 2 sheep | 959,608 steps | ret(last 50)=-6.53 sr=4%]
... [trial 9 | 2 sheep | 1,009,608 steps | ret(last 50)=-7.46 sr=2%]
... [trial 9 | eval n=1]
... [trial 9 | eval n=2]
... [trial 9 | eval n=3]
→ score=0.190 sr1=0.70 sr2=0.10 sr3=0.00 [349s]
[Trial 10] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 0.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.05}
... [trial 10 | 1 sheep | 50,000 steps | ret(last 32)=-13.35 sr=3%]
... [trial 10 | 1 sheep | 100,000 steps | ret(last 50)=-12.49 sr=4%]
... [trial 10 | 1 sheep | 150,000 steps | ret(last 50)=-13.24 sr=8%]
... [trial 10 | 1 sheep | 200,000 steps | ret(last 50)=-12.73 sr=10%]
... [trial 10 | 1 sheep | 250,000 steps | ret(last 50)=-15.27 sr=4%]
... [trial 10 | 1 sheep | 300,000 steps | ret(last 50)=-9.43 sr=8%]
... [trial 10 | 1 sheep | 350,000 steps | ret(last 50)=-2.65 sr=22%]
... [trial 10 | 1 sheep | 400,000 steps | ret(last 50)=+5.12 sr=46%]
... [trial 10 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 10 | 2 sheep | 459,608 steps | ret(last 34)=-4.93 sr=6%]
... [trial 10 | 2 sheep | 509,608 steps | ret(last 50)=-6.25 sr=2%]
... [trial 10 | 2 sheep | 559,608 steps | ret(last 50)=-5.57 sr=4%]
... [trial 10 | 2 sheep | 609,608 steps | ret(last 50)=-6.24 sr=4%]
... [trial 10 | 2 sheep | 659,608 steps | ret(last 50)=-9.34 sr=0%]
... [trial 10 | 2 sheep | 709,608 steps | ret(last 50)=-8.23 sr=0%]
... [trial 10 | 2 sheep | 759,608 steps | ret(last 50)=-8.34 sr=0%]
... [trial 10 | 2 sheep | 809,608 steps | ret(last 50)=-5.27 sr=0%]
... [trial 10 | 2 sheep | 859,608 steps | ret(last 50)=-8.24 sr=0%]
... [trial 10 | 2 sheep | 909,608 steps | ret(last 50)=-8.75 sr=0%]
... [trial 10 | 2 sheep | 959,608 steps | ret(last 50)=-9.15 sr=0%]
... [trial 10 | 2 sheep | 1,009,608 steps | ret(last 50)=-9.75 sr=0%]
... [trial 10 | eval n=1]
... [trial 10 | eval n=2]
... [trial 10 | eval n=3]
→ score=0.000 sr1=0.00 sr2=0.00 sr3=0.00 [319s]
[Trial 11] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 0.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
... [trial 11 | 1 sheep | 50,000 steps | ret(last 32)=-3.50 sr=12%]
... [trial 11 | 1 sheep | 100,000 steps | ret(last 50)=-5.79 sr=6%]
... [trial 11 | 1 sheep | 150,000 steps | ret(last 50)=-2.10 sr=18%]
... [trial 11 | 1 sheep | 200,000 steps | ret(last 50)=+2.60 sr=8%]
... [trial 11 | 1 sheep | 250,000 steps | ret(last 50)=+11.49 sr=8%]
... [trial 11 | 1 sheep | 300,000 steps | ret(last 50)=+21.73 sr=26%]
... [trial 11 | 1 sheep | 350,000 steps | ret(last 50)=+20.73 sr=36%]
... [trial 11 | 1 sheep | 400,000 steps | ret(last 50)=+19.77 sr=62%]
... [trial 11 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 11 | 2 sheep | 459,608 steps | ret(last 36)=+10.19 sr=11%]
... [trial 11 | 2 sheep | 509,608 steps | ret(last 50)=+11.56 sr=6%]
... [trial 11 | 2 sheep | 559,608 steps | ret(last 50)=+13.61 sr=2%]
... [trial 11 | 2 sheep | 609,608 steps | ret(last 50)=+15.44 sr=4%]
... [trial 11 | 2 sheep | 659,608 steps | ret(last 50)=+15.61 sr=10%]
... [trial 11 | 2 sheep | 709,608 steps | ret(last 50)=+16.30 sr=6%]
... [trial 11 | 2 sheep | 759,608 steps | ret(last 50)=+17.33 sr=4%]
... [trial 11 | 2 sheep | 809,608 steps | ret(last 50)=+18.36 sr=2%]
... [trial 11 | 2 sheep | 859,608 steps | ret(last 50)=+19.78 sr=8%]
... [trial 11 | 2 sheep | 909,608 steps | ret(last 50)=+20.12 sr=14%]
... [trial 11 | 2 sheep | 959,608 steps | ret(last 50)=+18.93 sr=8%]
... [trial 11 | 2 sheep | 1,009,608 steps | ret(last 50)=+18.16 sr=2%]
... [trial 11 | eval n=1]
... [trial 11 | eval n=2]
... [trial 11 | eval n=3]
→ score=0.160 sr1=0.80 sr2=0.00 sr3=0.00 [310s]
[Trial 12] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 50.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.02}
... [trial 12 | 1 sheep | 50,000 steps | ret(last 32)=-42.77 sr=0%]
... [trial 12 | 1 sheep | 100,000 steps | ret(last 50)=-39.16 sr=2%]
... [trial 12 | 1 sheep | 150,000 steps | ret(last 50)=-35.02 sr=6%]
... [trial 12 | 1 sheep | 200,000 steps | ret(last 50)=-31.49 sr=4%]
... [trial 12 | 1 sheep | 250,000 steps | ret(last 50)=-8.31 sr=16%]
... [trial 12 | 1 sheep | 300,000 steps | ret(last 50)=+7.97 sr=36%]
... [trial 12 | 1 sheep | 350,000 steps | ret(last 50)=+11.77 sr=68%]
... [trial 12 | 1 sheep | 400,000 steps | ret(last 50)=+12.47 sr=74%]
... [trial 12 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 12 | 2 sheep | 459,608 steps | ret(last 34)=-9.76 sr=0%]
... [trial 12 | 2 sheep | 509,608 steps | ret(last 50)=-4.85 sr=0%]
... [trial 12 | 2 sheep | 559,608 steps | ret(last 50)=-2.81 sr=8%]
... [trial 12 | 2 sheep | 609,608 steps | ret(last 50)=+2.27 sr=10%]
... [trial 12 | 2 sheep | 659,608 steps | ret(last 50)=+1.66 sr=6%]
... [trial 12 | 2 sheep | 709,608 steps | ret(last 50)=+3.42 sr=4%]
... [trial 12 | 2 sheep | 759,608 steps | ret(last 50)=+4.08 sr=2%]
... [trial 12 | 2 sheep | 809,608 steps | ret(last 50)=+5.49 sr=2%]
... [trial 12 | 2 sheep | 859,608 steps | ret(last 50)=+7.12 sr=10%]
... [trial 12 | 2 sheep | 909,608 steps | ret(last 50)=+7.91 sr=6%]
... [trial 12 | 2 sheep | 959,608 steps | ret(last 50)=+6.87 sr=2%]
... [trial 12 | 2 sheep | 1,009,608 steps | ret(last 50)=+5.83 sr=2%]
... [trial 12 | eval n=1]
... [trial 12 | eval n=2]
... [trial 12 | eval n=3]
→ score=0.240 sr1=0.70 sr2=0.20 sr3=0.00 [330s]
[Trial 13] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 100.0, 'W_COMPACT': 0.5, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.005}
... [trial 13 | 1 sheep | 50,000 steps | ret(last 34)=-31.15 sr=9%]
... [trial 13 | 1 sheep | 100,000 steps | ret(last 50)=-32.34 sr=4%]
... [trial 13 | 1 sheep | 150,000 steps | ret(last 50)=-33.16 sr=0%]
... [trial 13 | 1 sheep | 200,000 steps | ret(last 50)=-29.98 sr=6%]
... [trial 13 | 1 sheep | 250,000 steps | ret(last 50)=-28.64 sr=4%]
... [trial 13 | 1 sheep | 300,000 steps | ret(last 50)=-17.91 sr=14%]
... [trial 13 | 1 sheep | 350,000 steps | ret(last 50)=-15.27 sr=22%]
... [trial 13 | 1 sheep | 400,000 steps | ret(last 50)=-11.36 sr=16%]
... [trial 13 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 13 | 2 sheep | 459,608 steps | ret(last 34)=-16.78 sr=0%]
... [trial 13 | 2 sheep | 509,608 steps | ret(last 50)=-16.84 sr=2%]
... [trial 13 | 2 sheep | 559,608 steps | ret(last 50)=-14.28 sr=0%]
... [trial 13 | 2 sheep | 609,608 steps | ret(last 50)=-12.35 sr=6%]
... [trial 13 | 2 sheep | 659,608 steps | ret(last 50)=-14.50 sr=2%]
... [trial 13 | 2 sheep | 709,608 steps | ret(last 50)=-12.96 sr=2%]
... [trial 13 | 2 sheep | 759,608 steps | ret(last 50)=-9.86 sr=4%]
... [trial 13 | 2 sheep | 809,608 steps | ret(last 50)=-13.88 sr=2%]
... [trial 13 | 2 sheep | 859,608 steps | ret(last 50)=-14.76 sr=0%]
... [trial 13 | 2 sheep | 909,608 steps | ret(last 50)=-12.79 sr=0%]
... [trial 13 | 2 sheep | 959,608 steps | ret(last 50)=-12.54 sr=0%]
... [trial 13 | 2 sheep | 1,009,608 steps | ret(last 50)=-12.11 sr=8%]
... [trial 13 | eval n=1]
... [trial 13 | eval n=2]
... [trial 13 | eval n=3]
→ score=0.060 sr1=0.30 sr2=0.00 sr3=0.00 [323s]
[Trial 14] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
... [trial 14 | 1 sheep | 50,000 steps | ret(last 32)=-20.15 sr=9%]
... [trial 14 | 1 sheep | 100,000 steps | ret(last 50)=-15.28 sr=8%]
... [trial 14 | 1 sheep | 150,000 steps | ret(last 50)=-8.87 sr=26%]
... [trial 14 | 1 sheep | 200,000 steps | ret(last 50)=-9.94 sr=8%]
... [trial 14 | 1 sheep | 250,000 steps | ret(last 50)=-9.04 sr=8%]
... [trial 14 | 1 sheep | 300,000 steps | ret(last 50)=-7.40 sr=14%]
... [trial 14 | 1 sheep | 350,000 steps | ret(last 50)=+2.22 sr=50%]
... [trial 14 | 1 sheep | 400,000 steps | ret(last 50)=+4.06 sr=58%]
... [trial 14 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 14 | 2 sheep | 459,608 steps | ret(last 33)=-5.93 sr=3%]
... [trial 14 | 2 sheep | 509,608 steps | ret(last 50)=-6.85 sr=4%]
... [trial 14 | 2 sheep | 559,608 steps | ret(last 50)=-6.81 sr=6%]
... [trial 14 | 2 sheep | 609,608 steps | ret(last 50)=-4.80 sr=4%]
... [trial 14 | 2 sheep | 659,608 steps | ret(last 50)=-6.55 sr=4%]
... [trial 14 | 2 sheep | 709,608 steps | ret(last 50)=-4.81 sr=12%]
... [trial 14 | 2 sheep | 759,608 steps | ret(last 50)=-5.41 sr=10%]
... [trial 14 | 2 sheep | 809,608 steps | ret(last 50)=-0.00 sr=30%]
... [trial 14 | 2 sheep | 859,608 steps | ret(last 50)=+1.17 sr=26%]
... [trial 14 | 2 sheep | 909,608 steps | ret(last 50)=+0.17 sr=20%]
... [trial 14 | 2 sheep | 959,608 steps | ret(last 50)=-0.96 sr=18%]
... [trial 14 | 2 sheep | 1,009,608 steps | ret(last 50)=-1.33 sr=20%]
... [trial 14 | eval n=1]
... [trial 14 | eval n=2]
... [trial 14 | eval n=3]
→ score=0.350 sr1=1.00 sr2=0.30 sr3=0.00 [314s]
[Trial 15] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 100.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.05}
... [trial 15 | 1 sheep | 50,000 steps | ret(last 32)=-6.83 sr=3%]
... [trial 15 | 1 sheep | 100,000 steps | ret(last 50)=-7.59 sr=4%]
... [trial 15 | 1 sheep | 150,000 steps | ret(last 50)=-5.74 sr=6%]
... [trial 15 | 1 sheep | 200,000 steps | ret(last 50)=-5.92 sr=6%]
... [trial 15 | 1 sheep | 250,000 steps | ret(last 50)=+8.14 sr=22%]
... [trial 15 | 1 sheep | 300,000 steps | ret(last 50)=+15.51 sr=22%]
... [trial 15 | 1 sheep | 350,000 steps | ret(last 50)=+21.46 sr=20%]
... [trial 15 | 1 sheep | 400,000 steps | ret(last 50)=+22.52 sr=16%]
... [trial 15 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 15 | 2 sheep | 459,608 steps | ret(last 35)=+6.28 sr=0%]
... [trial 15 | 2 sheep | 509,608 steps | ret(last 50)=+13.19 sr=2%]
... [trial 15 | 2 sheep | 559,608 steps | ret(last 50)=+15.58 sr=4%]
... [trial 15 | 2 sheep | 609,608 steps | ret(last 50)=+18.78 sr=10%]
... [trial 15 | 2 sheep | 659,608 steps | ret(last 50)=+22.71 sr=10%]
... [trial 15 | 2 sheep | 709,608 steps | ret(last 50)=+23.95 sr=6%]
... [trial 15 | 2 sheep | 759,608 steps | ret(last 50)=+24.84 sr=14%]
... [trial 15 | 2 sheep | 809,608 steps | ret(last 50)=+24.00 sr=8%]
... [trial 15 | 2 sheep | 859,608 steps | ret(last 50)=+23.91 sr=2%]
... [trial 15 | 2 sheep | 909,608 steps | ret(last 50)=+23.73 sr=4%]
... [trial 15 | 2 sheep | 959,608 steps | ret(last 50)=+24.23 sr=4%]
... [trial 15 | 2 sheep | 1,009,608 steps | ret(last 50)=+24.77 sr=4%]
... [trial 15 | eval n=1]
... [trial 15 | eval n=2]
... [trial 15 | eval n=3]
→ score=0.140 sr1=0.70 sr2=0.00 sr3=0.00 [323s]
[Trial 16] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 100.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ent_coef': 0.01}
... [trial 16 | 1 sheep | 50,000 steps | ret(last 32)=-7.14 sr=9%]
... [trial 16 | 1 sheep | 100,000 steps | ret(last 50)=-5.58 sr=12%]
... [trial 16 | 1 sheep | 150,000 steps | ret(last 50)=+5.93 sr=26%]
... [trial 16 | 1 sheep | 200,000 steps | ret(last 50)=+15.53 sr=68%]
... [trial 16 | 1 sheep | 250,000 steps | ret(last 50)=+14.88 sr=56%]
... [trial 16 | 1 sheep | 300,000 steps | ret(last 50)=+13.86 sr=36%]
... [trial 16 | 1 sheep | 350,000 steps | ret(last 50)=+14.84 sr=54%]
... [trial 16 | 1 sheep | 400,000 steps | ret(last 50)=+15.15 sr=70%]
... [trial 16 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 16 | 2 sheep | 459,608 steps | ret(last 34)=-1.47 sr=6%]
... [trial 16 | 2 sheep | 509,608 steps | ret(last 50)=-1.63 sr=2%]
... [trial 16 | 2 sheep | 559,608 steps | ret(last 50)=-3.78 sr=2%]
... [trial 16 | 2 sheep | 609,608 steps | ret(last 50)=-2.17 sr=4%]
... [trial 16 | 2 sheep | 659,608 steps | ret(last 50)=+1.25 sr=6%]
... [trial 16 | 2 sheep | 709,608 steps | ret(last 50)=+0.28 sr=4%]
... [trial 16 | 2 sheep | 759,608 steps | ret(last 50)=+2.74 sr=4%]
... [trial 16 | 2 sheep | 809,608 steps | ret(last 50)=+7.19 sr=6%]
... [trial 16 | 2 sheep | 859,608 steps | ret(last 50)=+7.68 sr=4%]
... [trial 16 | 2 sheep | 909,608 steps | ret(last 50)=+2.38 sr=0%]
... [trial 16 | 2 sheep | 959,608 steps | ret(last 50)=+3.43 sr=0%]
... [trial 16 | 2 sheep | 1,009,608 steps | ret(last 50)=+11.11 sr=0%]
... [trial 16 | eval n=1]
... [trial 16 | eval n=2]
... [trial 16 | eval n=3]
→ score=0.060 sr1=0.30 sr2=0.00 sr3=0.00 [312s]
[Trial 17] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.005, 'W_COMPLETE': 200.0, 'W_COMPACT': 0.0, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': True, 'ent_coef': 0.05}
... [trial 17 | 1 sheep | 50,000 steps | ret(last 32)=+2.15 sr=6%]
... [trial 17 | 1 sheep | 100,000 steps | ret(last 50)=-0.51 sr=2%]
... [trial 17 | 1 sheep | 150,000 steps | ret(last 50)=+0.84 sr=6%]
... [trial 17 | 1 sheep | 200,000 steps | ret(last 50)=+2.96 sr=6%]
... [trial 17 | 1 sheep | 250,000 steps | ret(last 50)=+3.04 sr=4%]
... [trial 17 | 1 sheep | 300,000 steps | ret(last 50)=+10.58 sr=10%]
... [trial 17 | 1 sheep | 350,000 steps | ret(last 50)=+21.95 sr=36%]
... [trial 17 | 1 sheep | 400,000 steps | ret(last 50)=+19.20 sr=16%]
... [trial 17 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 17 | 2 sheep | 459,608 steps | ret(last 32)=+10.27 sr=16%]
... [trial 17 | 2 sheep | 509,608 steps | ret(last 50)=+12.25 sr=6%]
... [trial 17 | 2 sheep | 559,608 steps | ret(last 50)=+12.94 sr=6%]
... [trial 17 | 2 sheep | 609,608 steps | ret(last 50)=+11.82 sr=4%]
... [trial 17 | 2 sheep | 659,608 steps | ret(last 50)=+13.45 sr=4%]
... [trial 17 | 2 sheep | 709,608 steps | ret(last 50)=+13.03 sr=4%]
... [trial 17 | 2 sheep | 759,608 steps | ret(last 50)=+10.69 sr=6%]
... [trial 17 | 2 sheep | 809,608 steps | ret(last 50)=+7.79 sr=6%]
... [trial 17 | 2 sheep | 859,608 steps | ret(last 50)=+12.16 sr=16%]
... [trial 17 | 2 sheep | 909,608 steps | ret(last 50)=+11.75 sr=12%]
... [trial 17 | 2 sheep | 959,608 steps | ret(last 50)=+13.65 sr=16%]
... [trial 17 | 2 sheep | 1,009,608 steps | ret(last 50)=+12.43 sr=10%]
... [trial 17 | eval n=1]
... [trial 17 | eval n=2]
... [trial 17 | eval n=3]
→ score=0.240 sr1=0.70 sr2=0.20 sr3=0.00 [304s]
[Trial 18] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.025, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.005, 'W_COMPLETE': 100.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.02}
... [trial 18 | 1 sheep | 50,000 steps | ret(last 32)=-3.63 sr=3%]
... [trial 18 | 1 sheep | 100,000 steps | ret(last 50)=-2.28 sr=12%]
... [trial 18 | 1 sheep | 150,000 steps | ret(last 50)=-3.15 sr=10%]
... [trial 18 | 1 sheep | 200,000 steps | ret(last 50)=-3.31 sr=6%]
... [trial 18 | 1 sheep | 250,000 steps | ret(last 50)=-3.23 sr=2%]
... [trial 18 | 1 sheep | 300,000 steps | ret(last 50)=+3.55 sr=22%]
... [trial 18 | 1 sheep | 350,000 steps | ret(last 50)=+8.15 sr=28%]
... [trial 18 | 1 sheep | 400,000 steps | ret(last 50)=+10.56 sr=18%]
... [trial 18 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 18 | 2 sheep | 459,608 steps | ret(last 34)=+3.80 sr=0%]
... [trial 18 | 2 sheep | 509,608 steps | ret(last 50)=+7.30 sr=4%]
... [trial 18 | 2 sheep | 559,608 steps | ret(last 50)=+9.61 sr=10%]
... [trial 18 | 2 sheep | 609,608 steps | ret(last 50)=+7.70 sr=8%]
... [trial 18 | 2 sheep | 659,608 steps | ret(last 50)=+6.01 sr=2%]
... [trial 18 | 2 sheep | 709,608 steps | ret(last 50)=+8.28 sr=6%]
... [trial 18 | 2 sheep | 759,608 steps | ret(last 50)=+6.74 sr=0%]
... [trial 18 | 2 sheep | 809,608 steps | ret(last 50)=+10.61 sr=0%]
... [trial 18 | 2 sheep | 859,608 steps | ret(last 50)=+12.20 sr=0%]
... [trial 18 | 2 sheep | 909,608 steps | ret(last 50)=+11.25 sr=2%]
... [trial 18 | 2 sheep | 959,608 steps | ret(last 50)=+13.58 sr=4%]
... [trial 18 | 2 sheep | 1,009,608 steps | ret(last 50)=+16.61 sr=20%]
... [trial 18 | eval n=1]
... [trial 18 | eval n=2]
... [trial 18 | eval n=3]
→ score=0.160 sr1=0.30 sr2=0.20 sr3=0.00 [316s]
[Trial 19] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.025, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 50.0, 'W_COMPACT': 0.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ent_coef': 0.005}
... [trial 19 | 1 sheep | 50,000 steps | ret(last 32)=-36.89 sr=3%]
... [trial 19 | 1 sheep | 100,000 steps | ret(last 50)=-30.93 sr=4%]
... [trial 19 | 1 sheep | 150,000 steps | ret(last 50)=-28.35 sr=12%]
... [trial 19 | 1 sheep | 200,000 steps | ret(last 50)=-30.73 sr=8%]
... [trial 19 | 1 sheep | 250,000 steps | ret(last 50)=-29.54 sr=4%]
... [trial 19 | 1 sheep | 300,000 steps | ret(last 50)=-20.15 sr=20%]
... [trial 19 | 1 sheep | 350,000 steps | ret(last 50)=-0.07 sr=68%]
... [trial 19 | 1 sheep | 400,000 steps | ret(last 50)=+1.66 sr=52%]
... [trial 19 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 19 | 2 sheep | 459,608 steps | ret(last 36)=-12.82 sr=19%]
... [trial 19 | 2 sheep | 509,608 steps | ret(last 50)=-20.66 sr=0%]
... [trial 19 | 2 sheep | 559,608 steps | ret(last 50)=-16.54 sr=4%]
... [trial 19 | 2 sheep | 609,608 steps | ret(last 50)=-17.11 sr=4%]
... [trial 19 | 2 sheep | 659,608 steps | ret(last 50)=-19.32 sr=0%]
... [trial 19 | 2 sheep | 709,608 steps | ret(last 50)=-16.20 sr=0%]
... [trial 19 | 2 sheep | 759,608 steps | ret(last 50)=-13.12 sr=2%]
... [trial 19 | 2 sheep | 809,608 steps | ret(last 50)=-17.18 sr=4%]
... [trial 19 | 2 sheep | 859,608 steps | ret(last 50)=-18.16 sr=2%]
... [trial 19 | 2 sheep | 909,608 steps | ret(last 50)=-18.12 sr=4%]
... [trial 19 | 2 sheep | 959,608 steps | ret(last 50)=-17.79 sr=2%]
... [trial 19 | 2 sheep | 1,009,608 steps | ret(last 50)=-17.58 sr=0%]
... [trial 19 | eval n=1]
... [trial 19 | eval n=2]
... [trial 19 | eval n=3]
→ score=0.160 sr1=0.80 sr2=0.00 sr3=0.00 [318s]
[Trial 20] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 100.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
... [trial 20 | 1 sheep | 50,000 steps | ret(last 33)=-15.83 sr=9%]
... [trial 20 | 1 sheep | 100,000 steps | ret(last 50)=-18.74 sr=10%]
... [trial 20 | 1 sheep | 150,000 steps | ret(last 50)=-22.88 sr=6%]
... [trial 20 | 1 sheep | 200,000 steps | ret(last 50)=-23.86 sr=4%]
... [trial 20 | 1 sheep | 250,000 steps | ret(last 50)=-21.10 sr=6%]
... [trial 20 | 1 sheep | 300,000 steps | ret(last 50)=-18.42 sr=6%]
... [trial 20 | 1 sheep | 350,000 steps | ret(last 50)=+1.74 sr=14%]
... [trial 20 | 1 sheep | 400,000 steps | ret(last 50)=+7.62 sr=34%]
... [trial 20 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 20 | 2 sheep | 459,608 steps | ret(last 34)=-2.63 sr=3%]
... [trial 20 | 2 sheep | 509,608 steps | ret(last 50)=+1.10 sr=2%]
... [trial 20 | 2 sheep | 559,608 steps | ret(last 50)=+5.57 sr=4%]
... [trial 20 | 2 sheep | 609,608 steps | ret(last 50)=+8.54 sr=8%]
... [trial 20 | 2 sheep | 659,608 steps | ret(last 50)=+12.02 sr=8%]
... [trial 20 | 2 sheep | 709,608 steps | ret(last 50)=+11.28 sr=4%]
... [trial 20 | 2 sheep | 759,608 steps | ret(last 50)=+11.45 sr=2%]
... [trial 20 | 2 sheep | 809,608 steps | ret(last 50)=+9.52 sr=0%]
... [trial 20 | 2 sheep | 859,608 steps | ret(last 50)=+9.07 sr=2%]
... [trial 20 | 2 sheep | 909,608 steps | ret(last 50)=+12.06 sr=8%]
... [trial 20 | 2 sheep | 959,608 steps | ret(last 50)=+12.77 sr=8%]
... [trial 20 | 2 sheep | 1,009,608 steps | ret(last 50)=+11.55 sr=2%]
... [trial 20 | eval n=1]
... [trial 20 | eval n=2]
... [trial 20 | eval n=3]
→ score=0.130 sr1=0.40 sr2=0.10 sr3=0.00 [315s]
[Trial 21] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 100.0, 'W_COMPACT': 0.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ent_coef': 0.005}
... [trial 21 | 1 sheep | 50,000 steps | ret(last 32)=-14.94 sr=6%]
... [trial 21 | 1 sheep | 100,000 steps | ret(last 50)=-12.47 sr=4%]
... [trial 21 | 1 sheep | 150,000 steps | ret(last 50)=-12.65 sr=6%]
... [trial 21 | 1 sheep | 200,000 steps | ret(last 50)=-12.44 sr=2%]
... [trial 21 | 1 sheep | 250,000 steps | ret(last 50)=-12.95 sr=6%]
... [trial 21 | 1 sheep | 300,000 steps | ret(last 50)=-13.04 sr=6%]
... [trial 21 | 1 sheep | 350,000 steps | ret(last 50)=-5.14 sr=8%]
... [trial 21 | 1 sheep | 400,000 steps | ret(last 50)=-0.46 sr=8%]
... [trial 21 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 21 | 2 sheep | 459,608 steps | ret(last 33)=-7.10 sr=0%]
... [trial 21 | 2 sheep | 509,608 steps | ret(last 50)=-8.26 sr=0%]
... [trial 21 | 2 sheep | 559,608 steps | ret(last 50)=-6.17 sr=4%]
... [trial 21 | 2 sheep | 609,608 steps | ret(last 50)=-4.23 sr=4%]
... [trial 21 | 2 sheep | 659,608 steps | ret(last 50)=-5.62 sr=0%]
... [trial 21 | 2 sheep | 709,608 steps | ret(last 50)=-3.72 sr=0%]
... [trial 21 | 2 sheep | 759,608 steps | ret(last 50)=-2.06 sr=0%]
... [trial 21 | 2 sheep | 809,608 steps | ret(last 50)=-1.23 sr=0%]
... [trial 21 | 2 sheep | 859,608 steps | ret(last 50)=-0.14 sr=0%]
... [trial 21 | 2 sheep | 909,608 steps | ret(last 50)=+1.30 sr=2%]
... [trial 21 | 2 sheep | 959,608 steps | ret(last 50)=+0.64 sr=2%]
... [trial 21 | 2 sheep | 1,009,608 steps | ret(last 50)=+2.62 sr=6%]
... [trial 21 | eval n=1]
... [trial 21 | eval n=2]
... [trial 21 | eval n=3]
→ score=0.050 sr1=0.00 sr2=0.10 sr3=0.00 [310s]
[Trial 22] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 100.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ent_coef': 0.005}
... [trial 22 | 1 sheep | 50,000 steps | ret(last 32)=-11.10 sr=6%]
... [trial 22 | 1 sheep | 100,000 steps | ret(last 50)=-10.61 sr=8%]
... [trial 22 | 1 sheep | 150,000 steps | ret(last 50)=-11.16 sr=4%]
... [trial 22 | 1 sheep | 200,000 steps | ret(last 50)=-11.15 sr=4%]
... [trial 22 | 1 sheep | 250,000 steps | ret(last 50)=-10.56 sr=6%]
... [trial 22 | 1 sheep | 300,000 steps | ret(last 50)=-14.90 sr=0%]
... [trial 22 | 1 sheep | 350,000 steps | ret(last 50)=-5.11 sr=14%]
... [trial 22 | 1 sheep | 400,000 steps | ret(last 50)=+2.22 sr=24%]
... [trial 22 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 22 | 2 sheep | 459,608 steps | ret(last 35)=-4.69 sr=6%]
... [trial 22 | 2 sheep | 509,608 steps | ret(last 50)=-3.17 sr=0%]
... [trial 22 | 2 sheep | 559,608 steps | ret(last 50)=+2.18 sr=2%]
... [trial 22 | 2 sheep | 609,608 steps | ret(last 50)=+4.53 sr=8%]
... [trial 22 | 2 sheep | 659,608 steps | ret(last 50)=+4.97 sr=10%]
... [trial 22 | 2 sheep | 709,608 steps | ret(last 50)=+5.06 sr=8%]
... [trial 22 | 2 sheep | 759,608 steps | ret(last 50)=+6.04 sr=4%]
... [trial 22 | 2 sheep | 809,608 steps | ret(last 50)=+5.95 sr=4%]
... [trial 22 | 2 sheep | 859,608 steps | ret(last 50)=+3.34 sr=2%]
... [trial 22 | 2 sheep | 909,608 steps | ret(last 50)=+6.80 sr=8%]
... [trial 22 | 2 sheep | 959,608 steps | ret(last 50)=+4.13 sr=8%]
... [trial 22 | 2 sheep | 1,009,608 steps | ret(last 50)=+4.17 sr=2%]
... [trial 22 | eval n=1]
... [trial 22 | eval n=2]
... [trial 22 | eval n=3]
→ score=0.110 sr1=0.30 sr2=0.10 sr3=0.00 [316s]
[Trial 23] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 100.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ent_coef': 0.05}
... [trial 23 | 1 sheep | 50,000 steps | ret(last 32)=-22.59 sr=9%]
... [trial 23 | 1 sheep | 100,000 steps | ret(last 50)=-21.14 sr=6%]
... [trial 23 | 1 sheep | 150,000 steps | ret(last 50)=-20.75 sr=6%]
... [trial 23 | 1 sheep | 200,000 steps | ret(last 50)=-20.37 sr=8%]
... [trial 23 | 1 sheep | 250,000 steps | ret(last 50)=-5.04 sr=18%]
... [trial 23 | 1 sheep | 300,000 steps | ret(last 50)=+7.25 sr=12%]
... [trial 23 | 1 sheep | 350,000 steps | ret(last 50)=+11.34 sr=32%]
... [trial 23 | 1 sheep | 400,000 steps | ret(last 50)=+13.02 sr=24%]
... [trial 23 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 23 | 2 sheep | 459,608 steps | ret(last 32)=+0.29 sr=3%]
... [trial 23 | 2 sheep | 509,608 steps | ret(last 50)=-0.39 sr=4%]
... [trial 23 | 2 sheep | 559,608 steps | ret(last 50)=+6.56 sr=2%]
... [trial 23 | 2 sheep | 609,608 steps | ret(last 50)=+10.45 sr=2%]
... [trial 23 | 2 sheep | 659,608 steps | ret(last 50)=+9.75 sr=2%]
... [trial 23 | 2 sheep | 709,608 steps | ret(last 50)=+7.98 sr=6%]
... [trial 23 | 2 sheep | 759,608 steps | ret(last 50)=+9.20 sr=4%]
... [trial 23 | 2 sheep | 809,608 steps | ret(last 50)=+11.03 sr=6%]
... [trial 23 | 2 sheep | 859,608 steps | ret(last 50)=+12.53 sr=6%]
... [trial 23 | 2 sheep | 909,608 steps | ret(last 50)=+10.86 sr=6%]
... [trial 23 | 2 sheep | 959,608 steps | ret(last 50)=+13.16 sr=14%]
... [trial 23 | 2 sheep | 1,009,608 steps | ret(last 50)=+12.36 sr=12%]
... [trial 23 | eval n=1]
... [trial 23 | eval n=2]
... [trial 23 | eval n=3]
→ score=0.060 sr1=0.30 sr2=0.00 sr3=0.00 [472s]
[Trial 24] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.025, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.005, 'W_COMPLETE': 200.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.01}
... [trial 24 | 1 sheep | 50,000 steps | ret(last 32)=-1.97 sr=0%]
... [trial 24 | 1 sheep | 100,000 steps | ret(last 50)=-1.86 sr=2%]
... [trial 24 | 1 sheep | 150,000 steps | ret(last 50)=-2.97 sr=4%]
... [trial 24 | 1 sheep | 200,000 steps | ret(last 50)=-0.45 sr=8%]
... [trial 24 | 1 sheep | 250,000 steps | ret(last 50)=-1.73 sr=4%]
... [trial 24 | 1 sheep | 300,000 steps | ret(last 50)=+0.64 sr=4%]
... [trial 24 | 1 sheep | 350,000 steps | ret(last 50)=+1.35 sr=2%]
... [trial 24 | 1 sheep | 400,000 steps | ret(last 50)=+0.95 sr=4%]
... [trial 24 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 24 | 2 sheep | 459,608 steps | ret(last 33)=+1.34 sr=0%]
... [trial 24 | 2 sheep | 509,608 steps | ret(last 50)=+1.48 sr=0%]
... [trial 24 | 2 sheep | 559,608 steps | ret(last 50)=+6.05 sr=0%]
... [trial 24 | 2 sheep | 609,608 steps | ret(last 50)=+3.58 sr=0%]
... [trial 24 | 2 sheep | 659,608 steps | ret(last 50)=+2.33 sr=0%]
... [trial 24 | 2 sheep | 709,608 steps | ret(last 50)=+4.05 sr=2%]
... [trial 24 | 2 sheep | 759,608 steps | ret(last 50)=+0.93 sr=0%]
... [trial 24 | 2 sheep | 809,608 steps | ret(last 50)=-0.39 sr=0%]
... [trial 24 | 2 sheep | 859,608 steps | ret(last 50)=-2.68 sr=0%]
... [trial 24 | 2 sheep | 909,608 steps | ret(last 50)=+0.90 sr=0%]
... [trial 24 | 2 sheep | 959,608 steps | ret(last 50)=+2.63 sr=0%]
... [trial 24 | 2 sheep | 1,009,608 steps | ret(last 50)=+2.88 sr=0%]
... [trial 24 | eval n=1]
... [trial 24 | eval n=2]
... [trial 24 | eval n=3]
→ score=0.060 sr1=0.30 sr2=0.00 sr3=0.00 [335s]
[Trial 25] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 50.0, 'W_COMPACT': 0.5, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': True, 'ent_coef': 0.02}
... [trial 25 | 1 sheep | 50,000 steps | ret(last 32)=-56.03 sr=3%]
... [trial 25 | 1 sheep | 100,000 steps | ret(last 50)=-53.61 sr=4%]
... [trial 25 | 1 sheep | 150,000 steps | ret(last 50)=-54.50 sr=4%]
... [trial 25 | 1 sheep | 200,000 steps | ret(last 50)=-57.55 sr=4%]
... [trial 25 | 1 sheep | 250,000 steps | ret(last 50)=-54.77 sr=8%]
... [trial 25 | 1 sheep | 300,000 steps | ret(last 50)=-55.53 sr=4%]
... [trial 25 | 1 sheep | 350,000 steps | ret(last 50)=-55.26 sr=4%]
... [trial 25 | 1 sheep | 400,000 steps | ret(last 50)=-56.11 sr=4%]
... [trial 25 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 25 | 2 sheep | 459,608 steps | ret(last 32)=-48.36 sr=0%]
... [trial 25 | 2 sheep | 509,608 steps | ret(last 50)=-54.87 sr=0%]
... [trial 25 | 2 sheep | 559,608 steps | ret(last 50)=-56.08 sr=0%]
... [trial 25 | 2 sheep | 609,608 steps | ret(last 50)=-54.86 sr=0%]
... [trial 25 | 2 sheep | 659,608 steps | ret(last 50)=-50.62 sr=0%]
... [trial 25 | 2 sheep | 709,608 steps | ret(last 50)=-49.92 sr=0%]
... [trial 25 | 2 sheep | 759,608 steps | ret(last 50)=-50.11 sr=0%]
... [trial 25 | 2 sheep | 809,608 steps | ret(last 50)=-51.41 sr=0%]
... [trial 25 | 2 sheep | 859,608 steps | ret(last 50)=-51.02 sr=0%]
... [trial 25 | 2 sheep | 909,608 steps | ret(last 50)=-50.80 sr=0%]
... [trial 25 | 2 sheep | 959,608 steps | ret(last 50)=-50.01 sr=0%]
... [trial 25 | 2 sheep | 1,009,608 steps | ret(last 50)=-49.71 sr=0%]
... [trial 25 | eval n=1]
... [trial 25 | eval n=2]
... [trial 25 | eval n=3]
→ score=0.000 sr1=0.00 sr2=0.00 sr3=0.00 [306s]
============================================================================================
LEADERBOARD
============================================================================================
rank score sr1 sr2 sr3 config
----------------------------------------------------------------------------------------
1 0.350 1.00 0.30 0.00 W_PER_SHEEP=1.0 W_ALIGN=0.0 W_PEN_BONUS=5.0 W_STEP_COST=0.02 W_COMPLETE=200.0 W_COMPACT=1.5 ALIGN_SHAPE=standoff ALIGN_GATED=False ent_coef=0.02
2 0.270 0.70 0.20 0.10 W_PER_SHEEP=6.0 W_ALIGN=0.025 W_PEN_BONUS=10.0 W_STEP_COST=0.02 W_COMPLETE=50.0 W_COMPACT=3.0 ALIGN_SHAPE=near ALIGN_GATED=False ent_coef=0.01
3 0.240 0.70 0.20 0.00 W_PER_SHEEP=1.0 W_ALIGN=0.1 W_PEN_BONUS=5.0 W_STEP_COST=0.05 W_COMPLETE=50.0 W_COMPACT=3.0 ALIGN_SHAPE=near ALIGN_GATED=False ent_coef=0.02
4 0.240 0.70 0.20 0.00 W_PER_SHEEP=6.0 W_ALIGN=0.1 W_PEN_BONUS=5.0 W_STEP_COST=0.005 W_COMPLETE=200.0 W_COMPACT=0.0 ALIGN_SHAPE=near ALIGN_GATED=True ent_coef=0.05
5 0.200 1.00 0.00 0.00 W_PER_SHEEP=6.0 W_ALIGN=0.1 W_PEN_BONUS=5.0 W_STEP_COST=0.02 W_COMPLETE=50.0 W_COMPACT=3.0 ALIGN_SHAPE=near ALIGN_GATED=True ent_coef=0.005
6 0.190 0.70 0.10 0.00 W_PER_SHEEP=2.0 W_ALIGN=0.0 W_PEN_BONUS=20.0 W_STEP_COST=0.02 W_COMPLETE=50.0 W_COMPACT=0.0 ALIGN_SHAPE=near ALIGN_GATED=False ent_coef=0.05
7 0.160 0.80 0.00 0.00 W_PER_SHEEP=6.0 W_ALIGN=0.025 W_PEN_BONUS=20.0 W_STEP_COST=0.05 W_COMPLETE=200.0 W_COMPACT=3.0 ALIGN_SHAPE=standoff ALIGN_GATED=True ent_coef=0.01
8 0.160 0.80 0.00 0.00 W_PER_SHEEP=2.0 W_ALIGN=0.1 W_PEN_BONUS=20.0 W_STEP_COST=0.02 W_COMPLETE=200.0 W_COMPACT=0.5 ALIGN_SHAPE=standoff ALIGN_GATED=False ent_coef=0.02
9 0.160 0.80 0.00 0.00 W_PER_SHEEP=2.0 W_ALIGN=0.025 W_PEN_BONUS=10.0 W_STEP_COST=0.05 W_COMPLETE=50.0 W_COMPACT=0.0 ALIGN_SHAPE=standoff ALIGN_GATED=True ent_coef=0.005
10 0.160 0.30 0.20 0.00 W_PER_SHEEP=2.0 W_ALIGN=0.025 W_PEN_BONUS=10.0 W_STEP_COST=0.005 W_COMPLETE=100.0 W_COMPACT=1.5 ALIGN_SHAPE=near ALIGN_GATED=False ent_coef=0.02
11 0.150 0.50 0.10 0.00 W_PER_SHEEP=1.0 W_ALIGN=0.1 W_PEN_BONUS=10.0 W_STEP_COST=0.02 W_COMPLETE=100.0 W_COMPACT=3.0 ALIGN_SHAPE=standoff ALIGN_GATED=False ent_coef=0.005
12 0.140 0.70 0.00 0.00 W_PER_SHEEP=1.0 W_ALIGN=0.1 W_PEN_BONUS=10.0 W_STEP_COST=0.02 W_COMPLETE=100.0 W_COMPACT=1.5 ALIGN_SHAPE=standoff ALIGN_GATED=False ent_coef=0.05
13 0.130 0.40 0.10 0.00 W_PER_SHEEP=1.0 W_ALIGN=0.1 W_PEN_BONUS=20.0 W_STEP_COST=0.05 W_COMPLETE=100.0 W_COMPACT=1.5 ALIGN_SHAPE=standoff ALIGN_GATED=False ent_coef=0.02
14 0.110 0.30 0.10 0.00 W_PER_SHEEP=6.0 W_ALIGN=0.025 W_PEN_BONUS=5.0 W_STEP_COST=0.05 W_COMPLETE=100.0 W_COMPACT=0.0 ALIGN_SHAPE=standoff ALIGN_GATED=False ent_coef=0.005
15 0.110 0.30 0.10 0.00 W_PER_SHEEP=2.0 W_ALIGN=0.05 W_PEN_BONUS=10.0 W_STEP_COST=0.02 W_COMPLETE=100.0 W_COMPACT=3.0 ALIGN_SHAPE=standoff ALIGN_GATED=True ent_coef=0.005
Best config saved to runs/sweep_20260425_124630/best.json
Total trials: 25 (25 successful, 0 failed)
Total time: 2.28h

Some files were not shown because too many files have changed in this diff Show More