Mimics webots approach better + debug. Lucky number

This commit is contained in:
Johnny Fernandes
2026-04-26 18:55:53 +01:00
parent 1af7d03ce2
commit deeae3193e
142 changed files with 138 additions and 31306 deletions
+11
View File
@@ -8,3 +8,14 @@ __pycache__/
# Training
training/**/events.out.tfevents.*
training/**/checkpoints/
training/runs/**
!training/runs/.gitkeep
# Controller runtime artefacts
controllers/shepherd_dog_rl/debug*.csv
controllers/shepherd_dog_rl/debug_out*/
controllers/shepherd_dog_rl/final_model*.zip
controllers/shepherd_dog_rl/vecnorm*.pkl
# Optional env parity debug
dog_debug.csv
File diff suppressed because it is too large Load Diff
Binary file not shown.

Before

Width:  |  Height:  |  Size: 164 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 200 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 146 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 233 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 74 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 194 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 233 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 158 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 220 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 151 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 266 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 84 KiB

File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+126 -30
View File
@@ -19,6 +19,7 @@ Permutation-invariant by design: curriculum stages share the same obs dim
so VecNormalize statistics transfer as n_sheep advances.
"""
import csv
import numpy as np
import gymnasium as gym
from gymnasium import spaces
@@ -31,7 +32,8 @@ class HerdingEnv(gym.Env):
# World constants — must match Webots world file
# -----------------------------------------------------------------------
MAX_SHEEP = 10
FIELD = 15.0 # half-size; positions ∈ [-FIELD, FIELD]
FIELD = 15.0 # field wall geometry in world file
SHEEP_WALL_INNER = 14.5 # sheep.py wall checks use ±14.5
PEN_X = (10.0, 13.0)
PEN_Y = (-15.0, -8.0)
PEN_CENTER = np.array([11.5, -11.5], dtype=np.float32)
@@ -41,17 +43,22 @@ class HerdingEnv(gym.Env):
# Dynamics — calibrated to match Webots robot specs
# -----------------------------------------------------------------------
DOG_SPEED = 2.5 # m/s
SHEEP_FLEE_V = 0.65 # m/s
SHEEP_WANDER_V = 0.20 # m/s
SHEEP_FLEE_V = 0.62 # m/s (20 rad/s * 0.031 m wheel radius in sheep.py)
SHEEP_WANDER_V = 0.093 # m/s (3 rad/s * 0.031 m wheel radius in sheep.py)
DT = 0.1 # seconds per step
# Wheeled dog dynamics — mirror the Webots controller's drive():
# forward speed gated by cos(heading_error); turn rate proportional to
# error. Without this, the env treats the dog as a particle that can
# change direction instantly, producing policies that bang-bang and don't
# transfer to the wheeled Webots robot.
DOG_K_TURN = 4.0 # rad/s per rad (heading-error gain)
DOG_MAX_TURN_RATE = 6.0 # rad/s (cap on turn rate)
# Differential-drive dog dynamics — mirrors shepherd_dog_rl.py drive():
# speed_ms = ||a|| * DOG_SPEED
# err = wrap(target_heading - heading)
# fwd_ms = speed_ms * max(0, cos(err))
# fwd_rad = fwd_ms / DOG_WHEEL_R
# turn = DOG_K_TURN * err
# l = clamp(fwd_rad - turn), r = clamp(fwd_rad + turn)
# Then integrated as unicycle kinematics using wheel geometry.
DOG_K_TURN = 4.0 # rad/s per rad (matches Webots controller)
DOG_WHEEL_R = 0.038 # m (ShepherdDog.proto wheel radius)
DOG_AXLE_TRACK = 0.28 # m (wheel anchors at y=±0.14 in proto)
DOG_MOTOR_MAX = 70.0 # rad/s (ShepherdDog.proto motor maxVelocity)
DOG_STOP_THRESHOLD = 0.05 # ||action|| below this → dog stops in place
# Boid parameters — identical to sheep.py
@@ -135,6 +142,15 @@ class HerdingEnv(gym.Env):
self.wander_ang = np.zeros(self.MAX_SHEEP, dtype=np.float32)
self._fig = None
# Differential-drive debug CSV for sim/Webots parity checks.
# Always on by design.
self._dog_debug_file = open("dog_debug.csv", "w", newline="")
self._dog_debug_writer = csv.writer(self._dog_debug_file)
self._dog_debug_writer.writerow([
"step", "act_x", "act_y", "act_mag", "heading", "target_heading",
"heading_err", "fwd_speed", "left_w", "right_w", "v", "w",
"dog_x", "dog_y",
])
# ------------------------------------------------------------------
# Curriculum interface
@@ -234,12 +250,19 @@ class HerdingEnv(gym.Env):
act = np.clip(np.asarray(action, dtype=np.float32), -1.0, 1.0)
old_dog = self.dog_pos.copy()
dog_dbg = {
"target_heading": float(self.dog_heading),
"err": 0.0,
"fwd_speed": 0.0,
"left_w": 0.0,
"right_w": 0.0,
"v": 0.0,
"w": 0.0,
}
# Wheeled-dog kinematics — mirrors the Webots controller's drive():
# interpret (vx, vy) as a desired velocity vector in world frame; the
# dog turns toward it at a limited rate, and forward speed is gated
# by cos(heading_error). Bang-bang policies still produce smooth
# motion (the dog can't sidestep — it has to turn first).
# Differential-drive kinematics — mirrors Webots drive():
# action -> desired heading/speed -> wheel angular velocities (with
# saturation) -> body linear/angular velocity via wheel geometry.
act_mag = float(np.linalg.norm(act))
if act_mag < self.DOG_STOP_THRESHOLD:
# Below threshold the Webots dog stops; treat the same way here.
@@ -249,19 +272,36 @@ class HerdingEnv(gym.Env):
err = target_heading - self.dog_heading
# Wrap to [-pi, pi) — the modulo maps an error of exactly +pi to -pi
err = (err + np.pi) % (2 * np.pi) - np.pi
turn_rate = np.clip(self.DOG_K_TURN * err,
-self.DOG_MAX_TURN_RATE,
self.DOG_MAX_TURN_RATE)
self.dog_heading = float(
((self.dog_heading + turn_rate * self.DT) + np.pi)
% (2 * np.pi) - np.pi
)
target_speed = act_mag * self.DOG_SPEED
fwd_speed = target_speed * max(0.0, float(np.cos(err)))
step_vec = np.array([np.cos(self.dog_heading),
np.sin(self.dog_heading)], dtype=np.float32)
fwd_rad = fwd_speed / self.DOG_WHEEL_R
turn = self.DOG_K_TURN * err
left_w = np.clip(fwd_rad - turn, -self.DOG_MOTOR_MAX, self.DOG_MOTOR_MAX)
right_w = np.clip(fwd_rad + turn, -self.DOG_MOTOR_MAX, self.DOG_MOTOR_MAX)
v = self.DOG_WHEEL_R * 0.5 * (right_w + left_w)
w = (self.DOG_WHEEL_R / self.DOG_AXLE_TRACK) * (right_w - left_w)
dog_dbg.update({
"target_heading": target_heading,
"err": float(err),
"fwd_speed": float(fwd_speed),
"left_w": float(left_w),
"right_w": float(right_w),
"v": float(v),
"w": float(w),
})
self.dog_heading = float(
((self.dog_heading + w * self.DT) + np.pi) % (2 * np.pi) - np.pi
)
step_vec = np.array(
[np.cos(self.dog_heading), np.sin(self.dog_heading)],
dtype=np.float32
)
new_dog = np.clip(
self.dog_pos + step_vec * fwd_speed * self.DT,
self.dog_pos + step_vec * v * self.DT,
-self.FIELD, self.FIELD,
)
@@ -281,8 +321,6 @@ class HerdingEnv(gym.Env):
self.dog_pos = new_dog.astype(np.float32)
for i in range(self.n_sheep):
if self.penned[i]:
continue
self.sheep_pos[i] = self._step_sheep(i)
if self._in_pen(self.sheep_pos[i]):
self.penned[i] = True
@@ -295,7 +333,18 @@ class HerdingEnv(gym.Env):
terminated = n_penned == self.n_sheep
truncated = self._step_count >= self.max_steps
info = {"n_penned": n_penned, "n_sheep": self.n_sheep,
"rcomps": rcomps}
"rcomps": rcomps, "dog_dyn": dog_dbg}
self._dog_debug_writer.writerow([
self._step_count,
float(act[0]), float(act[1]), act_mag,
float(self.dog_heading), dog_dbg["target_heading"], dog_dbg["err"],
dog_dbg["fwd_speed"], dog_dbg["left_w"], dog_dbg["right_w"],
dog_dbg["v"], dog_dbg["w"],
float(self.dog_pos[0]), float(self.dog_pos[1]),
])
if self._step_count % 200 == 0:
self._dog_debug_file.flush()
if self.render_mode == "human":
self.render()
@@ -357,6 +406,7 @@ class HerdingEnv(gym.Env):
import matplotlib.pyplot as plt
plt.close(self._fig)
self._fig = None
self._dog_debug_file.close()
# ------------------------------------------------------------------
# Internals
@@ -529,6 +579,41 @@ class HerdingEnv(gym.Env):
old_pos = self.sheep_pos[i].copy() # saved for pen wall collision check
pos = old_pos.copy()
fx, fy = 0.0, 0.0
if self.penned[i]:
# Webots latch behavior: once in pen, sheep keep moving under
# confinement + penned-sheep separation + wander.
pm = 0.8 # PEN_MARGIN in sheep.py
px0, px1 = self.PEN_X
py0, py1 = self.PEN_Y
x, y = float(pos[0]), float(pos[1])
if x < px0 + pm: fx += ((px0 + pm - x) / pm) * 15.0
if x > px1 - pm: fx -= ((x - (px1 - pm)) / pm) * 15.0
if y < py0 + pm: fy += ((py0 + pm - y) / pm) * 15.0
if y > py1 - pm: fy -= ((y - (py1 - pm)) / pm) * 15.0
for j in range(self.n_sheep):
if j == i or not self.penned[j]:
continue
dv = self.sheep_pos[j] - pos
dj = float(np.linalg.norm(dv))
if 0.05 < dj < self.SEPARATION_DIST:
push = (self.SEPARATION_DIST - dj) / dj
fx -= (dv[0] / dj) * push * 2.5
fy -= (dv[1] / dj) * push * 2.5
if self.np_random.random() < 0.02:
self.wander_ang[i] += float(self.np_random.uniform(-0.6, 0.6))
fx += float(np.cos(self.wander_ang[i])) * 0.5
fy += float(np.sin(self.wander_ang[i])) * 0.5
force = np.array([fx, fy], dtype=np.float32)
mag = float(np.linalg.norm(force))
if mag > 0.01:
speed = min(self.SHEEP_FLEE_V, mag * 0.3)
pos = np.clip(pos + (force / mag) * speed * self.DT,
-self.FIELD, self.FIELD)
return pos.astype(np.float32)
fleeing = False
# Flee from dog — quadratic ramp
@@ -536,11 +621,19 @@ class HerdingEnv(gym.Env):
dist = float(np.linalg.norm(diff))
if 0.01 < dist < self.FLEE_DIST:
t = 1.0 - dist / self.FLEE_DIST
s = t * t * 5.0
s = t * t * 20.0
fx -= (diff[0] / dist) * s
fy -= (diff[1] / dist) * s
fleeing = True
# Repel unpenned sheep from pen side-wall exteriors (sheep.py PEN_EXT_MARGIN).
if self.PEN_Y[0] < pos[1] < self.PEN_Y[1]:
pem = 0.8
if self.PEN_X[0] - pem < pos[0] < self.PEN_X[0]:
fx -= ((pos[0] - (self.PEN_X[0] - pem)) / pem) * 6.0
if self.PEN_X[1] < pos[0] < self.PEN_X[1] + pem:
fx += ((self.PEN_X[1] + pem - pos[0]) / pem) * 6.0
# Separation (inverse-distance) + Cohesion
cx, cy, cn = 0.0, 0.0, 0
for j in range(self.n_sheep):
@@ -562,7 +655,7 @@ class HerdingEnv(gym.Env):
fy += (cy / cn - pos[1]) * w
# Wall avoidance
m, F = self.WALL_MARGIN, self.FIELD
m, F = self.WALL_MARGIN, self.SHEEP_WALL_INNER
if pos[0] < -F + m: fx += ((-F + m - pos[0]) / m) * 6.0
if pos[0] > F - m: fx -= ((pos[0] - (F - m)) / m) * 6.0
if pos[1] < -F + m: fy += ((-F + m - pos[1]) / m) * 6.0
@@ -609,5 +702,8 @@ class HerdingEnv(gym.Env):
# Block crossing through east wall from outside
if old_pos[0] > px1 >= pos[0] and py0 < pos[1] < py1:
pos = np.array([px1 + 1e-3, pos[1]], dtype=np.float32)
# Block crossing through south wall from outside
if old_pos[1] < py0 <= pos[1] and px0 < pos[0] < px1:
pos = np.array([pos[0], py0 - 1e-3], dtype=np.float32)
return pos.astype(np.float32)
+1
View File
@@ -0,0 +1 @@
-242
View File
@@ -1,242 +0,0 @@
Config loaded from config.json
Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.01, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02}
Run dir: runs/v1
Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
[Stage n_sheep=1] training 1,500,000 steps
... [1 sheep | 100,000 steps | ret(last 40)=-19.52 win_sr=2% cum_sr=2%]
... [1 sheep | 200,000 steps | ret(last 50)=-21.11 win_sr=4% cum_sr=4%]
... [1 sheep | 300,000 steps | ret(last 50)=-7.06 win_sr=12% cum_sr=7%]
... [1 sheep | 400,000 steps | ret(last 50)=+18.18 win_sr=90% cum_sr=40%]
... [1 sheep | 500,000 steps | ret(last 50)=+16.17 win_sr=100% cum_sr=69%]
... [1 sheep | 600,000 steps | ret(last 50)=+14.68 win_sr=100% cum_sr=82%]
... [1 sheep | 700,000 steps | ret(last 50)=+14.33 win_sr=100% cum_sr=88%]
... [1 sheep | 800,000 steps | ret(last 50)=+14.20 win_sr=100% cum_sr=91%]
... [1 sheep | 900,000 steps | ret(last 50)=+13.82 win_sr=100% cum_sr=93%]
... [1 sheep | 1,000,000 steps | ret(last 50)=+13.76 win_sr=100% cum_sr=94%]
... [1 sheep | 1,100,000 steps | ret(last 50)=+13.72 win_sr=100% cum_sr=95%]
... [1 sheep | 1,200,000 steps | ret(last 50)=+13.41 win_sr=100% cum_sr=95%]
... [1 sheep | 1,300,000 steps | ret(last 50)=+13.42 win_sr=100% cum_sr=96%]
... [1 sheep | 1,400,000 steps | ret(last 50)=+13.40 win_sr=100% cum_sr=96%]
... [1 sheep | 1,500,000 steps | ret(last 50)=+13.24 win_sr=100% cum_sr=97%]
[Stage n_sheep=1] evaluating 30 eps
[Stage n_sheep=1] sr=100% mean_len=243 mean_min_pen=3.7m mean_act=0.39
failure modes: SUCCESS=30
reward/step: progress=+0.1141 alignment=+0.0003 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0412 step_cost=-0.0200 complete=+0.4115
[Stage n_sheep=2] training 1,500,000 steps
... [2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [2 sheep | 1,607,336 steps | ret(last 42)=-4.07 win_sr=7% cum_sr=7%]
... [2 sheep | 1,707,336 steps | ret(last 50)=-6.10 win_sr=0% cum_sr=4%]
... [2 sheep | 1,807,336 steps | ret(last 50)=-5.57 win_sr=2% cum_sr=3%]
... [2 sheep | 1,907,336 steps | ret(last 50)=-5.04 win_sr=4% cum_sr=4%]
... [2 sheep | 2,007,336 steps | ret(last 50)=-4.27 win_sr=10% cum_sr=5%]
... [2 sheep | 2,107,336 steps | ret(last 50)=-4.42 win_sr=6% cum_sr=5%]
... [2 sheep | 2,207,336 steps | ret(last 50)=+4.57 win_sr=50% cum_sr=13%]
... [2 sheep | 2,307,336 steps | ret(last 50)=+11.35 win_sr=70% cum_sr=24%]
... [2 sheep | 2,407,336 steps | ret(last 50)=+15.75 win_sr=86% cum_sr=32%]
... [2 sheep | 2,507,336 steps | ret(last 50)=+19.97 win_sr=100% cum_sr=44%]
... [2 sheep | 2,607,336 steps | ret(last 50)=+20.73 win_sr=100% cum_sr=54%]
... [2 sheep | 2,707,336 steps | ret(last 50)=+19.81 win_sr=100% cum_sr=62%]
... [2 sheep | 2,807,336 steps | ret(last 50)=+20.83 win_sr=100% cum_sr=67%]
... [2 sheep | 2,907,336 steps | ret(last 50)=+20.43 win_sr=100% cum_sr=72%]
... [2 sheep | 3,007,336 steps | ret(last 50)=+19.65 win_sr=100% cum_sr=75%]
[Stage n_sheep=2] evaluating 30 eps
[Stage n_sheep=2] sr=63% mean_len=1325 mean_min_pen=3.1m mean_act=0.42
failure modes: SUCCESS=19 PARTIAL_1of2=10 COMPACT_CANT_DRIVE=1
reward/step: progress=+0.0453 alignment=+0.0065 compact=+0.0000 wall_touch=-0.0052 pen_bonus=+0.0123 step_cost=-0.0200 complete=+0.0478
[Stage n_sheep=3] training 1,500,000 steps
... [3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [3 sheep | 3,114,664 steps | ret(last 50)=+28.02 win_sr=100% cum_sr=100%]
... [3 sheep | 3,214,664 steps | ret(last 50)=+24.04 win_sr=96% cum_sr=99%]
... [3 sheep | 3,314,664 steps | ret(last 50)=+27.02 win_sr=100% cum_sr=99%]
... [3 sheep | 3,414,664 steps | ret(last 50)=+25.53 win_sr=100% cum_sr=99%]
... [3 sheep | 3,514,664 steps | ret(last 50)=+25.13 win_sr=96% cum_sr=99%]
... [3 sheep | 3,614,664 steps | ret(last 50)=+26.45 win_sr=100% cum_sr=99%]
... [3 sheep | 3,714,664 steps | ret(last 50)=+25.83 win_sr=100% cum_sr=99%]
... [3 sheep | 3,814,664 steps | ret(last 50)=+26.07 win_sr=100% cum_sr=99%]
... [3 sheep | 3,914,664 steps | ret(last 50)=+25.03 win_sr=96% cum_sr=99%]
... [3 sheep | 4,014,664 steps | ret(last 50)=+24.53 win_sr=98% cum_sr=99%]
... [3 sheep | 4,114,664 steps | ret(last 50)=+24.98 win_sr=100% cum_sr=99%]
... [3 sheep | 4,214,664 steps | ret(last 50)=+26.81 win_sr=100% cum_sr=99%]
... [3 sheep | 4,314,664 steps | ret(last 50)=+24.78 win_sr=98% cum_sr=99%]
... [3 sheep | 4,414,664 steps | ret(last 50)=+26.79 win_sr=100% cum_sr=99%]
... [3 sheep | 4,514,664 steps | ret(last 50)=+26.26 win_sr=100% cum_sr=99%]
[Stage n_sheep=3] evaluating 30 eps
[Stage n_sheep=3] sr=97% mean_len=828 mean_min_pen=2.7m mean_act=1.15
failure modes: SUCCESS=29 PARTIAL_1of3=1
reward/step: progress=+0.1017 alignment=+0.0139 compact=+0.0000 wall_touch=-0.0023 pen_bonus=+0.0354 step_cost=-0.0200 complete=+0.1168
[Stage n_sheep=4] training 1,500,000 steps
... [4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [4 sheep | 4,621,992 steps | ret(last 50)=+28.27 win_sr=90% cum_sr=93%]
... [4 sheep | 4,721,992 steps | ret(last 50)=+31.16 win_sr=98% cum_sr=95%]
... [4 sheep | 4,821,992 steps | ret(last 50)=+30.45 win_sr=100% cum_sr=96%]
... [4 sheep | 4,921,992 steps | ret(last 50)=+31.12 win_sr=100% cum_sr=96%]
... [4 sheep | 5,021,992 steps | ret(last 50)=+30.78 win_sr=100% cum_sr=97%]
... [4 sheep | 5,121,992 steps | ret(last 50)=+30.42 win_sr=100% cum_sr=97%]
... [4 sheep | 5,221,992 steps | ret(last 50)=+31.14 win_sr=100% cum_sr=98%]
... [4 sheep | 5,321,992 steps | ret(last 50)=+31.20 win_sr=100% cum_sr=98%]
... [4 sheep | 5,421,992 steps | ret(last 50)=+30.47 win_sr=98% cum_sr=98%]
... [4 sheep | 5,521,992 steps | ret(last 50)=+30.13 win_sr=100% cum_sr=98%]
... [4 sheep | 5,621,992 steps | ret(last 50)=+28.52 win_sr=98% cum_sr=98%]
... [4 sheep | 5,721,992 steps | ret(last 50)=+31.40 win_sr=100% cum_sr=98%]
... [4 sheep | 5,821,992 steps | ret(last 50)=+30.30 win_sr=100% cum_sr=99%]
... [4 sheep | 5,921,992 steps | ret(last 50)=+29.45 win_sr=100% cum_sr=99%]
... [4 sheep | 6,021,992 steps | ret(last 50)=+30.26 win_sr=100% cum_sr=99%]
[Stage n_sheep=4] evaluating 30 eps
[Stage n_sheep=4] sr=57% mean_len=1686 mean_min_pen=1.8m mean_act=1.01
failure modes: SUCCESS=17 PARTIAL_1of4=10 PARTIAL_2of4=3
reward/step: progress=+0.0721 alignment=+0.0085 compact=+0.0000 wall_touch=-0.0110 pen_bonus=+0.0166 step_cost=-0.0200 complete=+0.0336
[Stage n_sheep=5] training 1,500,000 steps
... [5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [5 sheep | 6,129,320 steps | ret(last 50)=+34.59 win_sr=100% cum_sr=100%]
... [5 sheep | 6,229,320 steps | ret(last 50)=+35.53 win_sr=100% cum_sr=100%]
... [5 sheep | 6,329,320 steps | ret(last 50)=+34.77 win_sr=100% cum_sr=100%]
... [5 sheep | 6,429,320 steps | ret(last 50)=+34.30 win_sr=100% cum_sr=100%]
... [5 sheep | 6,529,320 steps | ret(last 50)=+35.12 win_sr=100% cum_sr=100%]
... [5 sheep | 6,629,320 steps | ret(last 50)=+33.76 win_sr=100% cum_sr=100%]
... [5 sheep | 6,729,320 steps | ret(last 50)=+34.81 win_sr=100% cum_sr=100%]
... [5 sheep | 6,829,320 steps | ret(last 50)=+31.82 win_sr=100% cum_sr=100%]
... [5 sheep | 6,929,320 steps | ret(last 50)=+33.69 win_sr=98% cum_sr=100%]
... [5 sheep | 7,029,320 steps | ret(last 50)=+31.65 win_sr=100% cum_sr=100%]
... [5 sheep | 7,129,320 steps | ret(last 50)=+31.83 win_sr=96% cum_sr=99%]
... [5 sheep | 7,229,320 steps | ret(last 50)=+33.96 win_sr=100% cum_sr=99%]
... [5 sheep | 7,329,320 steps | ret(last 50)=+33.65 win_sr=98% cum_sr=99%]
... [5 sheep | 7,429,320 steps | ret(last 50)=+34.20 win_sr=100% cum_sr=99%]
... [5 sheep | 7,529,320 steps | ret(last 50)=+35.27 win_sr=98% cum_sr=99%]
[Stage n_sheep=5] evaluating 30 eps
[Stage n_sheep=5] sr=63% mean_len=1654 mean_min_pen=1.6m mean_act=1.36
failure modes: SUCCESS=19 PARTIAL_2of5=9 PARTIAL_3of5=2
reward/step: progress=+0.1043 alignment=+0.0108 compact=+0.0000 wall_touch=-0.0100 pen_bonus=+0.0240 step_cost=-0.0200 complete=+0.0383
[Stage n_sheep=6] training 1,500,000 steps
... [6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [6 sheep | 7,636,648 steps | ret(last 50)=+37.79 win_sr=98% cum_sr=95%]
... [6 sheep | 7,736,648 steps | ret(last 50)=+38.87 win_sr=94% cum_sr=95%]
... [6 sheep | 7,836,648 steps | ret(last 50)=+37.50 win_sr=98% cum_sr=96%]
... [6 sheep | 7,936,648 steps | ret(last 50)=+39.42 win_sr=94% cum_sr=96%]
... [6 sheep | 8,036,648 steps | ret(last 50)=+38.28 win_sr=98% cum_sr=96%]
... [6 sheep | 8,136,648 steps | ret(last 50)=+36.39 win_sr=100% cum_sr=97%]
... [6 sheep | 8,236,648 steps | ret(last 50)=+39.29 win_sr=100% cum_sr=97%]
... [6 sheep | 8,336,648 steps | ret(last 50)=+37.92 win_sr=98% cum_sr=97%]
... [6 sheep | 8,436,648 steps | ret(last 50)=+38.64 win_sr=98% cum_sr=97%]
... [6 sheep | 8,536,648 steps | ret(last 50)=+38.46 win_sr=98% cum_sr=97%]
... [6 sheep | 8,636,648 steps | ret(last 50)=+38.08 win_sr=98% cum_sr=97%]
... [6 sheep | 8,736,648 steps | ret(last 50)=+36.78 win_sr=100% cum_sr=97%]
... [6 sheep | 8,836,648 steps | ret(last 50)=+36.81 win_sr=98% cum_sr=98%]
... [6 sheep | 8,936,648 steps | ret(last 50)=+37.89 win_sr=98% cum_sr=98%]
... [6 sheep | 9,036,648 steps | ret(last 50)=+36.17 win_sr=98% cum_sr=98%]
[Stage n_sheep=6] evaluating 30 eps
[Stage n_sheep=6] sr=33% mean_len=2161 mean_min_pen=1.8m mean_act=1.37
failure modes: PARTIAL_5of6=14 SUCCESS=10 PARTIAL_4of6=4 COMPACT_CANT_DRIVE=2
reward/step: progress=+0.0915 alignment=+0.0102 compact=+0.0000 wall_touch=-0.0068 pen_bonus=+0.0225 step_cost=-0.0200 complete=+0.0154
[Stage n_sheep=7] training 1,500,000 steps
... [7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [7 sheep | 9,143,976 steps | ret(last 50)=+39.61 win_sr=98% cum_sr=97%]
... [7 sheep | 9,243,976 steps | ret(last 50)=+42.39 win_sr=100% cum_sr=99%]
... [7 sheep | 9,343,976 steps | ret(last 50)=+39.89 win_sr=96% cum_sr=98%]
... [7 sheep | 9,443,976 steps | ret(last 50)=+42.48 win_sr=98% cum_sr=98%]
... [7 sheep | 9,543,976 steps | ret(last 50)=+39.16 win_sr=98% cum_sr=98%]
... [7 sheep | 9,643,976 steps | ret(last 50)=+38.80 win_sr=96% cum_sr=98%]
... [7 sheep | 9,743,976 steps | ret(last 50)=+43.06 win_sr=96% cum_sr=98%]
... [7 sheep | 9,843,976 steps | ret(last 50)=+40.04 win_sr=94% cum_sr=98%]
... [7 sheep | 9,943,976 steps | ret(last 50)=+40.45 win_sr=98% cum_sr=97%]
... [7 sheep | 10,043,976 steps | ret(last 50)=+39.21 win_sr=96% cum_sr=97%]
... [7 sheep | 10,143,976 steps | ret(last 50)=+40.23 win_sr=100% cum_sr=97%]
... [7 sheep | 10,243,976 steps | ret(last 50)=+41.51 win_sr=96% cum_sr=97%]
... [7 sheep | 10,343,976 steps | ret(last 50)=+40.05 win_sr=98% cum_sr=97%]
... [7 sheep | 10,443,976 steps | ret(last 50)=+39.17 win_sr=96% cum_sr=97%]
... [7 sheep | 10,543,976 steps | ret(last 50)=+41.80 win_sr=98% cum_sr=97%]
[Stage n_sheep=7] evaluating 30 eps
[Stage n_sheep=7] sr=20% mean_len=2211 mean_min_pen=1.8m mean_act=1.36
failure modes: PARTIAL_4of7=11 PARTIAL_3of7=8 SUCCESS=6 PARTIAL_6of7=3 PARTIAL_5of7=2
reward/step: progress=+0.0879 alignment=+0.0086 compact=+0.0000 wall_touch=-0.0142 pen_bonus=+0.0208 step_cost=-0.0200 complete=+0.0090
[Stage n_sheep=8] training 1,500,000 steps
... [8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [8 sheep | 10,651,304 steps | ret(last 50)=+48.67 win_sr=96% cum_sr=97%]
... [8 sheep | 10,751,304 steps | ret(last 50)=+46.60 win_sr=96% cum_sr=97%]
... [8 sheep | 10,851,304 steps | ret(last 50)=+41.39 win_sr=98% cum_sr=96%]
... [8 sheep | 10,951,304 steps | ret(last 50)=+41.47 win_sr=96% cum_sr=96%]
... [8 sheep | 11,051,304 steps | ret(last 50)=+40.29 win_sr=96% cum_sr=95%]
... [8 sheep | 11,151,304 steps | ret(last 50)=+42.96 win_sr=100% cum_sr=96%]
... [8 sheep | 11,251,304 steps | ret(last 50)=+42.87 win_sr=94% cum_sr=96%]
... [8 sheep | 11,351,304 steps | ret(last 50)=+44.71 win_sr=100% cum_sr=96%]
... [8 sheep | 11,451,304 steps | ret(last 50)=+45.20 win_sr=96% cum_sr=96%]
... [8 sheep | 11,551,304 steps | ret(last 50)=+46.82 win_sr=96% cum_sr=96%]
... [8 sheep | 11,651,304 steps | ret(last 50)=+43.23 win_sr=96% cum_sr=96%]
... [8 sheep | 11,751,304 steps | ret(last 50)=+43.77 win_sr=94% cum_sr=96%]
... [8 sheep | 11,851,304 steps | ret(last 50)=+48.78 win_sr=98% cum_sr=96%]
... [8 sheep | 11,951,304 steps | ret(last 50)=+43.19 win_sr=94% cum_sr=96%]
... [8 sheep | 12,051,304 steps | ret(last 50)=+42.83 win_sr=96% cum_sr=96%]
[Stage n_sheep=8] evaluating 30 eps
[Stage n_sheep=8] sr=63% mean_len=1745 mean_min_pen=1.7m mean_act=1.37
failure modes: SUCCESS=19 PARTIAL_4of8=9 PARTIAL_1of8=1 PARTIAL_6of8=1
reward/step: progress=+0.1198 alignment=+0.0134 compact=+0.0000 wall_touch=-0.0107 pen_bonus=+0.0373 step_cost=-0.0200 complete=+0.0363
[Stage n_sheep=9] training 1,500,000 steps
... [9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [9 sheep | 12,158,632 steps | ret(last 50)=+49.04 win_sr=98% cum_sr=97%]
... [9 sheep | 12,258,632 steps | ret(last 50)=+47.01 win_sr=96% cum_sr=97%]
... [9 sheep | 12,358,632 steps | ret(last 50)=+48.47 win_sr=90% cum_sr=95%]
... [9 sheep | 12,458,632 steps | ret(last 50)=+46.43 win_sr=88% cum_sr=94%]
... [9 sheep | 12,558,632 steps | ret(last 50)=+44.78 win_sr=94% cum_sr=94%]
... [9 sheep | 12,658,632 steps | ret(last 50)=+49.15 win_sr=100% cum_sr=95%]
... [9 sheep | 12,758,632 steps | ret(last 50)=+47.87 win_sr=94% cum_sr=95%]
... [9 sheep | 12,858,632 steps | ret(last 50)=+50.32 win_sr=96% cum_sr=95%]
... [9 sheep | 12,958,632 steps | ret(last 50)=+47.07 win_sr=94% cum_sr=95%]
... [9 sheep | 13,058,632 steps | ret(last 50)=+48.71 win_sr=100% cum_sr=96%]
... [9 sheep | 13,158,632 steps | ret(last 50)=+47.69 win_sr=96% cum_sr=96%]
... [9 sheep | 13,258,632 steps | ret(last 50)=+46.83 win_sr=98% cum_sr=96%]
... [9 sheep | 13,358,632 steps | ret(last 50)=+48.27 win_sr=94% cum_sr=96%]
... [9 sheep | 13,458,632 steps | ret(last 50)=+47.61 win_sr=88% cum_sr=95%]
... [9 sheep | 13,558,632 steps | ret(last 50)=+47.29 win_sr=96% cum_sr=95%]
[Stage n_sheep=9] evaluating 30 eps
[Stage n_sheep=9] sr=83% mean_len=1723 mean_min_pen=1.8m mean_act=1.38
failure modes: SUCCESS=25 PARTIAL_5of9=3 NEVER_COMPACT=1 PARTIAL_6of9=1
reward/step: progress=+0.1562 alignment=+0.0155 compact=+0.0000 wall_touch=-0.0073 pen_bonus=+0.0480 step_cost=-0.0200 complete=+0.0484
[Stage n_sheep=10] training 1,500,000 steps
... [10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [10 sheep | 13,665,960 steps | ret(last 50)=+50.02 win_sr=94% cum_sr=95%]
... [10 sheep | 13,765,960 steps | ret(last 50)=+48.46 win_sr=84% cum_sr=90%]
... [10 sheep | 13,865,960 steps | ret(last 50)=+48.24 win_sr=84% cum_sr=87%]
... [10 sheep | 13,965,960 steps | ret(last 50)=+50.64 win_sr=82% cum_sr=86%]
... [10 sheep | 14,065,960 steps | ret(last 50)=+53.92 win_sr=86% cum_sr=86%]
... [10 sheep | 14,165,960 steps | ret(last 50)=+51.33 win_sr=80% cum_sr=85%]
... [10 sheep | 14,265,960 steps | ret(last 50)=+49.55 win_sr=76% cum_sr=84%]
... [10 sheep | 14,365,960 steps | ret(last 50)=+51.05 win_sr=92% cum_sr=85%]
... [10 sheep | 14,465,960 steps | ret(last 50)=+47.58 win_sr=86% cum_sr=85%]
... [10 sheep | 14,565,960 steps | ret(last 50)=+47.98 win_sr=74% cum_sr=84%]
... [10 sheep | 14,665,960 steps | ret(last 50)=+50.60 win_sr=82% cum_sr=84%]
... [10 sheep | 14,765,960 steps | ret(last 50)=+51.25 win_sr=88% cum_sr=84%]
... [10 sheep | 14,865,960 steps | ret(last 50)=+50.54 win_sr=92% cum_sr=85%]
... [10 sheep | 14,965,960 steps | ret(last 50)=+50.94 win_sr=92% cum_sr=86%]
... [10 sheep | 15,065,960 steps | ret(last 50)=+50.54 win_sr=90% cum_sr=86%]
[Stage n_sheep=10] evaluating 30 eps
[Stage n_sheep=10] sr=27% mean_len=2267 mean_min_pen=2.2m mean_act=1.38
failure modes: PARTIAL_6of10=16 SUCCESS=8 COMPACT_CANT_DRIVE=2 PARTIAL_7of10=1 PARTIAL_9of10=1 PARTIAL_5of10=1 PARTIAL_8of10=1
reward/step: progress=+0.1360 alignment=+0.0134 compact=+0.0000 wall_touch=-0.0122 pen_bonus=+0.0301 step_cost=-0.0200 complete=+0.0118
======================================================================
TRAINING SUMMARY
======================================================================
n_sheep=1 sr=100% len= 243 min_pen= 3.7m act=0.39
n_sheep=2 sr= 63% len= 1325 min_pen= 3.1m act=0.42
n_sheep=3 sr= 97% len= 828 min_pen= 2.7m act=1.15
n_sheep=4 sr= 57% len= 1686 min_pen= 1.8m act=1.01
n_sheep=5 sr= 63% len= 1654 min_pen= 1.6m act=1.36
n_sheep=6 sr= 33% len= 2161 min_pen= 1.8m act=1.37
n_sheep=7 sr= 20% len= 2211 min_pen= 1.8m act=1.36
n_sheep=8 sr= 63% len= 1745 min_pen= 1.7m act=1.37
n_sheep=9 sr= 83% len= 1723 min_pen= 1.8m act=1.38
n_sheep=10 sr= 27% len= 2267 min_pen= 2.2m act=1.38
Total time: 97.6 min
Artefacts: runs/v1/
Plots: runs/v1/success_rate.png, runs/v1/eval/
-14
View File
@@ -1,14 +0,0 @@
{
"W_PER_SHEEP": 2.0,
"W_ALIGN": 0.05,
"W_PEN_BONUS": 10.0,
"W_COMPLETE": 100.0,
"W_STEP_COST": 0.02,
"W_COMPACT": 0.0,
"W_WALL_TOUCH": 0.01,
"WALL_TOUCH_BUFFER": 0.4,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": true,
"ENTRY_AWARE": true,
"ent_coef": 0.02
}
Binary file not shown.

Before

Width:  |  Height:  |  Size: 186 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 85 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 151 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 131 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 180 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 200 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 201 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 133 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 155 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 109 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 150 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 196 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 184 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 152 KiB

Binary file not shown.
-218
View File
@@ -1,218 +0,0 @@
[
{
"sr": 1.0,
"mean_len": 243.0,
"mean_min_pen": 3.7120999256769815,
"mean_act": 0.3930775734995823,
"failure_modes": {
"SUCCESS": 30
},
"reward_per_step": {
"progress": 0.11411363949746262,
"alignment": 0.00034729298515464674,
"compact": 0.0,
"wall_touch": 0.0,
"pen_bonus": 0.0411522633744856,
"step_cost": -0.020000000000000108,
"complete": 0.411522633744856
},
"n_sheep": 1
},
{
"sr": 0.6333333333333333,
"mean_len": 1324.9333333333334,
"mean_min_pen": 3.108120004336039,
"mean_act": 0.41626948835668365,
"failure_modes": {
"SUCCESS": 19,
"PARTIAL_1of2": 10,
"COMPACT_CANT_DRIVE": 1
},
"reward_per_step": {
"progress": 0.045259184195888084,
"alignment": 0.006548802090560675,
"compact": 0.0,
"wall_touch": -0.005243643148915256,
"pen_bonus": 0.012327664284995472,
"step_cost": -0.019999999999989106,
"complete": 0.04780114722753346
},
"n_sheep": 2
},
{
"sr": 0.9666666666666667,
"mean_len": 827.7,
"mean_min_pen": 2.727696478366852,
"mean_act": 1.1521936838813016,
"failure_modes": {
"PARTIAL_1of3": 1,
"SUCCESS": 29
},
"reward_per_step": {
"progress": 0.10166334638295625,
"alignment": 0.013859153429505626,
"compact": 0.0,
"wall_touch": -0.0022604217500245883,
"pen_bonus": 0.03543957150336273,
"step_cost": -0.019999999999993488,
"complete": 0.11678949699971809
},
"n_sheep": 3
},
{
"sr": 0.5666666666666667,
"mean_len": 1686.0333333333333,
"mean_min_pen": 1.7675368865331014,
"mean_act": 1.0093803780622697,
"failure_modes": {
"PARTIAL_1of4": 10,
"SUCCESS": 17,
"PARTIAL_2of4": 3
},
"reward_per_step": {
"progress": 0.07213990871824405,
"alignment": 0.008500170591885925,
"compact": 0.0,
"wall_touch": -0.01096873654520888,
"pen_bonus": 0.016607026353769202,
"step_cost": -0.019999999999987545,
"complete": 0.03360945809691386
},
"n_sheep": 4
},
{
"sr": 0.6333333333333333,
"mean_len": 1653.8333333333333,
"mean_min_pen": 1.6310479640960693,
"mean_act": 1.3572492104366454,
"failure_modes": {
"PARTIAL_2of5": 9,
"SUCCESS": 19,
"PARTIAL_3of5": 2
},
"reward_per_step": {
"progress": 0.10426509678994506,
"alignment": 0.010847962450905363,
"compact": 0.0,
"wall_touch": -0.010001784418012447,
"pen_bonus": 0.02398468205179885,
"step_cost": -0.019999999999987656,
"complete": 0.038294870502872114
},
"n_sheep": 5
},
{
"sr": 0.3333333333333333,
"mean_len": 2161.0333333333333,
"mean_min_pen": 1.7910769859949747,
"mean_act": 1.3728399181766682,
"failure_modes": {
"SUCCESS": 10,
"PARTIAL_5of6": 14,
"PARTIAL_4of6": 4,
"COMPACT_CANT_DRIVE": 2
},
"reward_per_step": {
"progress": 0.09152597398477412,
"alignment": 0.010169068168091603,
"compact": 0.0,
"wall_touch": -0.006849364742307595,
"pen_bonus": 0.022520090697351575,
"step_cost": -0.019999999999986286,
"complete": 0.015424719655720258
},
"n_sheep": 6
},
{
"sr": 0.2,
"mean_len": 2211.2,
"mean_min_pen": 1.8339664101600648,
"mean_act": 1.3635542380694952,
"failure_modes": {
"PARTIAL_5of7": 2,
"SUCCESS": 6,
"PARTIAL_6of7": 3,
"PARTIAL_3of7": 8,
"PARTIAL_4of7": 11
},
"reward_per_step": {
"progress": 0.08794138462674025,
"alignment": 0.008588877237149285,
"compact": 0.0,
"wall_touch": -0.014176997336213705,
"pen_bonus": 0.020803183791606367,
"step_cost": -0.019999999999986185,
"complete": 0.009044862518089725
},
"n_sheep": 7
},
{
"sr": 0.6333333333333333,
"mean_len": 1744.5666666666666,
"mean_min_pen": 1.7331914146741232,
"mean_act": 1.366222499606064,
"failure_modes": {
"SUCCESS": 19,
"PARTIAL_4of8": 9,
"PARTIAL_1of8": 1,
"PARTIAL_6of8": 1
},
"reward_per_step": {
"progress": 0.11981066786559799,
"alignment": 0.013385751275637974,
"compact": 0.0,
"wall_touch": -0.010737474453107049,
"pen_bonus": 0.037258536026138295,
"step_cost": -0.019999999999987354,
"complete": 0.03630318894854501
},
"n_sheep": 8
},
{
"sr": 0.8333333333333334,
"mean_len": 1723.1333333333334,
"mean_min_pen": 1.7584208091100058,
"mean_act": 1.3848404770822742,
"failure_modes": {
"NEVER_COMPACT": 1,
"SUCCESS": 25,
"PARTIAL_5of9": 3,
"PARTIAL_6of9": 1
},
"reward_per_step": {
"progress": 0.1561655017464111,
"alignment": 0.01548957874142236,
"compact": 0.0,
"wall_touch": -0.00728069638883058,
"pen_bonus": 0.04797461987851588,
"step_cost": -0.019999999999987424,
"complete": 0.048361511974310364
},
"n_sheep": 9
},
{
"sr": 0.26666666666666666,
"mean_len": 2266.5333333333333,
"mean_min_pen": 2.1789512236913047,
"mean_act": 1.3836169439830954,
"failure_modes": {
"PARTIAL_6of10": 16,
"SUCCESS": 8,
"PARTIAL_7of10": 1,
"PARTIAL_9of10": 1,
"PARTIAL_5of10": 1,
"COMPACT_CANT_DRIVE": 2,
"PARTIAL_8of10": 1
},
"reward_per_step": {
"progress": 0.13603502511276877,
"alignment": 0.013359252519942029,
"compact": 0.0,
"wall_touch": -0.012232639033891629,
"pen_bonus": 0.030148832284252015,
"step_cost": -0.019999999999986078,
"complete": 0.011765397964586153
},
"n_sheep": 10
}
]
Binary file not shown.

Before

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.
-242
View File
@@ -1,242 +0,0 @@
Config loaded from config.json
Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.0, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02}
Run dir: runs/v2
Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
[Stage n_sheep=1] training 1,500,000 steps
... [1 sheep | 100,000 steps | ret(last 40)=-23.39 win_sr=8% cum_sr=8%]
... [1 sheep | 200,000 steps | ret(last 50)=-22.10 win_sr=10% cum_sr=9%]
... [1 sheep | 300,000 steps | ret(last 50)=-23.02 win_sr=10% cum_sr=10%]
... [1 sheep | 400,000 steps | ret(last 50)=-18.97 win_sr=18% cum_sr=12%]
... [1 sheep | 500,000 steps | ret(last 50)=-20.01 win_sr=8% cum_sr=11%]
... [1 sheep | 600,000 steps | ret(last 50)=-18.57 win_sr=14% cum_sr=12%]
... [1 sheep | 700,000 steps | ret(last 50)=-17.55 win_sr=22% cum_sr=14%]
... [1 sheep | 800,000 steps | ret(last 50)=+7.41 win_sr=66% cum_sr=23%]
... [1 sheep | 900,000 steps | ret(last 50)=+17.61 win_sr=100% cum_sr=47%]
... [1 sheep | 1,000,000 steps | ret(last 50)=+16.11 win_sr=100% cum_sr=65%]
... [1 sheep | 1,100,000 steps | ret(last 50)=+15.82 win_sr=100% cum_sr=74%]
... [1 sheep | 1,200,000 steps | ret(last 50)=+14.33 win_sr=100% cum_sr=80%]
... [1 sheep | 1,300,000 steps | ret(last 50)=+14.19 win_sr=100% cum_sr=84%]
... [1 sheep | 1,400,000 steps | ret(last 50)=+14.00 win_sr=100% cum_sr=87%]
... [1 sheep | 1,500,000 steps | ret(last 50)=+13.96 win_sr=100% cum_sr=89%]
[Stage n_sheep=1] evaluating 30 eps
[Stage n_sheep=1] sr=100% mean_len=234 mean_min_pen=3.7m mean_act=0.41
failure modes: SUCCESS=30
reward/step: progress=+0.1118 alignment=+0.0003 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0427 step_cost=-0.0200 complete=+0.4274
[Stage n_sheep=2] training 1,500,000 steps
... [2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [2 sheep | 1,607,336 steps | ret(last 40)=-4.45 win_sr=8% cum_sr=8%]
... [2 sheep | 1,707,336 steps | ret(last 50)=-4.56 win_sr=8% cum_sr=9%]
... [2 sheep | 1,807,336 steps | ret(last 50)=-2.33 win_sr=12% cum_sr=10%]
... [2 sheep | 1,907,336 steps | ret(last 50)=+1.93 win_sr=24% cum_sr=14%]
... [2 sheep | 2,007,336 steps | ret(last 50)=+7.32 win_sr=52% cum_sr=24%]
... [2 sheep | 2,107,336 steps | ret(last 50)=+10.52 win_sr=58% cum_sr=30%]
... [2 sheep | 2,207,336 steps | ret(last 50)=+15.67 win_sr=76% cum_sr=39%]
... [2 sheep | 2,307,336 steps | ret(last 50)=+16.91 win_sr=78% cum_sr=46%]
... [2 sheep | 2,407,336 steps | ret(last 50)=+21.91 win_sr=96% cum_sr=53%]
... [2 sheep | 2,507,336 steps | ret(last 50)=+21.08 win_sr=94% cum_sr=60%]
... [2 sheep | 2,607,336 steps | ret(last 50)=+20.24 win_sr=92% cum_sr=65%]
... [2 sheep | 2,707,336 steps | ret(last 50)=+21.40 win_sr=96% cum_sr=70%]
... [2 sheep | 2,807,336 steps | ret(last 50)=+21.95 win_sr=100% cum_sr=73%]
... [2 sheep | 2,907,336 steps | ret(last 50)=+20.73 win_sr=100% cum_sr=76%]
... [2 sheep | 3,007,336 steps | ret(last 50)=+21.25 win_sr=100% cum_sr=79%]
[Stage n_sheep=2] evaluating 30 eps
[Stage n_sheep=2] sr=87% mean_len=1064 mean_min_pen=4.1m mean_act=0.59
failure modes: SUCCESS=26 COMPACT_CANT_DRIVE=4
reward/step: progress=+0.0565 alignment=+0.0071 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0163 step_cost=-0.0200 complete=+0.0815
[Stage n_sheep=3] training 1,500,000 steps
... [3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [3 sheep | 3,114,664 steps | ret(last 50)=+17.60 win_sr=72% cum_sr=73%]
... [3 sheep | 3,214,664 steps | ret(last 50)=+25.44 win_sr=98% cum_sr=87%]
... [3 sheep | 3,314,664 steps | ret(last 50)=+25.73 win_sr=92% cum_sr=90%]
... [3 sheep | 3,414,664 steps | ret(last 50)=+28.01 win_sr=98% cum_sr=92%]
... [3 sheep | 3,514,664 steps | ret(last 50)=+25.71 win_sr=94% cum_sr=93%]
... [3 sheep | 3,614,664 steps | ret(last 50)=+24.73 win_sr=94% cum_sr=93%]
... [3 sheep | 3,714,664 steps | ret(last 50)=+23.51 win_sr=88% cum_sr=92%]
... [3 sheep | 3,814,664 steps | ret(last 50)=+25.11 win_sr=96% cum_sr=93%]
... [3 sheep | 3,914,664 steps | ret(last 50)=+27.02 win_sr=100% cum_sr=93%]
... [3 sheep | 4,014,664 steps | ret(last 50)=+24.67 win_sr=94% cum_sr=94%]
... [3 sheep | 4,114,664 steps | ret(last 50)=+26.08 win_sr=98% cum_sr=94%]
... [3 sheep | 4,214,664 steps | ret(last 50)=+26.69 win_sr=98% cum_sr=94%]
... [3 sheep | 4,314,664 steps | ret(last 50)=+24.01 win_sr=92% cum_sr=94%]
... [3 sheep | 4,414,664 steps | ret(last 50)=+25.74 win_sr=98% cum_sr=94%]
... [3 sheep | 4,514,664 steps | ret(last 50)=+27.43 win_sr=100% cum_sr=95%]
[Stage n_sheep=3] evaluating 30 eps
[Stage n_sheep=3] sr=100% mean_len=769 mean_min_pen=3.5m mean_act=0.72
failure modes: SUCCESS=30
reward/step: progress=+0.1121 alignment=+0.0078 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0390 step_cost=-0.0200 complete=+0.1301
[Stage n_sheep=4] training 1,500,000 steps
... [4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [4 sheep | 4,621,992 steps | ret(last 50)=+32.50 win_sr=100% cum_sr=96%]
... [4 sheep | 4,721,992 steps | ret(last 50)=+31.21 win_sr=100% cum_sr=98%]
... [4 sheep | 4,821,992 steps | ret(last 50)=+34.05 win_sr=100% cum_sr=99%]
... [4 sheep | 4,921,992 steps | ret(last 50)=+32.04 win_sr=100% cum_sr=99%]
... [4 sheep | 5,021,992 steps | ret(last 50)=+29.20 win_sr=100% cum_sr=99%]
... [4 sheep | 5,121,992 steps | ret(last 50)=+31.56 win_sr=100% cum_sr=99%]
... [4 sheep | 5,221,992 steps | ret(last 50)=+31.25 win_sr=100% cum_sr=100%]
... [4 sheep | 5,321,992 steps | ret(last 50)=+30.62 win_sr=100% cum_sr=100%]
... [4 sheep | 5,421,992 steps | ret(last 50)=+30.44 win_sr=100% cum_sr=100%]
... [4 sheep | 5,521,992 steps | ret(last 50)=+32.84 win_sr=100% cum_sr=100%]
... [4 sheep | 5,621,992 steps | ret(last 50)=+30.98 win_sr=100% cum_sr=100%]
... [4 sheep | 5,721,992 steps | ret(last 50)=+28.77 win_sr=98% cum_sr=100%]
... [4 sheep | 5,821,992 steps | ret(last 50)=+29.24 win_sr=100% cum_sr=100%]
... [4 sheep | 5,921,992 steps | ret(last 50)=+30.83 win_sr=100% cum_sr=100%]
... [4 sheep | 6,021,992 steps | ret(last 50)=+30.06 win_sr=100% cum_sr=100%]
[Stage n_sheep=4] evaluating 30 eps
[Stage n_sheep=4] sr=100% mean_len=750 mean_min_pen=3.5m mean_act=1.23
failure modes: SUCCESS=30
reward/step: progress=+0.1586 alignment=+0.0113 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0533 step_cost=-0.0200 complete=+0.1334
[Stage n_sheep=5] training 1,500,000 steps
... [5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [5 sheep | 6,129,320 steps | ret(last 50)=+31.97 win_sr=100% cum_sr=100%]
... [5 sheep | 6,229,320 steps | ret(last 50)=+32.32 win_sr=100% cum_sr=100%]
... [5 sheep | 6,329,320 steps | ret(last 50)=+34.26 win_sr=100% cum_sr=100%]
... [5 sheep | 6,429,320 steps | ret(last 50)=+33.75 win_sr=100% cum_sr=100%]
... [5 sheep | 6,529,320 steps | ret(last 50)=+34.77 win_sr=100% cum_sr=100%]
... [5 sheep | 6,629,320 steps | ret(last 50)=+34.06 win_sr=100% cum_sr=100%]
... [5 sheep | 6,729,320 steps | ret(last 50)=+32.39 win_sr=96% cum_sr=100%]
... [5 sheep | 6,829,320 steps | ret(last 50)=+32.33 win_sr=100% cum_sr=100%]
... [5 sheep | 6,929,320 steps | ret(last 50)=+33.29 win_sr=100% cum_sr=100%]
... [5 sheep | 7,029,320 steps | ret(last 50)=+32.12 win_sr=100% cum_sr=100%]
... [5 sheep | 7,129,320 steps | ret(last 50)=+32.58 win_sr=100% cum_sr=100%]
... [5 sheep | 7,229,320 steps | ret(last 50)=+33.27 win_sr=100% cum_sr=100%]
... [5 sheep | 7,329,320 steps | ret(last 50)=+33.64 win_sr=100% cum_sr=100%]
... [5 sheep | 7,429,320 steps | ret(last 50)=+32.67 win_sr=100% cum_sr=100%]
... [5 sheep | 7,529,320 steps | ret(last 50)=+32.79 win_sr=100% cum_sr=100%]
[Stage n_sheep=5] evaluating 30 eps
[Stage n_sheep=5] sr=97% mean_len=921 mean_min_pen=3.2m mean_act=1.33
failure modes: SUCCESS=29 PARTIAL_3of5=1
reward/step: progress=+0.1565 alignment=+0.0135 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0536 step_cost=-0.0200 complete=+0.1050
[Stage n_sheep=6] training 1,500,000 steps
... [6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [6 sheep | 7,636,648 steps | ret(last 50)=+35.93 win_sr=100% cum_sr=96%]
... [6 sheep | 7,736,648 steps | ret(last 50)=+37.56 win_sr=100% cum_sr=97%]
... [6 sheep | 7,836,648 steps | ret(last 50)=+34.93 win_sr=100% cum_sr=98%]
... [6 sheep | 7,936,648 steps | ret(last 50)=+32.71 win_sr=98% cum_sr=98%]
... [6 sheep | 8,036,648 steps | ret(last 50)=+36.84 win_sr=100% cum_sr=99%]
... [6 sheep | 8,136,648 steps | ret(last 50)=+35.11 win_sr=100% cum_sr=99%]
... [6 sheep | 8,236,648 steps | ret(last 50)=+36.54 win_sr=100% cum_sr=99%]
... [6 sheep | 8,336,648 steps | ret(last 50)=+34.67 win_sr=100% cum_sr=99%]
... [6 sheep | 8,436,648 steps | ret(last 50)=+36.14 win_sr=100% cum_sr=99%]
... [6 sheep | 8,536,648 steps | ret(last 50)=+36.95 win_sr=100% cum_sr=99%]
... [6 sheep | 8,636,648 steps | ret(last 50)=+35.42 win_sr=100% cum_sr=99%]
... [6 sheep | 8,736,648 steps | ret(last 50)=+33.44 win_sr=100% cum_sr=100%]
... [6 sheep | 8,836,648 steps | ret(last 50)=+36.70 win_sr=100% cum_sr=100%]
... [6 sheep | 8,936,648 steps | ret(last 50)=+34.03 win_sr=100% cum_sr=100%]
... [6 sheep | 9,036,648 steps | ret(last 50)=+34.53 win_sr=100% cum_sr=100%]
[Stage n_sheep=6] evaluating 30 eps
[Stage n_sheep=6] sr=97% mean_len=1193 mean_min_pen=3.4m mean_act=1.36
failure modes: SUCCESS=29 COMPACT_CANT_DRIVE=1
reward/step: progress=+0.1597 alignment=+0.0173 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0492 step_cost=-0.0200 complete=+0.0810
[Stage n_sheep=7] training 1,500,000 steps
... [7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [7 sheep | 9,143,976 steps | ret(last 50)=+40.54 win_sr=100% cum_sr=100%]
... [7 sheep | 9,243,976 steps | ret(last 50)=+38.70 win_sr=98% cum_sr=99%]
... [7 sheep | 9,343,976 steps | ret(last 50)=+38.13 win_sr=100% cum_sr=100%]
... [7 sheep | 9,443,976 steps | ret(last 50)=+40.37 win_sr=100% cum_sr=100%]
... [7 sheep | 9,543,976 steps | ret(last 50)=+39.40 win_sr=100% cum_sr=99%]
... [7 sheep | 9,643,976 steps | ret(last 50)=+40.44 win_sr=98% cum_sr=99%]
... [7 sheep | 9,743,976 steps | ret(last 50)=+37.74 win_sr=100% cum_sr=99%]
... [7 sheep | 9,843,976 steps | ret(last 50)=+39.91 win_sr=98% cum_sr=99%]
... [7 sheep | 9,943,976 steps | ret(last 50)=+40.67 win_sr=100% cum_sr=99%]
... [7 sheep | 10,043,976 steps | ret(last 50)=+35.38 win_sr=100% cum_sr=99%]
... [7 sheep | 10,143,976 steps | ret(last 50)=+38.31 win_sr=100% cum_sr=99%]
... [7 sheep | 10,243,976 steps | ret(last 50)=+40.86 win_sr=100% cum_sr=99%]
... [7 sheep | 10,343,976 steps | ret(last 50)=+40.95 win_sr=100% cum_sr=99%]
... [7 sheep | 10,443,976 steps | ret(last 50)=+37.90 win_sr=100% cum_sr=99%]
... [7 sheep | 10,543,976 steps | ret(last 50)=+39.07 win_sr=100% cum_sr=99%]
[Stage n_sheep=7] evaluating 30 eps
[Stage n_sheep=7] sr=100% mean_len=1209 mean_min_pen=3.2m mean_act=1.37
failure modes: SUCCESS=30
reward/step: progress=+0.1774 alignment=+0.0179 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0579 step_cost=-0.0200 complete=+0.0827
[Stage n_sheep=8] training 1,500,000 steps
... [8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [8 sheep | 10,651,304 steps | ret(last 50)=+42.81 win_sr=100% cum_sr=100%]
... [8 sheep | 10,751,304 steps | ret(last 50)=+44.59 win_sr=100% cum_sr=100%]
... [8 sheep | 10,851,304 steps | ret(last 50)=+45.59 win_sr=98% cum_sr=99%]
... [8 sheep | 10,951,304 steps | ret(last 50)=+42.27 win_sr=98% cum_sr=99%]
... [8 sheep | 11,051,304 steps | ret(last 50)=+45.05 win_sr=98% cum_sr=99%]
... [8 sheep | 11,151,304 steps | ret(last 50)=+45.50 win_sr=100% cum_sr=99%]
... [8 sheep | 11,251,304 steps | ret(last 50)=+43.60 win_sr=100% cum_sr=99%]
... [8 sheep | 11,351,304 steps | ret(last 50)=+40.26 win_sr=100% cum_sr=99%]
... [8 sheep | 11,451,304 steps | ret(last 50)=+43.00 win_sr=100% cum_sr=99%]
... [8 sheep | 11,551,304 steps | ret(last 50)=+43.16 win_sr=100% cum_sr=100%]
... [8 sheep | 11,651,304 steps | ret(last 50)=+42.78 win_sr=100% cum_sr=100%]
... [8 sheep | 11,751,304 steps | ret(last 50)=+42.32 win_sr=98% cum_sr=99%]
... [8 sheep | 11,851,304 steps | ret(last 50)=+41.62 win_sr=100% cum_sr=99%]
... [8 sheep | 11,951,304 steps | ret(last 50)=+42.56 win_sr=98% cum_sr=99%]
... [8 sheep | 12,051,304 steps | ret(last 50)=+41.83 win_sr=100% cum_sr=99%]
[Stage n_sheep=8] evaluating 30 eps
[Stage n_sheep=8] sr=100% mean_len=1492 mean_min_pen=3.2m mean_act=1.38
failure modes: SUCCESS=30
reward/step: progress=+0.1916 alignment=+0.0190 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0536 step_cost=-0.0200 complete=+0.0670
[Stage n_sheep=9] training 1,500,000 steps
... [9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [9 sheep | 12,158,632 steps | ret(last 50)=+46.03 win_sr=100% cum_sr=100%]
... [9 sheep | 12,258,632 steps | ret(last 50)=+46.87 win_sr=96% cum_sr=97%]
... [9 sheep | 12,358,632 steps | ret(last 50)=+45.48 win_sr=96% cum_sr=97%]
... [9 sheep | 12,458,632 steps | ret(last 50)=+47.02 win_sr=96% cum_sr=97%]
... [9 sheep | 12,558,632 steps | ret(last 50)=+44.66 win_sr=96% cum_sr=97%]
... [9 sheep | 12,658,632 steps | ret(last 50)=+46.60 win_sr=96% cum_sr=97%]
... [9 sheep | 12,758,632 steps | ret(last 50)=+41.85 win_sr=96% cum_sr=97%]
... [9 sheep | 12,858,632 steps | ret(last 50)=+47.81 win_sr=96% cum_sr=97%]
... [9 sheep | 12,958,632 steps | ret(last 50)=+44.92 win_sr=90% cum_sr=96%]
... [9 sheep | 13,058,632 steps | ret(last 50)=+47.40 win_sr=90% cum_sr=96%]
... [9 sheep | 13,158,632 steps | ret(last 50)=+47.16 win_sr=92% cum_sr=95%]
... [9 sheep | 13,258,632 steps | ret(last 50)=+45.55 win_sr=98% cum_sr=96%]
... [9 sheep | 13,358,632 steps | ret(last 50)=+46.87 win_sr=96% cum_sr=96%]
... [9 sheep | 13,458,632 steps | ret(last 50)=+47.69 win_sr=98% cum_sr=96%]
... [9 sheep | 13,558,632 steps | ret(last 50)=+45.17 win_sr=94% cum_sr=96%]
[Stage n_sheep=9] evaluating 30 eps
[Stage n_sheep=9] sr=90% mean_len=1628 mean_min_pen=3.2m mean_act=1.38
failure modes: SUCCESS=27 COMPACT_CANT_DRIVE=3
reward/step: progress=+0.1802 alignment=+0.0204 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0514 step_cost=-0.0200 complete=+0.0553
[Stage n_sheep=10] training 1,500,000 steps
... [10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [10 sheep | 13,665,960 steps | ret(last 50)=+49.00 win_sr=82% cum_sr=82%]
... [10 sheep | 13,765,960 steps | ret(last 50)=+48.55 win_sr=86% cum_sr=84%]
... [10 sheep | 13,865,960 steps | ret(last 50)=+46.53 win_sr=80% cum_sr=83%]
... [10 sheep | 13,965,960 steps | ret(last 50)=+44.70 win_sr=82% cum_sr=83%]
... [10 sheep | 14,065,960 steps | ret(last 50)=+52.57 win_sr=92% cum_sr=85%]
... [10 sheep | 14,165,960 steps | ret(last 50)=+50.20 win_sr=82% cum_sr=85%]
... [10 sheep | 14,265,960 steps | ret(last 50)=+50.34 win_sr=90% cum_sr=85%]
... [10 sheep | 14,365,960 steps | ret(last 50)=+50.24 win_sr=90% cum_sr=86%]
... [10 sheep | 14,465,960 steps | ret(last 50)=+48.40 win_sr=86% cum_sr=86%]
... [10 sheep | 14,565,960 steps | ret(last 50)=+48.74 win_sr=88% cum_sr=87%]
... [10 sheep | 14,665,960 steps | ret(last 50)=+48.46 win_sr=80% cum_sr=86%]
... [10 sheep | 14,765,960 steps | ret(last 50)=+51.46 win_sr=70% cum_sr=85%]
... [10 sheep | 14,865,960 steps | ret(last 50)=+49.28 win_sr=92% cum_sr=85%]
... [10 sheep | 14,965,960 steps | ret(last 50)=+51.12 win_sr=88% cum_sr=86%]
... [10 sheep | 15,065,960 steps | ret(last 50)=+52.03 win_sr=84% cum_sr=85%]
[Stage n_sheep=10] evaluating 30 eps
[Stage n_sheep=10] sr=93% mean_len=1870 mean_min_pen=3.1m mean_act=1.38
failure modes: SUCCESS=28 COMPACT_CANT_DRIVE=2
reward/step: progress=+0.1727 alignment=+0.0219 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0522 step_cost=-0.0200 complete=+0.0499
======================================================================
TRAINING SUMMARY
======================================================================
n_sheep=1 sr=100% len= 234 min_pen= 3.7m act=0.41
n_sheep=2 sr= 87% len= 1064 min_pen= 4.1m act=0.59
n_sheep=3 sr=100% len= 769 min_pen= 3.5m act=0.72
n_sheep=4 sr=100% len= 750 min_pen= 3.5m act=1.23
n_sheep=5 sr= 97% len= 921 min_pen= 3.2m act=1.33
n_sheep=6 sr= 97% len= 1193 min_pen= 3.4m act=1.36
n_sheep=7 sr=100% len= 1209 min_pen= 3.2m act=1.37
n_sheep=8 sr=100% len= 1492 min_pen= 3.2m act=1.38
n_sheep=9 sr= 90% len= 1628 min_pen= 3.2m act=1.38
n_sheep=10 sr= 93% len= 1870 min_pen= 3.1m act=1.38
Total time: 92.0 min
Artefacts: runs/v2/
Plots: runs/v2/success_rate.png, runs/v2/eval/
-14
View File
@@ -1,14 +0,0 @@
{
"W_PER_SHEEP": 2.0,
"W_ALIGN": 0.05,
"W_PEN_BONUS": 10.0,
"W_COMPLETE": 100.0,
"W_STEP_COST": 0.02,
"W_COMPACT": 0.0,
"W_WALL_TOUCH": 0.0,
"WALL_TOUCH_BUFFER": 0.4,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": true,
"ENTRY_AWARE": true,
"ent_coef": 0.02
}
Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 133 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 259 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 453 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 658 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.0 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 904 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 202 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 75 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 101 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 99 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 115 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 169 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 172 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 190 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 192 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 141 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 152 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 155 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 176 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 184 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 194 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 158 KiB

Binary file not shown.
-197
View File
@@ -1,197 +0,0 @@
[
{
"sr": 1.0,
"mean_len": 234.0,
"mean_min_pen": 3.6668872674306234,
"mean_act": 0.4068990752695293,
"failure_modes": {
"SUCCESS": 30
},
"reward_per_step": {
"progress": 0.11183513424165568,
"alignment": 0.0002786317654047819,
"compact": 0.0,
"wall_touch": 0.0,
"pen_bonus": 0.042735042735042736,
"step_cost": -0.019999999999999716,
"complete": 0.42735042735042733
},
"n_sheep": 1
},
{
"sr": 0.8666666666666667,
"mean_len": 1063.6666666666667,
"mean_min_pen": 4.120940693219503,
"mean_act": 0.5870139278816712,
"failure_modes": {
"SUCCESS": 26,
"COMPACT_CANT_DRIVE": 4
},
"reward_per_step": {
"progress": 0.05651345582855781,
"alignment": 0.007121706701510673,
"compact": 0.0,
"wall_touch": 0.0,
"pen_bonus": 0.01629583202757756,
"step_cost": -0.0199999999999909,
"complete": 0.08147916013788781
},
"n_sheep": 2
},
{
"sr": 1.0,
"mean_len": 768.6,
"mean_min_pen": 3.4802104949951174,
"mean_act": 0.7173416881465967,
"failure_modes": {
"SUCCESS": 30
},
"reward_per_step": {
"progress": 0.11210350058336283,
"alignment": 0.007752684222105381,
"compact": 0.0,
"wall_touch": 0.0,
"pen_bonus": 0.039032006245121,
"step_cost": -0.019999999999994387,
"complete": 0.13010668748373666
},
"n_sheep": 3
},
{
"sr": 1.0,
"mean_len": 749.8666666666667,
"mean_min_pen": 3.491257842381795,
"mean_act": 1.2302732761302806,
"failure_modes": {
"SUCCESS": 30
},
"reward_per_step": {
"progress": 0.15859288932254823,
"alignment": 0.011327628562653137,
"compact": 0.0,
"wall_touch": 0.0,
"pen_bonus": 0.05334281650071124,
"step_cost": -0.0199999999999947,
"complete": 0.13335704125177808
},
"n_sheep": 4
},
{
"sr": 0.9666666666666667,
"mean_len": 920.5666666666667,
"mean_min_pen": 3.2368871172269187,
"mean_act": 1.329068384219205,
"failure_modes": {
"SUCCESS": 29,
"PARTIAL_3of5": 1
},
"reward_per_step": {
"progress": 0.15654392868672135,
"alignment": 0.013497823599666012,
"compact": 0.0,
"wall_touch": 0.0,
"pen_bonus": 0.05359017996161784,
"step_cost": -0.019999999999992312,
"complete": 0.10500778505992686
},
"n_sheep": 5
},
{
"sr": 0.9666666666666667,
"mean_len": 1193.2333333333333,
"mean_min_pen": 3.4217512369155885,
"mean_act": 1.3575613093489967,
"failure_modes": {
"COMPACT_CANT_DRIVE": 1,
"SUCCESS": 29
},
"reward_per_step": {
"progress": 0.15969395095863717,
"alignment": 0.017340700156353795,
"compact": 0.0,
"wall_touch": 0.0,
"pen_bonus": 0.049166131240048046,
"step_cost": -0.01999999999998991,
"complete": 0.08101237533871554
},
"n_sheep": 6
},
{
"sr": 1.0,
"mean_len": 1209.4666666666667,
"mean_min_pen": 3.2339003403981526,
"mean_act": 1.3714931576761524,
"failure_modes": {
"SUCCESS": 30
},
"reward_per_step": {
"progress": 0.17738547200352864,
"alignment": 0.017914342656107935,
"compact": 0.0,
"wall_touch": 0.0,
"pen_bonus": 0.057876750082681075,
"step_cost": -0.019999999999989804,
"complete": 0.08268107154668725
},
"n_sheep": 7
},
{
"sr": 1.0,
"mean_len": 1491.7666666666667,
"mean_min_pen": 3.216744065284729,
"mean_act": 1.3783802580111435,
"failure_modes": {
"SUCCESS": 30
},
"reward_per_step": {
"progress": 0.19162546125035912,
"alignment": 0.018971863842493202,
"compact": 0.0,
"wall_touch": 0.0,
"pen_bonus": 0.05362768976381472,
"step_cost": -0.01999999999998829,
"complete": 0.06703461220476839
},
"n_sheep": 8
},
{
"sr": 0.9,
"mean_len": 1627.5666666666666,
"mean_min_pen": 3.23857311407725,
"mean_act": 1.3832202011732966,
"failure_modes": {
"SUCCESS": 27,
"COMPACT_CANT_DRIVE": 3
},
"reward_per_step": {
"progress": 0.18015228593205654,
"alignment": 0.020407598899987247,
"compact": 0.0,
"wall_touch": 0.0,
"pen_bonus": 0.05140598439388044,
"step_cost": -0.01999999999998775,
"complete": 0.055297274049194094
},
"n_sheep": 9
},
{
"sr": 0.9333333333333333,
"mean_len": 1869.9666666666667,
"mean_min_pen": 3.1344878753026326,
"mean_act": 1.3841143385300063,
"failure_modes": {
"SUCCESS": 28,
"COMPACT_CANT_DRIVE": 2
},
"reward_per_step": {
"progress": 0.17267533684098152,
"alignment": 0.021850885374692264,
"compact": 0.0,
"wall_touch": 0.0,
"pen_bonus": 0.05222909499278062,
"step_cost": -0.019999999999986983,
"complete": 0.04991176313303267
},
"n_sheep": 10
}
]
Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.
-242
View File
@@ -1,242 +0,0 @@
Config loaded from config.json
Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_SOUTH': 0.01, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.0, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02}
Run dir: runs/v3
Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
[Stage n_sheep=1] training 1,500,000 steps
... [1 sheep | 100,000 steps | ret(last 24)=-47.74 win_sr=12% cum_sr=12%]
... [1 sheep | 200,000 steps | ret(last 50)=-40.77 win_sr=14% cum_sr=16%]
... [1 sheep | 300,000 steps | ret(last 50)=-36.39 win_sr=16% cum_sr=16%]
... [1 sheep | 400,000 steps | ret(last 50)=-40.04 win_sr=14% cum_sr=15%]
... [1 sheep | 500,000 steps | ret(last 50)=+7.09 win_sr=80% cum_sr=36%]
... [1 sheep | 600,000 steps | ret(last 50)=+15.87 win_sr=100% cum_sr=71%]
... [1 sheep | 700,000 steps | ret(last 50)=+14.78 win_sr=100% cum_sr=84%]
... [1 sheep | 800,000 steps | ret(last 50)=+14.04 win_sr=100% cum_sr=90%]
... [1 sheep | 900,000 steps | ret(last 50)=+14.08 win_sr=100% cum_sr=92%]
... [1 sheep | 1,000,000 steps | ret(last 50)=+13.33 win_sr=100% cum_sr=94%]
... [1 sheep | 1,100,000 steps | ret(last 50)=+13.99 win_sr=100% cum_sr=95%]
... [1 sheep | 1,200,000 steps | ret(last 50)=+13.38 win_sr=100% cum_sr=96%]
... [1 sheep | 1,300,000 steps | ret(last 50)=+13.18 win_sr=100% cum_sr=96%]
... [1 sheep | 1,400,000 steps | ret(last 50)=+13.53 win_sr=100% cum_sr=97%]
... [1 sheep | 1,500,000 steps | ret(last 50)=+13.46 win_sr=100% cum_sr=97%]
[Stage n_sheep=1] evaluating 30 eps
[Stage n_sheep=1] sr=100% mean_len=264 mean_min_pen=3.7m mean_act=0.45
failure modes: SUCCESS=30
reward/step: progress=+0.1156 alignment=+0.0001 south=-0.0005 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0378 step_cost=-0.0200 complete=+0.3784
[Stage n_sheep=2] training 1,500,000 steps
... [2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [2 sheep | 1,607,336 steps | ret(last 35)=-3.04 win_sr=49% cum_sr=49%]
... [2 sheep | 1,707,336 steps | ret(last 50)=-11.13 win_sr=20% cum_sr=33%]
... [2 sheep | 1,807,336 steps | ret(last 50)=-11.83 win_sr=18% cum_sr=31%]
... [2 sheep | 1,907,336 steps | ret(last 50)=-8.76 win_sr=30% cum_sr=31%]
... [2 sheep | 2,007,336 steps | ret(last 50)=-8.95 win_sr=30% cum_sr=30%]
... [2 sheep | 2,107,336 steps | ret(last 50)=-9.06 win_sr=32% cum_sr=30%]
... [2 sheep | 2,207,336 steps | ret(last 50)=-9.48 win_sr=32% cum_sr=30%]
... [2 sheep | 2,307,336 steps | ret(last 50)=-1.70 win_sr=44% cum_sr=33%]
... [2 sheep | 2,407,336 steps | ret(last 50)=+5.02 win_sr=64% cum_sr=38%]
... [2 sheep | 2,507,336 steps | ret(last 50)=+13.32 win_sr=88% cum_sr=46%]
... [2 sheep | 2,607,336 steps | ret(last 50)=+12.15 win_sr=90% cum_sr=54%]
... [2 sheep | 2,707,336 steps | ret(last 50)=+17.13 win_sr=98% cum_sr=63%]
... [2 sheep | 2,807,336 steps | ret(last 50)=+18.81 win_sr=98% cum_sr=69%]
... [2 sheep | 2,907,336 steps | ret(last 50)=+16.23 win_sr=92% cum_sr=73%]
... [2 sheep | 3,007,336 steps | ret(last 50)=+18.83 win_sr=100% cum_sr=76%]
[Stage n_sheep=2] evaluating 30 eps
[Stage n_sheep=2] sr=77% mean_len=1398 mean_min_pen=3.3m mean_act=0.97
failure modes: SUCCESS=23 PARTIAL_1of2=6 COMPACT_CANT_DRIVE=1
reward/step: progress=+0.0401 alignment=+0.0045 south=-0.0039 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0126 step_cost=-0.0200 complete=+0.0549
[Stage n_sheep=3] training 1,500,000 steps
... [3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [3 sheep | 3,114,664 steps | ret(last 50)=+13.79 win_sr=82% cum_sr=84%]
... [3 sheep | 3,214,664 steps | ret(last 50)=+21.64 win_sr=96% cum_sr=88%]
... [3 sheep | 3,314,664 steps | ret(last 50)=+23.45 win_sr=98% cum_sr=92%]
... [3 sheep | 3,414,664 steps | ret(last 50)=+22.18 win_sr=98% cum_sr=94%]
... [3 sheep | 3,514,664 steps | ret(last 50)=+24.83 win_sr=100% cum_sr=96%]
... [3 sheep | 3,614,664 steps | ret(last 50)=+19.77 win_sr=94% cum_sr=96%]
... [3 sheep | 3,714,664 steps | ret(last 50)=+25.53 win_sr=100% cum_sr=96%]
... [3 sheep | 3,814,664 steps | ret(last 50)=+25.24 win_sr=100% cum_sr=97%]
... [3 sheep | 3,914,664 steps | ret(last 50)=+24.43 win_sr=100% cum_sr=97%]
... [3 sheep | 4,014,664 steps | ret(last 50)=+24.59 win_sr=100% cum_sr=97%]
... [3 sheep | 4,114,664 steps | ret(last 50)=+22.18 win_sr=98% cum_sr=98%]
... [3 sheep | 4,214,664 steps | ret(last 50)=+23.11 win_sr=96% cum_sr=97%]
... [3 sheep | 4,314,664 steps | ret(last 50)=+23.06 win_sr=98% cum_sr=97%]
... [3 sheep | 4,414,664 steps | ret(last 50)=+23.35 win_sr=100% cum_sr=97%]
... [3 sheep | 4,514,664 steps | ret(last 50)=+22.50 win_sr=100% cum_sr=98%]
[Stage n_sheep=3] evaluating 30 eps
[Stage n_sheep=3] sr=97% mean_len=1095 mean_min_pen=2.5m mean_act=0.95
failure modes: SUCCESS=29 COMPACT_CANT_DRIVE=1
reward/step: progress=+0.0821 alignment=+0.0113 south=-0.0087 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0265 step_cost=-0.0200 complete=+0.0883
[Stage n_sheep=4] training 1,500,000 steps
... [4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [4 sheep | 4,621,992 steps | ret(last 50)=+22.17 win_sr=92% cum_sr=94%]
... [4 sheep | 4,721,992 steps | ret(last 50)=+25.81 win_sr=94% cum_sr=93%]
... [4 sheep | 4,821,992 steps | ret(last 50)=+21.80 win_sr=90% cum_sr=93%]
... [4 sheep | 4,921,992 steps | ret(last 50)=+26.38 win_sr=98% cum_sr=94%]
... [4 sheep | 5,021,992 steps | ret(last 50)=+26.65 win_sr=98% cum_sr=95%]
... [4 sheep | 5,121,992 steps | ret(last 50)=+26.07 win_sr=98% cum_sr=95%]
... [4 sheep | 5,221,992 steps | ret(last 50)=+27.08 win_sr=98% cum_sr=96%]
... [4 sheep | 5,321,992 steps | ret(last 50)=+27.87 win_sr=100% cum_sr=96%]
... [4 sheep | 5,421,992 steps | ret(last 50)=+27.53 win_sr=100% cum_sr=97%]
... [4 sheep | 5,521,992 steps | ret(last 50)=+25.91 win_sr=100% cum_sr=97%]
... [4 sheep | 5,621,992 steps | ret(last 50)=+27.75 win_sr=100% cum_sr=97%]
... [4 sheep | 5,721,992 steps | ret(last 50)=+25.63 win_sr=100% cum_sr=97%]
... [4 sheep | 5,821,992 steps | ret(last 50)=+24.43 win_sr=98% cum_sr=97%]
... [4 sheep | 5,921,992 steps | ret(last 50)=+22.52 win_sr=94% cum_sr=97%]
... [4 sheep | 6,021,992 steps | ret(last 50)=+27.28 win_sr=100% cum_sr=98%]
[Stage n_sheep=4] evaluating 30 eps
[Stage n_sheep=4] sr=57% mean_len=2572 mean_min_pen=2.2m mean_act=1.28
failure modes: SUCCESS=17 PARTIAL_1of4=6 PARTIAL_2of4=5 DROVE_NO_SHEEP=1 NEVER_COMPACT=1
reward/step: progress=+0.0455 alignment=+0.0040 south=-0.0454 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0109 step_cost=-0.0200 complete=+0.0220
[Stage n_sheep=5] training 1,500,000 steps
... [5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [5 sheep | 6,129,320 steps | ret(last 50)=+28.06 win_sr=96% cum_sr=96%]
... [5 sheep | 6,229,320 steps | ret(last 50)=+31.40 win_sr=98% cum_sr=96%]
... [5 sheep | 6,329,320 steps | ret(last 50)=+27.81 win_sr=96% cum_sr=96%]
... [5 sheep | 6,429,320 steps | ret(last 50)=+22.08 win_sr=88% cum_sr=95%]
... [5 sheep | 6,529,320 steps | ret(last 50)=+26.99 win_sr=94% cum_sr=95%]
... [5 sheep | 6,629,320 steps | ret(last 50)=+21.24 win_sr=86% cum_sr=93%]
... [5 sheep | 6,729,320 steps | ret(last 50)=+24.58 win_sr=94% cum_sr=93%]
... [5 sheep | 6,829,320 steps | ret(last 50)=+29.66 win_sr=96% cum_sr=93%]
... [5 sheep | 6,929,320 steps | ret(last 50)=+27.53 win_sr=96% cum_sr=93%]
... [5 sheep | 7,029,320 steps | ret(last 50)=+28.99 win_sr=100% cum_sr=94%]
... [5 sheep | 7,129,320 steps | ret(last 50)=+27.59 win_sr=98% cum_sr=94%]
... [5 sheep | 7,229,320 steps | ret(last 50)=+30.79 win_sr=100% cum_sr=95%]
... [5 sheep | 7,329,320 steps | ret(last 50)=+30.56 win_sr=98% cum_sr=95%]
... [5 sheep | 7,429,320 steps | ret(last 50)=+31.55 win_sr=100% cum_sr=95%]
... [5 sheep | 7,529,320 steps | ret(last 50)=+29.95 win_sr=100% cum_sr=96%]
[Stage n_sheep=5] evaluating 30 eps
[Stage n_sheep=5] sr=0% mean_len=4000 mean_min_pen=1.7m mean_act=1.36
failure modes: PARTIAL_4of5=17 PARTIAL_1of5=9 PARTIAL_3of5=2 PARTIAL_2of5=2
reward/step: progress=+0.0396 alignment=+0.0034 south=-0.0393 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0073 step_cost=-0.0200 complete=+0.0000
[Stage n_sheep=6] training 1,500,000 steps
... [6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [6 sheep | 7,636,648 steps | ret(last 50)=+34.50 win_sr=100% cum_sr=100%]
... [6 sheep | 7,736,648 steps | ret(last 50)=+31.01 win_sr=100% cum_sr=100%]
... [6 sheep | 7,836,648 steps | ret(last 50)=+33.27 win_sr=100% cum_sr=100%]
... [6 sheep | 7,936,648 steps | ret(last 50)=+34.81 win_sr=100% cum_sr=100%]
... [6 sheep | 8,036,648 steps | ret(last 50)=+32.69 win_sr=100% cum_sr=100%]
... [6 sheep | 8,136,648 steps | ret(last 50)=+31.36 win_sr=96% cum_sr=99%]
... [6 sheep | 8,236,648 steps | ret(last 50)=+33.71 win_sr=100% cum_sr=99%]
... [6 sheep | 8,336,648 steps | ret(last 50)=+34.71 win_sr=100% cum_sr=99%]
... [6 sheep | 8,436,648 steps | ret(last 50)=+31.89 win_sr=96% cum_sr=99%]
... [6 sheep | 8,536,648 steps | ret(last 50)=+35.63 win_sr=100% cum_sr=99%]
... [6 sheep | 8,636,648 steps | ret(last 50)=+35.92 win_sr=100% cum_sr=99%]
... [6 sheep | 8,736,648 steps | ret(last 50)=+33.70 win_sr=100% cum_sr=99%]
... [6 sheep | 8,836,648 steps | ret(last 50)=+33.46 win_sr=100% cum_sr=99%]
... [6 sheep | 8,936,648 steps | ret(last 50)=+35.12 win_sr=100% cum_sr=99%]
... [6 sheep | 9,036,648 steps | ret(last 50)=+34.21 win_sr=100% cum_sr=100%]
[Stage n_sheep=6] evaluating 30 eps
[Stage n_sheep=6] sr=37% mean_len=3137 mean_min_pen=1.8m mean_act=1.37
failure modes: PARTIAL_4of6=14 SUCCESS=11 PARTIAL_3of6=5
reward/step: progress=+0.0654 alignment=+0.0085 south=-0.0392 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0146 step_cost=-0.0200 complete=+0.0117
[Stage n_sheep=7] training 1,500,000 steps
... [7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [7 sheep | 9,143,976 steps | ret(last 50)=+36.14 win_sr=100% cum_sr=100%]
... [7 sheep | 9,243,976 steps | ret(last 50)=+33.77 win_sr=98% cum_sr=99%]
... [7 sheep | 9,343,976 steps | ret(last 50)=+37.14 win_sr=100% cum_sr=100%]
... [7 sheep | 9,443,976 steps | ret(last 50)=+39.90 win_sr=100% cum_sr=100%]
... [7 sheep | 9,543,976 steps | ret(last 50)=+37.52 win_sr=100% cum_sr=100%]
... [7 sheep | 9,643,976 steps | ret(last 50)=+37.31 win_sr=100% cum_sr=100%]
... [7 sheep | 9,743,976 steps | ret(last 50)=+36.24 win_sr=100% cum_sr=100%]
... [7 sheep | 9,843,976 steps | ret(last 50)=+39.67 win_sr=100% cum_sr=100%]
... [7 sheep | 9,943,976 steps | ret(last 50)=+39.12 win_sr=100% cum_sr=100%]
... [7 sheep | 10,043,976 steps | ret(last 50)=+37.82 win_sr=100% cum_sr=100%]
... [7 sheep | 10,143,976 steps | ret(last 50)=+37.38 win_sr=100% cum_sr=100%]
... [7 sheep | 10,243,976 steps | ret(last 50)=+37.47 win_sr=98% cum_sr=100%]
... [7 sheep | 10,343,976 steps | ret(last 50)=+36.04 win_sr=98% cum_sr=99%]
... [7 sheep | 10,443,976 steps | ret(last 50)=+31.71 win_sr=98% cum_sr=99%]
... [7 sheep | 10,543,976 steps | ret(last 50)=+32.50 win_sr=96% cum_sr=99%]
[Stage n_sheep=7] evaluating 30 eps
[Stage n_sheep=7] sr=0% mean_len=4000 mean_min_pen=1.8m mean_act=1.38
failure modes: PARTIAL_5of7=18 PARTIAL_6of7=7 PARTIAL_3of7=3 PARTIAL_4of7=2
reward/step: progress=+0.0533 alignment=+0.0069 south=-0.0356 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0124 step_cost=-0.0200 complete=+0.0000
[Stage n_sheep=8] training 1,500,000 steps
... [8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [8 sheep | 10,651,304 steps | ret(last 50)=+36.01 win_sr=96% cum_sr=96%]
... [8 sheep | 10,751,304 steps | ret(last 50)=+37.97 win_sr=96% cum_sr=96%]
... [8 sheep | 10,851,304 steps | ret(last 50)=+39.12 win_sr=100% cum_sr=98%]
... [8 sheep | 10,951,304 steps | ret(last 50)=+36.54 win_sr=96% cum_sr=97%]
... [8 sheep | 11,051,304 steps | ret(last 50)=+40.58 win_sr=100% cum_sr=98%]
... [8 sheep | 11,151,304 steps | ret(last 50)=+39.00 win_sr=98% cum_sr=98%]
... [8 sheep | 11,251,304 steps | ret(last 50)=+38.54 win_sr=98% cum_sr=98%]
... [8 sheep | 11,351,304 steps | ret(last 50)=+39.29 win_sr=100% cum_sr=98%]
... [8 sheep | 11,451,304 steps | ret(last 50)=+38.36 win_sr=100% cum_sr=98%]
... [8 sheep | 11,551,304 steps | ret(last 50)=+40.04 win_sr=100% cum_sr=98%]
... [8 sheep | 11,651,304 steps | ret(last 50)=+37.92 win_sr=100% cum_sr=99%]
... [8 sheep | 11,751,304 steps | ret(last 50)=+40.01 win_sr=98% cum_sr=99%]
... [8 sheep | 11,851,304 steps | ret(last 50)=+39.06 win_sr=100% cum_sr=99%]
... [8 sheep | 11,951,304 steps | ret(last 50)=+41.39 win_sr=100% cum_sr=99%]
... [8 sheep | 12,051,304 steps | ret(last 50)=+40.05 win_sr=100% cum_sr=99%]
[Stage n_sheep=8] evaluating 30 eps
[Stage n_sheep=8] sr=60% mean_len=2472 mean_min_pen=1.6m mean_act=1.39
failure modes: SUCCESS=18 PARTIAL_6of8=9 PARTIAL_4of8=3
reward/step: progress=+0.0956 alignment=+0.0106 south=-0.0508 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0283 step_cost=-0.0200 complete=+0.0243
[Stage n_sheep=9] training 1,500,000 steps
... [9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [9 sheep | 12,158,632 steps | ret(last 50)=+41.35 win_sr=98% cum_sr=98%]
... [9 sheep | 12,258,632 steps | ret(last 50)=+41.63 win_sr=100% cum_sr=99%]
... [9 sheep | 12,358,632 steps | ret(last 50)=+41.85 win_sr=100% cum_sr=99%]
... [9 sheep | 12,458,632 steps | ret(last 50)=+42.49 win_sr=100% cum_sr=100%]
... [9 sheep | 12,558,632 steps | ret(last 50)=+40.87 win_sr=100% cum_sr=100%]
... [9 sheep | 12,658,632 steps | ret(last 50)=+39.09 win_sr=100% cum_sr=100%]
... [9 sheep | 12,758,632 steps | ret(last 50)=+42.23 win_sr=100% cum_sr=100%]
... [9 sheep | 12,858,632 steps | ret(last 50)=+41.00 win_sr=100% cum_sr=100%]
... [9 sheep | 12,958,632 steps | ret(last 50)=+43.02 win_sr=100% cum_sr=100%]
... [9 sheep | 13,058,632 steps | ret(last 50)=+41.13 win_sr=100% cum_sr=100%]
... [9 sheep | 13,158,632 steps | ret(last 50)=+41.02 win_sr=100% cum_sr=100%]
... [9 sheep | 13,258,632 steps | ret(last 50)=+42.88 win_sr=100% cum_sr=100%]
... [9 sheep | 13,358,632 steps | ret(last 50)=+46.16 win_sr=100% cum_sr=100%]
... [9 sheep | 13,458,632 steps | ret(last 50)=+44.69 win_sr=100% cum_sr=100%]
... [9 sheep | 13,558,632 steps | ret(last 50)=+44.49 win_sr=100% cum_sr=100%]
[Stage n_sheep=9] evaluating 30 eps
[Stage n_sheep=9] sr=0% mean_len=4000 mean_min_pen=1.5m mean_act=1.39
failure modes: PARTIAL_8of9=26 PARTIAL_7of9=4
reward/step: progress=+0.0787 alignment=+0.0079 south=-0.0184 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0197 step_cost=-0.0200 complete=+0.0000
[Stage n_sheep=10] training 1,500,000 steps
... [10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
... [10 sheep | 13,665,960 steps | ret(last 50)=+43.38 win_sr=100% cum_sr=100%]
... [10 sheep | 13,765,960 steps | ret(last 50)=+43.26 win_sr=100% cum_sr=100%]
... [10 sheep | 13,865,960 steps | ret(last 50)=+46.91 win_sr=100% cum_sr=100%]
... [10 sheep | 13,965,960 steps | ret(last 50)=+45.36 win_sr=100% cum_sr=100%]
... [10 sheep | 14,065,960 steps | ret(last 50)=+45.37 win_sr=100% cum_sr=100%]
... [10 sheep | 14,165,960 steps | ret(last 50)=+44.30 win_sr=100% cum_sr=100%]
... [10 sheep | 14,265,960 steps | ret(last 50)=+43.83 win_sr=100% cum_sr=100%]
... [10 sheep | 14,365,960 steps | ret(last 50)=+47.09 win_sr=100% cum_sr=100%]
... [10 sheep | 14,465,960 steps | ret(last 50)=+41.32 win_sr=100% cum_sr=100%]
... [10 sheep | 14,565,960 steps | ret(last 50)=+45.30 win_sr=100% cum_sr=100%]
... [10 sheep | 14,665,960 steps | ret(last 50)=+45.36 win_sr=98% cum_sr=100%]
... [10 sheep | 14,765,960 steps | ret(last 50)=+41.83 win_sr=100% cum_sr=100%]
... [10 sheep | 14,865,960 steps | ret(last 50)=+44.40 win_sr=100% cum_sr=100%]
... [10 sheep | 14,965,960 steps | ret(last 50)=+45.89 win_sr=100% cum_sr=100%]
... [10 sheep | 15,065,960 steps | ret(last 50)=+42.49 win_sr=100% cum_sr=100%]
[Stage n_sheep=10] evaluating 30 eps
[Stage n_sheep=10] sr=83% mean_len=2243 mean_min_pen=1.5m mean_act=1.40
failure modes: SUCCESS=25 PARTIAL_8of10=3 PARTIAL_7of10=2
reward/step: progress=+0.1387 alignment=+0.0150 south=-0.0437 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0428 step_cost=-0.0200 complete=+0.0372
======================================================================
TRAINING SUMMARY
======================================================================
n_sheep=1 sr=100% len= 264 min_pen= 3.7m act=0.45
n_sheep=2 sr= 77% len= 1398 min_pen= 3.3m act=0.97
n_sheep=3 sr= 97% len= 1095 min_pen= 2.5m act=0.95
n_sheep=4 sr= 57% len= 2572 min_pen= 2.2m act=1.28
n_sheep=5 sr= 0% len= 4000 min_pen= 1.7m act=1.36
n_sheep=6 sr= 37% len= 3137 min_pen= 1.8m act=1.37
n_sheep=7 sr= 0% len= 4000 min_pen= 1.8m act=1.38
n_sheep=8 sr= 60% len= 2472 min_pen= 1.6m act=1.39
n_sheep=9 sr= 0% len= 4000 min_pen= 1.5m act=1.39
n_sheep=10 sr= 83% len= 2243 min_pen= 1.5m act=1.40
Total time: 94.3 min
Artefacts: runs/v3/
Plots: runs/v3/success_rate.png, runs/v3/eval/
-15
View File
@@ -1,15 +0,0 @@
{
"W_PER_SHEEP": 2.0,
"W_ALIGN": 0.05,
"W_PEN_BONUS": 10.0,
"W_COMPLETE": 100.0,
"W_STEP_COST": 0.02,
"W_SOUTH": 0.01,
"W_COMPACT": 0.0,
"W_WALL_TOUCH": 0.0,
"WALL_TOUCH_BUFFER": 0.4,
"ALIGN_SHAPE": "standoff",
"ALIGN_GATED": true,
"ENTRY_AWARE": true,
"ent_coef": 0.02
}
Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.5 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 135 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 248 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.6 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.8 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Some files were not shown because too many files have changed in this diff Show More