Mimics webots approach better + debug. Lucky number
@@ -19,6 +19,7 @@ Permutation-invariant by design: curriculum stages share the same obs dim
|
||||
so VecNormalize statistics transfer as n_sheep advances.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import numpy as np
|
||||
import gymnasium as gym
|
||||
from gymnasium import spaces
|
||||
@@ -31,7 +32,8 @@ class HerdingEnv(gym.Env):
|
||||
# World constants — must match Webots world file
|
||||
# -----------------------------------------------------------------------
|
||||
MAX_SHEEP = 10
|
||||
FIELD = 15.0 # half-size; positions ∈ [-FIELD, FIELD]
|
||||
FIELD = 15.0 # field wall geometry in world file
|
||||
SHEEP_WALL_INNER = 14.5 # sheep.py wall checks use ±14.5
|
||||
PEN_X = (10.0, 13.0)
|
||||
PEN_Y = (-15.0, -8.0)
|
||||
PEN_CENTER = np.array([11.5, -11.5], dtype=np.float32)
|
||||
@@ -41,17 +43,22 @@ class HerdingEnv(gym.Env):
|
||||
# Dynamics — calibrated to match Webots robot specs
|
||||
# -----------------------------------------------------------------------
|
||||
DOG_SPEED = 2.5 # m/s
|
||||
SHEEP_FLEE_V = 0.65 # m/s
|
||||
SHEEP_WANDER_V = 0.20 # m/s
|
||||
SHEEP_FLEE_V = 0.62 # m/s (20 rad/s * 0.031 m wheel radius in sheep.py)
|
||||
SHEEP_WANDER_V = 0.093 # m/s (3 rad/s * 0.031 m wheel radius in sheep.py)
|
||||
DT = 0.1 # seconds per step
|
||||
|
||||
# Wheeled dog dynamics — mirror the Webots controller's drive():
|
||||
# forward speed gated by cos(heading_error); turn rate proportional to
|
||||
# error. Without this, the env treats the dog as a particle that can
|
||||
# change direction instantly, producing policies that bang-bang and don't
|
||||
# transfer to the wheeled Webots robot.
|
||||
DOG_K_TURN = 4.0 # rad/s per rad (heading-error gain)
|
||||
DOG_MAX_TURN_RATE = 6.0 # rad/s (cap on turn rate)
|
||||
# Differential-drive dog dynamics — mirrors shepherd_dog_rl.py drive():
|
||||
# speed_ms = ||a|| * DOG_SPEED
|
||||
# err = wrap(target_heading - heading)
|
||||
# fwd_ms = speed_ms * max(0, cos(err))
|
||||
# fwd_rad = fwd_ms / DOG_WHEEL_R
|
||||
# turn = DOG_K_TURN * err
|
||||
# l = clamp(fwd_rad - turn), r = clamp(fwd_rad + turn)
|
||||
# Then integrated as unicycle kinematics using wheel geometry.
|
||||
DOG_K_TURN = 4.0 # rad/s per rad (matches Webots controller)
|
||||
DOG_WHEEL_R = 0.038 # m (ShepherdDog.proto wheel radius)
|
||||
DOG_AXLE_TRACK = 0.28 # m (wheel anchors at y=±0.14 in proto)
|
||||
DOG_MOTOR_MAX = 70.0 # rad/s (ShepherdDog.proto motor maxVelocity)
|
||||
DOG_STOP_THRESHOLD = 0.05 # ||action|| below this → dog stops in place
|
||||
|
||||
# Boid parameters — identical to sheep.py
|
||||
@@ -135,6 +142,15 @@ class HerdingEnv(gym.Env):
|
||||
self.wander_ang = np.zeros(self.MAX_SHEEP, dtype=np.float32)
|
||||
|
||||
self._fig = None
|
||||
# Differential-drive debug CSV for sim/Webots parity checks.
|
||||
# Always on by design.
|
||||
self._dog_debug_file = open("dog_debug.csv", "w", newline="")
|
||||
self._dog_debug_writer = csv.writer(self._dog_debug_file)
|
||||
self._dog_debug_writer.writerow([
|
||||
"step", "act_x", "act_y", "act_mag", "heading", "target_heading",
|
||||
"heading_err", "fwd_speed", "left_w", "right_w", "v", "w",
|
||||
"dog_x", "dog_y",
|
||||
])
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Curriculum interface
|
||||
@@ -234,12 +250,19 @@ class HerdingEnv(gym.Env):
|
||||
|
||||
act = np.clip(np.asarray(action, dtype=np.float32), -1.0, 1.0)
|
||||
old_dog = self.dog_pos.copy()
|
||||
dog_dbg = {
|
||||
"target_heading": float(self.dog_heading),
|
||||
"err": 0.0,
|
||||
"fwd_speed": 0.0,
|
||||
"left_w": 0.0,
|
||||
"right_w": 0.0,
|
||||
"v": 0.0,
|
||||
"w": 0.0,
|
||||
}
|
||||
|
||||
# Wheeled-dog kinematics — mirrors the Webots controller's drive():
|
||||
# interpret (vx, vy) as a desired velocity vector in world frame; the
|
||||
# dog turns toward it at a limited rate, and forward speed is gated
|
||||
# by cos(heading_error). Bang-bang policies still produce smooth
|
||||
# motion (the dog can't sidestep — it has to turn first).
|
||||
# Differential-drive kinematics — mirrors Webots drive():
|
||||
# action -> desired heading/speed -> wheel angular velocities (with
|
||||
# saturation) -> body linear/angular velocity via wheel geometry.
|
||||
act_mag = float(np.linalg.norm(act))
|
||||
if act_mag < self.DOG_STOP_THRESHOLD:
|
||||
# Below threshold the Webots dog stops; treat the same way here.
|
||||
@@ -249,19 +272,36 @@ class HerdingEnv(gym.Env):
|
||||
err = target_heading - self.dog_heading
|
||||
# Wrap to [-pi, pi)
|
||||
err = (err + np.pi) % (2 * np.pi) - np.pi
|
||||
turn_rate = np.clip(self.DOG_K_TURN * err,
|
||||
-self.DOG_MAX_TURN_RATE,
|
||||
self.DOG_MAX_TURN_RATE)
|
||||
self.dog_heading = float(
|
||||
((self.dog_heading + turn_rate * self.DT) + np.pi)
|
||||
% (2 * np.pi) - np.pi
|
||||
)
|
||||
|
||||
target_speed = act_mag * self.DOG_SPEED
|
||||
fwd_speed = target_speed * max(0.0, float(np.cos(err)))
|
||||
step_vec = np.array([np.cos(self.dog_heading),
|
||||
np.sin(self.dog_heading)], dtype=np.float32)
|
||||
fwd_rad = fwd_speed / self.DOG_WHEEL_R
|
||||
turn = self.DOG_K_TURN * err
|
||||
|
||||
left_w = np.clip(fwd_rad - turn, -self.DOG_MOTOR_MAX, self.DOG_MOTOR_MAX)
|
||||
right_w = np.clip(fwd_rad + turn, -self.DOG_MOTOR_MAX, self.DOG_MOTOR_MAX)
|
||||
|
||||
v = self.DOG_WHEEL_R * 0.5 * (right_w + left_w)
|
||||
w = (self.DOG_WHEEL_R / self.DOG_AXLE_TRACK) * (right_w - left_w)
|
||||
dog_dbg.update({
|
||||
"target_heading": target_heading,
|
||||
"err": float(err),
|
||||
"fwd_speed": float(fwd_speed),
|
||||
"left_w": float(left_w),
|
||||
"right_w": float(right_w),
|
||||
"v": float(v),
|
||||
"w": float(w),
|
||||
})
|
||||
|
||||
self.dog_heading = float(
|
||||
((self.dog_heading + w * self.DT) + np.pi) % (2 * np.pi) - np.pi
|
||||
)
|
||||
step_vec = np.array(
|
||||
[np.cos(self.dog_heading), np.sin(self.dog_heading)],
|
||||
dtype=np.float32
|
||||
)
|
||||
new_dog = np.clip(
|
||||
self.dog_pos + step_vec * fwd_speed * self.DT,
|
||||
self.dog_pos + step_vec * v * self.DT,
|
||||
-self.FIELD, self.FIELD,
|
||||
)
|
||||
|
||||
@@ -281,8 +321,6 @@ class HerdingEnv(gym.Env):
|
||||
self.dog_pos = new_dog.astype(np.float32)
|
||||
|
||||
for i in range(self.n_sheep):
|
||||
if self.penned[i]:
|
||||
continue
|
||||
self.sheep_pos[i] = self._step_sheep(i)
|
||||
if self._in_pen(self.sheep_pos[i]):
|
||||
self.penned[i] = True
|
||||
@@ -295,7 +333,18 @@ class HerdingEnv(gym.Env):
|
||||
terminated = n_penned == self.n_sheep
|
||||
truncated = self._step_count >= self.max_steps
|
||||
info = {"n_penned": n_penned, "n_sheep": self.n_sheep,
|
||||
"rcomps": rcomps}
|
||||
"rcomps": rcomps, "dog_dyn": dog_dbg}
|
||||
|
||||
self._dog_debug_writer.writerow([
|
||||
self._step_count,
|
||||
float(act[0]), float(act[1]), act_mag,
|
||||
float(self.dog_heading), dog_dbg["target_heading"], dog_dbg["err"],
|
||||
dog_dbg["fwd_speed"], dog_dbg["left_w"], dog_dbg["right_w"],
|
||||
dog_dbg["v"], dog_dbg["w"],
|
||||
float(self.dog_pos[0]), float(self.dog_pos[1]),
|
||||
])
|
||||
if self._step_count % 200 == 0:
|
||||
self._dog_debug_file.flush()
|
||||
|
||||
if self.render_mode == "human":
|
||||
self.render()
|
||||
@@ -357,6 +406,7 @@ class HerdingEnv(gym.Env):
|
||||
import matplotlib.pyplot as plt
|
||||
plt.close(self._fig)
|
||||
self._fig = None
|
||||
self._dog_debug_file.close()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internals
|
||||
@@ -529,6 +579,41 @@ class HerdingEnv(gym.Env):
|
||||
old_pos = self.sheep_pos[i].copy() # saved for pen wall collision check
|
||||
pos = old_pos.copy()
|
||||
fx, fy = 0.0, 0.0
|
||||
if self.penned[i]:
|
||||
# Webots latch behavior: once in pen, sheep keep moving under
|
||||
# confinement + penned-sheep separation + wander.
|
||||
pm = 0.8 # PEN_MARGIN in sheep.py
|
||||
px0, px1 = self.PEN_X
|
||||
py0, py1 = self.PEN_Y
|
||||
x, y = float(pos[0]), float(pos[1])
|
||||
if x < px0 + pm: fx += ((px0 + pm - x) / pm) * 15.0
|
||||
if x > px1 - pm: fx -= ((x - (px1 - pm)) / pm) * 15.0
|
||||
if y < py0 + pm: fy += ((py0 + pm - y) / pm) * 15.0
|
||||
if y > py1 - pm: fy -= ((y - (py1 - pm)) / pm) * 15.0
|
||||
|
||||
for j in range(self.n_sheep):
|
||||
if j == i or not self.penned[j]:
|
||||
continue
|
||||
dv = self.sheep_pos[j] - pos
|
||||
dj = float(np.linalg.norm(dv))
|
||||
if 0.05 < dj < self.SEPARATION_DIST:
|
||||
push = (self.SEPARATION_DIST - dj) / dj
|
||||
fx -= (dv[0] / dj) * push * 2.5
|
||||
fy -= (dv[1] / dj) * push * 2.5
|
||||
|
||||
if self.np_random.random() < 0.02:
|
||||
self.wander_ang[i] += float(self.np_random.uniform(-0.6, 0.6))
|
||||
fx += float(np.cos(self.wander_ang[i])) * 0.5
|
||||
fy += float(np.sin(self.wander_ang[i])) * 0.5
|
||||
|
||||
force = np.array([fx, fy], dtype=np.float32)
|
||||
mag = float(np.linalg.norm(force))
|
||||
if mag > 0.01:
|
||||
speed = min(self.SHEEP_FLEE_V, mag * 0.3)
|
||||
pos = np.clip(pos + (force / mag) * speed * self.DT,
|
||||
-self.FIELD, self.FIELD)
|
||||
return pos.astype(np.float32)
|
||||
|
||||
fleeing = False
|
||||
|
||||
# Flee from dog — quadratic ramp
|
||||
@@ -536,11 +621,19 @@ class HerdingEnv(gym.Env):
|
||||
dist = float(np.linalg.norm(diff))
|
||||
if 0.01 < dist < self.FLEE_DIST:
|
||||
t = 1.0 - dist / self.FLEE_DIST
|
||||
s = t * t * 5.0
|
||||
s = t * t * 20.0
|
||||
fx -= (diff[0] / dist) * s
|
||||
fy -= (diff[1] / dist) * s
|
||||
fleeing = True
|
||||
|
||||
# Repel unpenned sheep from pen side-wall exteriors (sheep.py PEN_EXT_MARGIN).
|
||||
if self.PEN_Y[0] < pos[1] < self.PEN_Y[1]:
|
||||
pem = 0.8
|
||||
if self.PEN_X[0] - pem < pos[0] < self.PEN_X[0]:
|
||||
fx -= ((pos[0] - (self.PEN_X[0] - pem)) / pem) * 6.0
|
||||
if self.PEN_X[1] < pos[0] < self.PEN_X[1] + pem:
|
||||
fx += ((self.PEN_X[1] + pem - pos[0]) / pem) * 6.0
|
||||
|
||||
# Separation (inverse-distance) + Cohesion
|
||||
cx, cy, cn = 0.0, 0.0, 0
|
||||
for j in range(self.n_sheep):
|
||||
@@ -562,7 +655,7 @@ class HerdingEnv(gym.Env):
|
||||
fy += (cy / cn - pos[1]) * w
|
||||
|
||||
# Wall avoidance
|
||||
m, F = self.WALL_MARGIN, self.FIELD
|
||||
m, F = self.WALL_MARGIN, self.SHEEP_WALL_INNER
|
||||
if pos[0] < -F + m: fx += ((-F + m - pos[0]) / m) * 6.0
|
||||
if pos[0] > F - m: fx -= ((pos[0] - (F - m)) / m) * 6.0
|
||||
if pos[1] < -F + m: fy += ((-F + m - pos[1]) / m) * 6.0
|
||||
@@ -609,5 +702,8 @@ class HerdingEnv(gym.Env):
|
||||
# Block crossing through east wall from outside
|
||||
if old_pos[0] > px1 >= pos[0] and py0 < pos[1] < py1:
|
||||
pos = np.array([px1 + 1e-3, pos[1]], dtype=np.float32)
|
||||
# Block crossing through south wall from outside
|
||||
if old_pos[1] < py0 <= pos[1] and px0 < pos[0] < px1:
|
||||
pos = np.array([pos[0], py0 - 1e-3], dtype=np.float32)
|
||||
|
||||
return pos.astype(np.float32)
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
@@ -1,242 +0,0 @@
|
||||
Config loaded from config.json
|
||||
Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.01, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02}
|
||||
Run dir: runs/v1
|
||||
Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
|
||||
|
||||
|
||||
[Stage n_sheep=1] training 1,500,000 steps
|
||||
... [1 sheep | 100,000 steps | ret(last 40)=-19.52 win_sr=2% cum_sr=2%]
|
||||
... [1 sheep | 200,000 steps | ret(last 50)=-21.11 win_sr=4% cum_sr=4%]
|
||||
... [1 sheep | 300,000 steps | ret(last 50)=-7.06 win_sr=12% cum_sr=7%]
|
||||
... [1 sheep | 400,000 steps | ret(last 50)=+18.18 win_sr=90% cum_sr=40%]
|
||||
... [1 sheep | 500,000 steps | ret(last 50)=+16.17 win_sr=100% cum_sr=69%]
|
||||
... [1 sheep | 600,000 steps | ret(last 50)=+14.68 win_sr=100% cum_sr=82%]
|
||||
... [1 sheep | 700,000 steps | ret(last 50)=+14.33 win_sr=100% cum_sr=88%]
|
||||
... [1 sheep | 800,000 steps | ret(last 50)=+14.20 win_sr=100% cum_sr=91%]
|
||||
... [1 sheep | 900,000 steps | ret(last 50)=+13.82 win_sr=100% cum_sr=93%]
|
||||
... [1 sheep | 1,000,000 steps | ret(last 50)=+13.76 win_sr=100% cum_sr=94%]
|
||||
... [1 sheep | 1,100,000 steps | ret(last 50)=+13.72 win_sr=100% cum_sr=95%]
|
||||
... [1 sheep | 1,200,000 steps | ret(last 50)=+13.41 win_sr=100% cum_sr=95%]
|
||||
... [1 sheep | 1,300,000 steps | ret(last 50)=+13.42 win_sr=100% cum_sr=96%]
|
||||
... [1 sheep | 1,400,000 steps | ret(last 50)=+13.40 win_sr=100% cum_sr=96%]
|
||||
... [1 sheep | 1,500,000 steps | ret(last 50)=+13.24 win_sr=100% cum_sr=97%]
|
||||
[Stage n_sheep=1] evaluating 30 eps
|
||||
[Stage n_sheep=1] sr=100% mean_len=243 mean_min_pen=3.7m mean_act=0.39
|
||||
failure modes: SUCCESS=30
|
||||
reward/step: progress=+0.1141 alignment=+0.0003 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0412 step_cost=-0.0200 complete=+0.4115
|
||||
|
||||
[Stage n_sheep=2] training 1,500,000 steps
|
||||
... [2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [2 sheep | 1,607,336 steps | ret(last 42)=-4.07 win_sr=7% cum_sr=7%]
|
||||
... [2 sheep | 1,707,336 steps | ret(last 50)=-6.10 win_sr=0% cum_sr=4%]
|
||||
... [2 sheep | 1,807,336 steps | ret(last 50)=-5.57 win_sr=2% cum_sr=3%]
|
||||
... [2 sheep | 1,907,336 steps | ret(last 50)=-5.04 win_sr=4% cum_sr=4%]
|
||||
... [2 sheep | 2,007,336 steps | ret(last 50)=-4.27 win_sr=10% cum_sr=5%]
|
||||
... [2 sheep | 2,107,336 steps | ret(last 50)=-4.42 win_sr=6% cum_sr=5%]
|
||||
... [2 sheep | 2,207,336 steps | ret(last 50)=+4.57 win_sr=50% cum_sr=13%]
|
||||
... [2 sheep | 2,307,336 steps | ret(last 50)=+11.35 win_sr=70% cum_sr=24%]
|
||||
... [2 sheep | 2,407,336 steps | ret(last 50)=+15.75 win_sr=86% cum_sr=32%]
|
||||
... [2 sheep | 2,507,336 steps | ret(last 50)=+19.97 win_sr=100% cum_sr=44%]
|
||||
... [2 sheep | 2,607,336 steps | ret(last 50)=+20.73 win_sr=100% cum_sr=54%]
|
||||
... [2 sheep | 2,707,336 steps | ret(last 50)=+19.81 win_sr=100% cum_sr=62%]
|
||||
... [2 sheep | 2,807,336 steps | ret(last 50)=+20.83 win_sr=100% cum_sr=67%]
|
||||
... [2 sheep | 2,907,336 steps | ret(last 50)=+20.43 win_sr=100% cum_sr=72%]
|
||||
... [2 sheep | 3,007,336 steps | ret(last 50)=+19.65 win_sr=100% cum_sr=75%]
|
||||
[Stage n_sheep=2] evaluating 30 eps
|
||||
[Stage n_sheep=2] sr=63% mean_len=1325 mean_min_pen=3.1m mean_act=0.42
|
||||
failure modes: SUCCESS=19 PARTIAL_1of2=10 COMPACT_CANT_DRIVE=1
|
||||
reward/step: progress=+0.0453 alignment=+0.0065 compact=+0.0000 wall_touch=-0.0052 pen_bonus=+0.0123 step_cost=-0.0200 complete=+0.0478
|
||||
|
||||
[Stage n_sheep=3] training 1,500,000 steps
|
||||
... [3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [3 sheep | 3,114,664 steps | ret(last 50)=+28.02 win_sr=100% cum_sr=100%]
|
||||
... [3 sheep | 3,214,664 steps | ret(last 50)=+24.04 win_sr=96% cum_sr=99%]
|
||||
... [3 sheep | 3,314,664 steps | ret(last 50)=+27.02 win_sr=100% cum_sr=99%]
|
||||
... [3 sheep | 3,414,664 steps | ret(last 50)=+25.53 win_sr=100% cum_sr=99%]
|
||||
... [3 sheep | 3,514,664 steps | ret(last 50)=+25.13 win_sr=96% cum_sr=99%]
|
||||
... [3 sheep | 3,614,664 steps | ret(last 50)=+26.45 win_sr=100% cum_sr=99%]
|
||||
... [3 sheep | 3,714,664 steps | ret(last 50)=+25.83 win_sr=100% cum_sr=99%]
|
||||
... [3 sheep | 3,814,664 steps | ret(last 50)=+26.07 win_sr=100% cum_sr=99%]
|
||||
... [3 sheep | 3,914,664 steps | ret(last 50)=+25.03 win_sr=96% cum_sr=99%]
|
||||
... [3 sheep | 4,014,664 steps | ret(last 50)=+24.53 win_sr=98% cum_sr=99%]
|
||||
... [3 sheep | 4,114,664 steps | ret(last 50)=+24.98 win_sr=100% cum_sr=99%]
|
||||
... [3 sheep | 4,214,664 steps | ret(last 50)=+26.81 win_sr=100% cum_sr=99%]
|
||||
... [3 sheep | 4,314,664 steps | ret(last 50)=+24.78 win_sr=98% cum_sr=99%]
|
||||
... [3 sheep | 4,414,664 steps | ret(last 50)=+26.79 win_sr=100% cum_sr=99%]
|
||||
... [3 sheep | 4,514,664 steps | ret(last 50)=+26.26 win_sr=100% cum_sr=99%]
|
||||
[Stage n_sheep=3] evaluating 30 eps
|
||||
[Stage n_sheep=3] sr=97% mean_len=828 mean_min_pen=2.7m mean_act=1.15
|
||||
failure modes: SUCCESS=29 PARTIAL_1of3=1
|
||||
reward/step: progress=+0.1017 alignment=+0.0139 compact=+0.0000 wall_touch=-0.0023 pen_bonus=+0.0354 step_cost=-0.0200 complete=+0.1168
|
||||
|
||||
[Stage n_sheep=4] training 1,500,000 steps
|
||||
... [4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [4 sheep | 4,621,992 steps | ret(last 50)=+28.27 win_sr=90% cum_sr=93%]
|
||||
... [4 sheep | 4,721,992 steps | ret(last 50)=+31.16 win_sr=98% cum_sr=95%]
|
||||
... [4 sheep | 4,821,992 steps | ret(last 50)=+30.45 win_sr=100% cum_sr=96%]
|
||||
... [4 sheep | 4,921,992 steps | ret(last 50)=+31.12 win_sr=100% cum_sr=96%]
|
||||
... [4 sheep | 5,021,992 steps | ret(last 50)=+30.78 win_sr=100% cum_sr=97%]
|
||||
... [4 sheep | 5,121,992 steps | ret(last 50)=+30.42 win_sr=100% cum_sr=97%]
|
||||
... [4 sheep | 5,221,992 steps | ret(last 50)=+31.14 win_sr=100% cum_sr=98%]
|
||||
... [4 sheep | 5,321,992 steps | ret(last 50)=+31.20 win_sr=100% cum_sr=98%]
|
||||
... [4 sheep | 5,421,992 steps | ret(last 50)=+30.47 win_sr=98% cum_sr=98%]
|
||||
... [4 sheep | 5,521,992 steps | ret(last 50)=+30.13 win_sr=100% cum_sr=98%]
|
||||
... [4 sheep | 5,621,992 steps | ret(last 50)=+28.52 win_sr=98% cum_sr=98%]
|
||||
... [4 sheep | 5,721,992 steps | ret(last 50)=+31.40 win_sr=100% cum_sr=98%]
|
||||
... [4 sheep | 5,821,992 steps | ret(last 50)=+30.30 win_sr=100% cum_sr=99%]
|
||||
... [4 sheep | 5,921,992 steps | ret(last 50)=+29.45 win_sr=100% cum_sr=99%]
|
||||
... [4 sheep | 6,021,992 steps | ret(last 50)=+30.26 win_sr=100% cum_sr=99%]
|
||||
[Stage n_sheep=4] evaluating 30 eps
|
||||
[Stage n_sheep=4] sr=57% mean_len=1686 mean_min_pen=1.8m mean_act=1.01
|
||||
failure modes: SUCCESS=17 PARTIAL_1of4=10 PARTIAL_2of4=3
|
||||
reward/step: progress=+0.0721 alignment=+0.0085 compact=+0.0000 wall_touch=-0.0110 pen_bonus=+0.0166 step_cost=-0.0200 complete=+0.0336
|
||||
|
||||
[Stage n_sheep=5] training 1,500,000 steps
|
||||
... [5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [5 sheep | 6,129,320 steps | ret(last 50)=+34.59 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,229,320 steps | ret(last 50)=+35.53 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,329,320 steps | ret(last 50)=+34.77 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,429,320 steps | ret(last 50)=+34.30 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,529,320 steps | ret(last 50)=+35.12 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,629,320 steps | ret(last 50)=+33.76 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,729,320 steps | ret(last 50)=+34.81 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,829,320 steps | ret(last 50)=+31.82 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,929,320 steps | ret(last 50)=+33.69 win_sr=98% cum_sr=100%]
|
||||
... [5 sheep | 7,029,320 steps | ret(last 50)=+31.65 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 7,129,320 steps | ret(last 50)=+31.83 win_sr=96% cum_sr=99%]
|
||||
... [5 sheep | 7,229,320 steps | ret(last 50)=+33.96 win_sr=100% cum_sr=99%]
|
||||
... [5 sheep | 7,329,320 steps | ret(last 50)=+33.65 win_sr=98% cum_sr=99%]
|
||||
... [5 sheep | 7,429,320 steps | ret(last 50)=+34.20 win_sr=100% cum_sr=99%]
|
||||
... [5 sheep | 7,529,320 steps | ret(last 50)=+35.27 win_sr=98% cum_sr=99%]
|
||||
[Stage n_sheep=5] evaluating 30 eps
|
||||
[Stage n_sheep=5] sr=63% mean_len=1654 mean_min_pen=1.6m mean_act=1.36
|
||||
failure modes: SUCCESS=19 PARTIAL_2of5=9 PARTIAL_3of5=2
|
||||
reward/step: progress=+0.1043 alignment=+0.0108 compact=+0.0000 wall_touch=-0.0100 pen_bonus=+0.0240 step_cost=-0.0200 complete=+0.0383
|
||||
|
||||
[Stage n_sheep=6] training 1,500,000 steps
|
||||
... [6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [6 sheep | 7,636,648 steps | ret(last 50)=+37.79 win_sr=98% cum_sr=95%]
|
||||
... [6 sheep | 7,736,648 steps | ret(last 50)=+38.87 win_sr=94% cum_sr=95%]
|
||||
... [6 sheep | 7,836,648 steps | ret(last 50)=+37.50 win_sr=98% cum_sr=96%]
|
||||
... [6 sheep | 7,936,648 steps | ret(last 50)=+39.42 win_sr=94% cum_sr=96%]
|
||||
... [6 sheep | 8,036,648 steps | ret(last 50)=+38.28 win_sr=98% cum_sr=96%]
|
||||
... [6 sheep | 8,136,648 steps | ret(last 50)=+36.39 win_sr=100% cum_sr=97%]
|
||||
... [6 sheep | 8,236,648 steps | ret(last 50)=+39.29 win_sr=100% cum_sr=97%]
|
||||
... [6 sheep | 8,336,648 steps | ret(last 50)=+37.92 win_sr=98% cum_sr=97%]
|
||||
... [6 sheep | 8,436,648 steps | ret(last 50)=+38.64 win_sr=98% cum_sr=97%]
|
||||
... [6 sheep | 8,536,648 steps | ret(last 50)=+38.46 win_sr=98% cum_sr=97%]
|
||||
... [6 sheep | 8,636,648 steps | ret(last 50)=+38.08 win_sr=98% cum_sr=97%]
|
||||
... [6 sheep | 8,736,648 steps | ret(last 50)=+36.78 win_sr=100% cum_sr=97%]
|
||||
... [6 sheep | 8,836,648 steps | ret(last 50)=+36.81 win_sr=98% cum_sr=98%]
|
||||
... [6 sheep | 8,936,648 steps | ret(last 50)=+37.89 win_sr=98% cum_sr=98%]
|
||||
... [6 sheep | 9,036,648 steps | ret(last 50)=+36.17 win_sr=98% cum_sr=98%]
|
||||
[Stage n_sheep=6] evaluating 30 eps
|
||||
[Stage n_sheep=6] sr=33% mean_len=2161 mean_min_pen=1.8m mean_act=1.37
|
||||
failure modes: PARTIAL_5of6=14 SUCCESS=10 PARTIAL_4of6=4 COMPACT_CANT_DRIVE=2
|
||||
reward/step: progress=+0.0915 alignment=+0.0102 compact=+0.0000 wall_touch=-0.0068 pen_bonus=+0.0225 step_cost=-0.0200 complete=+0.0154
|
||||
|
||||
[Stage n_sheep=7] training 1,500,000 steps
|
||||
... [7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [7 sheep | 9,143,976 steps | ret(last 50)=+39.61 win_sr=98% cum_sr=97%]
|
||||
... [7 sheep | 9,243,976 steps | ret(last 50)=+42.39 win_sr=100% cum_sr=99%]
|
||||
... [7 sheep | 9,343,976 steps | ret(last 50)=+39.89 win_sr=96% cum_sr=98%]
|
||||
... [7 sheep | 9,443,976 steps | ret(last 50)=+42.48 win_sr=98% cum_sr=98%]
|
||||
... [7 sheep | 9,543,976 steps | ret(last 50)=+39.16 win_sr=98% cum_sr=98%]
|
||||
... [7 sheep | 9,643,976 steps | ret(last 50)=+38.80 win_sr=96% cum_sr=98%]
|
||||
... [7 sheep | 9,743,976 steps | ret(last 50)=+43.06 win_sr=96% cum_sr=98%]
|
||||
... [7 sheep | 9,843,976 steps | ret(last 50)=+40.04 win_sr=94% cum_sr=98%]
|
||||
... [7 sheep | 9,943,976 steps | ret(last 50)=+40.45 win_sr=98% cum_sr=97%]
|
||||
... [7 sheep | 10,043,976 steps | ret(last 50)=+39.21 win_sr=96% cum_sr=97%]
|
||||
... [7 sheep | 10,143,976 steps | ret(last 50)=+40.23 win_sr=100% cum_sr=97%]
|
||||
... [7 sheep | 10,243,976 steps | ret(last 50)=+41.51 win_sr=96% cum_sr=97%]
|
||||
... [7 sheep | 10,343,976 steps | ret(last 50)=+40.05 win_sr=98% cum_sr=97%]
|
||||
... [7 sheep | 10,443,976 steps | ret(last 50)=+39.17 win_sr=96% cum_sr=97%]
|
||||
... [7 sheep | 10,543,976 steps | ret(last 50)=+41.80 win_sr=98% cum_sr=97%]
|
||||
[Stage n_sheep=7] evaluating 30 eps
|
||||
[Stage n_sheep=7] sr=20% mean_len=2211 mean_min_pen=1.8m mean_act=1.36
|
||||
failure modes: PARTIAL_4of7=11 PARTIAL_3of7=8 SUCCESS=6 PARTIAL_6of7=3 PARTIAL_5of7=2
|
||||
reward/step: progress=+0.0879 alignment=+0.0086 compact=+0.0000 wall_touch=-0.0142 pen_bonus=+0.0208 step_cost=-0.0200 complete=+0.0090
|
||||
|
||||
[Stage n_sheep=8] training 1,500,000 steps
|
||||
... [8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [8 sheep | 10,651,304 steps | ret(last 50)=+48.67 win_sr=96% cum_sr=97%]
|
||||
... [8 sheep | 10,751,304 steps | ret(last 50)=+46.60 win_sr=96% cum_sr=97%]
|
||||
... [8 sheep | 10,851,304 steps | ret(last 50)=+41.39 win_sr=98% cum_sr=96%]
|
||||
... [8 sheep | 10,951,304 steps | ret(last 50)=+41.47 win_sr=96% cum_sr=96%]
|
||||
... [8 sheep | 11,051,304 steps | ret(last 50)=+40.29 win_sr=96% cum_sr=95%]
|
||||
... [8 sheep | 11,151,304 steps | ret(last 50)=+42.96 win_sr=100% cum_sr=96%]
|
||||
... [8 sheep | 11,251,304 steps | ret(last 50)=+42.87 win_sr=94% cum_sr=96%]
|
||||
... [8 sheep | 11,351,304 steps | ret(last 50)=+44.71 win_sr=100% cum_sr=96%]
|
||||
... [8 sheep | 11,451,304 steps | ret(last 50)=+45.20 win_sr=96% cum_sr=96%]
|
||||
... [8 sheep | 11,551,304 steps | ret(last 50)=+46.82 win_sr=96% cum_sr=96%]
|
||||
... [8 sheep | 11,651,304 steps | ret(last 50)=+43.23 win_sr=96% cum_sr=96%]
|
||||
... [8 sheep | 11,751,304 steps | ret(last 50)=+43.77 win_sr=94% cum_sr=96%]
|
||||
... [8 sheep | 11,851,304 steps | ret(last 50)=+48.78 win_sr=98% cum_sr=96%]
|
||||
... [8 sheep | 11,951,304 steps | ret(last 50)=+43.19 win_sr=94% cum_sr=96%]
|
||||
... [8 sheep | 12,051,304 steps | ret(last 50)=+42.83 win_sr=96% cum_sr=96%]
|
||||
[Stage n_sheep=8] evaluating 30 eps
|
||||
[Stage n_sheep=8] sr=63% mean_len=1745 mean_min_pen=1.7m mean_act=1.37
|
||||
failure modes: SUCCESS=19 PARTIAL_4of8=9 PARTIAL_1of8=1 PARTIAL_6of8=1
|
||||
reward/step: progress=+0.1198 alignment=+0.0134 compact=+0.0000 wall_touch=-0.0107 pen_bonus=+0.0373 step_cost=-0.0200 complete=+0.0363
|
||||
|
||||
[Stage n_sheep=9] training 1,500,000 steps
|
||||
... [9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [9 sheep | 12,158,632 steps | ret(last 50)=+49.04 win_sr=98% cum_sr=97%]
|
||||
... [9 sheep | 12,258,632 steps | ret(last 50)=+47.01 win_sr=96% cum_sr=97%]
|
||||
... [9 sheep | 12,358,632 steps | ret(last 50)=+48.47 win_sr=90% cum_sr=95%]
|
||||
... [9 sheep | 12,458,632 steps | ret(last 50)=+46.43 win_sr=88% cum_sr=94%]
|
||||
... [9 sheep | 12,558,632 steps | ret(last 50)=+44.78 win_sr=94% cum_sr=94%]
|
||||
... [9 sheep | 12,658,632 steps | ret(last 50)=+49.15 win_sr=100% cum_sr=95%]
|
||||
... [9 sheep | 12,758,632 steps | ret(last 50)=+47.87 win_sr=94% cum_sr=95%]
|
||||
... [9 sheep | 12,858,632 steps | ret(last 50)=+50.32 win_sr=96% cum_sr=95%]
|
||||
... [9 sheep | 12,958,632 steps | ret(last 50)=+47.07 win_sr=94% cum_sr=95%]
|
||||
... [9 sheep | 13,058,632 steps | ret(last 50)=+48.71 win_sr=100% cum_sr=96%]
|
||||
... [9 sheep | 13,158,632 steps | ret(last 50)=+47.69 win_sr=96% cum_sr=96%]
|
||||
... [9 sheep | 13,258,632 steps | ret(last 50)=+46.83 win_sr=98% cum_sr=96%]
|
||||
... [9 sheep | 13,358,632 steps | ret(last 50)=+48.27 win_sr=94% cum_sr=96%]
|
||||
... [9 sheep | 13,458,632 steps | ret(last 50)=+47.61 win_sr=88% cum_sr=95%]
|
||||
... [9 sheep | 13,558,632 steps | ret(last 50)=+47.29 win_sr=96% cum_sr=95%]
|
||||
[Stage n_sheep=9] evaluating 30 eps
|
||||
[Stage n_sheep=9] sr=83% mean_len=1723 mean_min_pen=1.8m mean_act=1.38
|
||||
failure modes: SUCCESS=25 PARTIAL_5of9=3 NEVER_COMPACT=1 PARTIAL_6of9=1
|
||||
reward/step: progress=+0.1562 alignment=+0.0155 compact=+0.0000 wall_touch=-0.0073 pen_bonus=+0.0480 step_cost=-0.0200 complete=+0.0484
|
||||
|
||||
[Stage n_sheep=10] training 1,500,000 steps
|
||||
... [10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [10 sheep | 13,665,960 steps | ret(last 50)=+50.02 win_sr=94% cum_sr=95%]
|
||||
... [10 sheep | 13,765,960 steps | ret(last 50)=+48.46 win_sr=84% cum_sr=90%]
|
||||
... [10 sheep | 13,865,960 steps | ret(last 50)=+48.24 win_sr=84% cum_sr=87%]
|
||||
... [10 sheep | 13,965,960 steps | ret(last 50)=+50.64 win_sr=82% cum_sr=86%]
|
||||
... [10 sheep | 14,065,960 steps | ret(last 50)=+53.92 win_sr=86% cum_sr=86%]
|
||||
... [10 sheep | 14,165,960 steps | ret(last 50)=+51.33 win_sr=80% cum_sr=85%]
|
||||
... [10 sheep | 14,265,960 steps | ret(last 50)=+49.55 win_sr=76% cum_sr=84%]
|
||||
... [10 sheep | 14,365,960 steps | ret(last 50)=+51.05 win_sr=92% cum_sr=85%]
|
||||
... [10 sheep | 14,465,960 steps | ret(last 50)=+47.58 win_sr=86% cum_sr=85%]
|
||||
... [10 sheep | 14,565,960 steps | ret(last 50)=+47.98 win_sr=74% cum_sr=84%]
|
||||
... [10 sheep | 14,665,960 steps | ret(last 50)=+50.60 win_sr=82% cum_sr=84%]
|
||||
... [10 sheep | 14,765,960 steps | ret(last 50)=+51.25 win_sr=88% cum_sr=84%]
|
||||
... [10 sheep | 14,865,960 steps | ret(last 50)=+50.54 win_sr=92% cum_sr=85%]
|
||||
... [10 sheep | 14,965,960 steps | ret(last 50)=+50.94 win_sr=92% cum_sr=86%]
|
||||
... [10 sheep | 15,065,960 steps | ret(last 50)=+50.54 win_sr=90% cum_sr=86%]
|
||||
[Stage n_sheep=10] evaluating 30 eps
|
||||
[Stage n_sheep=10] sr=27% mean_len=2267 mean_min_pen=2.2m mean_act=1.38
|
||||
failure modes: PARTIAL_6of10=16 SUCCESS=8 COMPACT_CANT_DRIVE=2 PARTIAL_7of10=1 PARTIAL_9of10=1 PARTIAL_5of10=1 PARTIAL_8of10=1
|
||||
reward/step: progress=+0.1360 alignment=+0.0134 compact=+0.0000 wall_touch=-0.0122 pen_bonus=+0.0301 step_cost=-0.0200 complete=+0.0118
|
||||
|
||||
======================================================================
|
||||
TRAINING SUMMARY
|
||||
======================================================================
|
||||
n_sheep=1 sr=100% len= 243 min_pen= 3.7m act=0.39
|
||||
n_sheep=2 sr= 63% len= 1325 min_pen= 3.1m act=0.42
|
||||
n_sheep=3 sr= 97% len= 828 min_pen= 2.7m act=1.15
|
||||
n_sheep=4 sr= 57% len= 1686 min_pen= 1.8m act=1.01
|
||||
n_sheep=5 sr= 63% len= 1654 min_pen= 1.6m act=1.36
|
||||
n_sheep=6 sr= 33% len= 2161 min_pen= 1.8m act=1.37
|
||||
n_sheep=7 sr= 20% len= 2211 min_pen= 1.8m act=1.36
|
||||
n_sheep=8 sr= 63% len= 1745 min_pen= 1.7m act=1.37
|
||||
n_sheep=9 sr= 83% len= 1723 min_pen= 1.8m act=1.38
|
||||
n_sheep=10 sr= 27% len= 2267 min_pen= 2.2m act=1.38
|
||||
|
||||
Total time: 97.6 min
|
||||
Artefacts: runs/v1/
|
||||
Plots: runs/v1/success_rate.png, runs/v1/eval/
|
||||
@@ -1,14 +0,0 @@
|
||||
{
|
||||
"W_PER_SHEEP": 2.0,
|
||||
"W_ALIGN": 0.05,
|
||||
"W_PEN_BONUS": 10.0,
|
||||
"W_COMPLETE": 100.0,
|
||||
"W_STEP_COST": 0.02,
|
||||
"W_COMPACT": 0.0,
|
||||
"W_WALL_TOUCH": 0.01,
|
||||
"WALL_TOUCH_BUFFER": 0.4,
|
||||
"ALIGN_SHAPE": "standoff",
|
||||
"ALIGN_GATED": true,
|
||||
"ENTRY_AWARE": true,
|
||||
"ent_coef": 0.02
|
||||
}
|
||||
|
Before Width: | Height: | Size: 186 KiB |
|
Before Width: | Height: | Size: 41 KiB |
|
Before Width: | Height: | Size: 85 KiB |
|
Before Width: | Height: | Size: 64 KiB |
|
Before Width: | Height: | Size: 67 KiB |
|
Before Width: | Height: | Size: 84 KiB |
|
Before Width: | Height: | Size: 151 KiB |
|
Before Width: | Height: | Size: 131 KiB |
|
Before Width: | Height: | Size: 180 KiB |
|
Before Width: | Height: | Size: 110 KiB |
|
Before Width: | Height: | Size: 200 KiB |
|
Before Width: | Height: | Size: 84 KiB |
|
Before Width: | Height: | Size: 201 KiB |
|
Before Width: | Height: | Size: 133 KiB |
|
Before Width: | Height: | Size: 155 KiB |
|
Before Width: | Height: | Size: 109 KiB |
|
Before Width: | Height: | Size: 150 KiB |
|
Before Width: | Height: | Size: 196 KiB |
|
Before Width: | Height: | Size: 184 KiB |
|
Before Width: | Height: | Size: 152 KiB |
@@ -1,218 +0,0 @@
|
||||
[
|
||||
{
|
||||
"sr": 1.0,
|
||||
"mean_len": 243.0,
|
||||
"mean_min_pen": 3.7120999256769815,
|
||||
"mean_act": 0.3930775734995823,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 30
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.11411363949746262,
|
||||
"alignment": 0.00034729298515464674,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.0411522633744856,
|
||||
"step_cost": -0.020000000000000108,
|
||||
"complete": 0.411522633744856
|
||||
},
|
||||
"n_sheep": 1
|
||||
},
|
||||
{
|
||||
"sr": 0.6333333333333333,
|
||||
"mean_len": 1324.9333333333334,
|
||||
"mean_min_pen": 3.108120004336039,
|
||||
"mean_act": 0.41626948835668365,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 19,
|
||||
"PARTIAL_1of2": 10,
|
||||
"COMPACT_CANT_DRIVE": 1
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.045259184195888084,
|
||||
"alignment": 0.006548802090560675,
|
||||
"compact": 0.0,
|
||||
"wall_touch": -0.005243643148915256,
|
||||
"pen_bonus": 0.012327664284995472,
|
||||
"step_cost": -0.019999999999989106,
|
||||
"complete": 0.04780114722753346
|
||||
},
|
||||
"n_sheep": 2
|
||||
},
|
||||
{
|
||||
"sr": 0.9666666666666667,
|
||||
"mean_len": 827.7,
|
||||
"mean_min_pen": 2.727696478366852,
|
||||
"mean_act": 1.1521936838813016,
|
||||
"failure_modes": {
|
||||
"PARTIAL_1of3": 1,
|
||||
"SUCCESS": 29
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.10166334638295625,
|
||||
"alignment": 0.013859153429505626,
|
||||
"compact": 0.0,
|
||||
"wall_touch": -0.0022604217500245883,
|
||||
"pen_bonus": 0.03543957150336273,
|
||||
"step_cost": -0.019999999999993488,
|
||||
"complete": 0.11678949699971809
|
||||
},
|
||||
"n_sheep": 3
|
||||
},
|
||||
{
|
||||
"sr": 0.5666666666666667,
|
||||
"mean_len": 1686.0333333333333,
|
||||
"mean_min_pen": 1.7675368865331014,
|
||||
"mean_act": 1.0093803780622697,
|
||||
"failure_modes": {
|
||||
"PARTIAL_1of4": 10,
|
||||
"SUCCESS": 17,
|
||||
"PARTIAL_2of4": 3
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.07213990871824405,
|
||||
"alignment": 0.008500170591885925,
|
||||
"compact": 0.0,
|
||||
"wall_touch": -0.01096873654520888,
|
||||
"pen_bonus": 0.016607026353769202,
|
||||
"step_cost": -0.019999999999987545,
|
||||
"complete": 0.03360945809691386
|
||||
},
|
||||
"n_sheep": 4
|
||||
},
|
||||
{
|
||||
"sr": 0.6333333333333333,
|
||||
"mean_len": 1653.8333333333333,
|
||||
"mean_min_pen": 1.6310479640960693,
|
||||
"mean_act": 1.3572492104366454,
|
||||
"failure_modes": {
|
||||
"PARTIAL_2of5": 9,
|
||||
"SUCCESS": 19,
|
||||
"PARTIAL_3of5": 2
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.10426509678994506,
|
||||
"alignment": 0.010847962450905363,
|
||||
"compact": 0.0,
|
||||
"wall_touch": -0.010001784418012447,
|
||||
"pen_bonus": 0.02398468205179885,
|
||||
"step_cost": -0.019999999999987656,
|
||||
"complete": 0.038294870502872114
|
||||
},
|
||||
"n_sheep": 5
|
||||
},
|
||||
{
|
||||
"sr": 0.3333333333333333,
|
||||
"mean_len": 2161.0333333333333,
|
||||
"mean_min_pen": 1.7910769859949747,
|
||||
"mean_act": 1.3728399181766682,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 10,
|
||||
"PARTIAL_5of6": 14,
|
||||
"PARTIAL_4of6": 4,
|
||||
"COMPACT_CANT_DRIVE": 2
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.09152597398477412,
|
||||
"alignment": 0.010169068168091603,
|
||||
"compact": 0.0,
|
||||
"wall_touch": -0.006849364742307595,
|
||||
"pen_bonus": 0.022520090697351575,
|
||||
"step_cost": -0.019999999999986286,
|
||||
"complete": 0.015424719655720258
|
||||
},
|
||||
"n_sheep": 6
|
||||
},
|
||||
{
|
||||
"sr": 0.2,
|
||||
"mean_len": 2211.2,
|
||||
"mean_min_pen": 1.8339664101600648,
|
||||
"mean_act": 1.3635542380694952,
|
||||
"failure_modes": {
|
||||
"PARTIAL_5of7": 2,
|
||||
"SUCCESS": 6,
|
||||
"PARTIAL_6of7": 3,
|
||||
"PARTIAL_3of7": 8,
|
||||
"PARTIAL_4of7": 11
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.08794138462674025,
|
||||
"alignment": 0.008588877237149285,
|
||||
"compact": 0.0,
|
||||
"wall_touch": -0.014176997336213705,
|
||||
"pen_bonus": 0.020803183791606367,
|
||||
"step_cost": -0.019999999999986185,
|
||||
"complete": 0.009044862518089725
|
||||
},
|
||||
"n_sheep": 7
|
||||
},
|
||||
{
|
||||
"sr": 0.6333333333333333,
|
||||
"mean_len": 1744.5666666666666,
|
||||
"mean_min_pen": 1.7331914146741232,
|
||||
"mean_act": 1.366222499606064,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 19,
|
||||
"PARTIAL_4of8": 9,
|
||||
"PARTIAL_1of8": 1,
|
||||
"PARTIAL_6of8": 1
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.11981066786559799,
|
||||
"alignment": 0.013385751275637974,
|
||||
"compact": 0.0,
|
||||
"wall_touch": -0.010737474453107049,
|
||||
"pen_bonus": 0.037258536026138295,
|
||||
"step_cost": -0.019999999999987354,
|
||||
"complete": 0.03630318894854501
|
||||
},
|
||||
"n_sheep": 8
|
||||
},
|
||||
{
|
||||
"sr": 0.8333333333333334,
|
||||
"mean_len": 1723.1333333333334,
|
||||
"mean_min_pen": 1.7584208091100058,
|
||||
"mean_act": 1.3848404770822742,
|
||||
"failure_modes": {
|
||||
"NEVER_COMPACT": 1,
|
||||
"SUCCESS": 25,
|
||||
"PARTIAL_5of9": 3,
|
||||
"PARTIAL_6of9": 1
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.1561655017464111,
|
||||
"alignment": 0.01548957874142236,
|
||||
"compact": 0.0,
|
||||
"wall_touch": -0.00728069638883058,
|
||||
"pen_bonus": 0.04797461987851588,
|
||||
"step_cost": -0.019999999999987424,
|
||||
"complete": 0.048361511974310364
|
||||
},
|
||||
"n_sheep": 9
|
||||
},
|
||||
{
|
||||
"sr": 0.26666666666666666,
|
||||
"mean_len": 2266.5333333333333,
|
||||
"mean_min_pen": 2.1789512236913047,
|
||||
"mean_act": 1.3836169439830954,
|
||||
"failure_modes": {
|
||||
"PARTIAL_6of10": 16,
|
||||
"SUCCESS": 8,
|
||||
"PARTIAL_7of10": 1,
|
||||
"PARTIAL_9of10": 1,
|
||||
"PARTIAL_5of10": 1,
|
||||
"COMPACT_CANT_DRIVE": 2,
|
||||
"PARTIAL_8of10": 1
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.13603502511276877,
|
||||
"alignment": 0.013359252519942029,
|
||||
"compact": 0.0,
|
||||
"wall_touch": -0.012232639033891629,
|
||||
"pen_bonus": 0.030148832284252015,
|
||||
"step_cost": -0.019999999999986078,
|
||||
"complete": 0.011765397964586153
|
||||
},
|
||||
"n_sheep": 10
|
||||
}
|
||||
]
|
||||
|
Before Width: | Height: | Size: 33 KiB |
@@ -1,242 +0,0 @@
|
||||
Config loaded from config.json
|
||||
Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.0, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02}
|
||||
Run dir: runs/v2
|
||||
Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
|
||||
|
||||
|
||||
[Stage n_sheep=1] training 1,500,000 steps
|
||||
... [1 sheep | 100,000 steps | ret(last 40)=-23.39 win_sr=8% cum_sr=8%]
|
||||
... [1 sheep | 200,000 steps | ret(last 50)=-22.10 win_sr=10% cum_sr=9%]
|
||||
... [1 sheep | 300,000 steps | ret(last 50)=-23.02 win_sr=10% cum_sr=10%]
|
||||
... [1 sheep | 400,000 steps | ret(last 50)=-18.97 win_sr=18% cum_sr=12%]
|
||||
... [1 sheep | 500,000 steps | ret(last 50)=-20.01 win_sr=8% cum_sr=11%]
|
||||
... [1 sheep | 600,000 steps | ret(last 50)=-18.57 win_sr=14% cum_sr=12%]
|
||||
... [1 sheep | 700,000 steps | ret(last 50)=-17.55 win_sr=22% cum_sr=14%]
|
||||
... [1 sheep | 800,000 steps | ret(last 50)=+7.41 win_sr=66% cum_sr=23%]
|
||||
... [1 sheep | 900,000 steps | ret(last 50)=+17.61 win_sr=100% cum_sr=47%]
|
||||
... [1 sheep | 1,000,000 steps | ret(last 50)=+16.11 win_sr=100% cum_sr=65%]
|
||||
... [1 sheep | 1,100,000 steps | ret(last 50)=+15.82 win_sr=100% cum_sr=74%]
|
||||
... [1 sheep | 1,200,000 steps | ret(last 50)=+14.33 win_sr=100% cum_sr=80%]
|
||||
... [1 sheep | 1,300,000 steps | ret(last 50)=+14.19 win_sr=100% cum_sr=84%]
|
||||
... [1 sheep | 1,400,000 steps | ret(last 50)=+14.00 win_sr=100% cum_sr=87%]
|
||||
... [1 sheep | 1,500,000 steps | ret(last 50)=+13.96 win_sr=100% cum_sr=89%]
|
||||
[Stage n_sheep=1] evaluating 30 eps
|
||||
[Stage n_sheep=1] sr=100% mean_len=234 mean_min_pen=3.7m mean_act=0.41
|
||||
failure modes: SUCCESS=30
|
||||
reward/step: progress=+0.1118 alignment=+0.0003 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0427 step_cost=-0.0200 complete=+0.4274
|
||||
|
||||
[Stage n_sheep=2] training 1,500,000 steps
|
||||
... [2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [2 sheep | 1,607,336 steps | ret(last 40)=-4.45 win_sr=8% cum_sr=8%]
|
||||
... [2 sheep | 1,707,336 steps | ret(last 50)=-4.56 win_sr=8% cum_sr=9%]
|
||||
... [2 sheep | 1,807,336 steps | ret(last 50)=-2.33 win_sr=12% cum_sr=10%]
|
||||
... [2 sheep | 1,907,336 steps | ret(last 50)=+1.93 win_sr=24% cum_sr=14%]
|
||||
... [2 sheep | 2,007,336 steps | ret(last 50)=+7.32 win_sr=52% cum_sr=24%]
|
||||
... [2 sheep | 2,107,336 steps | ret(last 50)=+10.52 win_sr=58% cum_sr=30%]
|
||||
... [2 sheep | 2,207,336 steps | ret(last 50)=+15.67 win_sr=76% cum_sr=39%]
|
||||
... [2 sheep | 2,307,336 steps | ret(last 50)=+16.91 win_sr=78% cum_sr=46%]
|
||||
... [2 sheep | 2,407,336 steps | ret(last 50)=+21.91 win_sr=96% cum_sr=53%]
|
||||
... [2 sheep | 2,507,336 steps | ret(last 50)=+21.08 win_sr=94% cum_sr=60%]
|
||||
... [2 sheep | 2,607,336 steps | ret(last 50)=+20.24 win_sr=92% cum_sr=65%]
|
||||
... [2 sheep | 2,707,336 steps | ret(last 50)=+21.40 win_sr=96% cum_sr=70%]
|
||||
... [2 sheep | 2,807,336 steps | ret(last 50)=+21.95 win_sr=100% cum_sr=73%]
|
||||
... [2 sheep | 2,907,336 steps | ret(last 50)=+20.73 win_sr=100% cum_sr=76%]
|
||||
... [2 sheep | 3,007,336 steps | ret(last 50)=+21.25 win_sr=100% cum_sr=79%]
|
||||
[Stage n_sheep=2] evaluating 30 eps
|
||||
[Stage n_sheep=2] sr=87% mean_len=1064 mean_min_pen=4.1m mean_act=0.59
|
||||
failure modes: SUCCESS=26 COMPACT_CANT_DRIVE=4
|
||||
reward/step: progress=+0.0565 alignment=+0.0071 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0163 step_cost=-0.0200 complete=+0.0815
|
||||
|
||||
[Stage n_sheep=3] training 1,500,000 steps
|
||||
... [3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [3 sheep | 3,114,664 steps | ret(last 50)=+17.60 win_sr=72% cum_sr=73%]
|
||||
... [3 sheep | 3,214,664 steps | ret(last 50)=+25.44 win_sr=98% cum_sr=87%]
|
||||
... [3 sheep | 3,314,664 steps | ret(last 50)=+25.73 win_sr=92% cum_sr=90%]
|
||||
... [3 sheep | 3,414,664 steps | ret(last 50)=+28.01 win_sr=98% cum_sr=92%]
|
||||
... [3 sheep | 3,514,664 steps | ret(last 50)=+25.71 win_sr=94% cum_sr=93%]
|
||||
... [3 sheep | 3,614,664 steps | ret(last 50)=+24.73 win_sr=94% cum_sr=93%]
|
||||
... [3 sheep | 3,714,664 steps | ret(last 50)=+23.51 win_sr=88% cum_sr=92%]
|
||||
... [3 sheep | 3,814,664 steps | ret(last 50)=+25.11 win_sr=96% cum_sr=93%]
|
||||
... [3 sheep | 3,914,664 steps | ret(last 50)=+27.02 win_sr=100% cum_sr=93%]
|
||||
... [3 sheep | 4,014,664 steps | ret(last 50)=+24.67 win_sr=94% cum_sr=94%]
|
||||
... [3 sheep | 4,114,664 steps | ret(last 50)=+26.08 win_sr=98% cum_sr=94%]
|
||||
... [3 sheep | 4,214,664 steps | ret(last 50)=+26.69 win_sr=98% cum_sr=94%]
|
||||
... [3 sheep | 4,314,664 steps | ret(last 50)=+24.01 win_sr=92% cum_sr=94%]
|
||||
... [3 sheep | 4,414,664 steps | ret(last 50)=+25.74 win_sr=98% cum_sr=94%]
|
||||
... [3 sheep | 4,514,664 steps | ret(last 50)=+27.43 win_sr=100% cum_sr=95%]
|
||||
[Stage n_sheep=3] evaluating 30 eps
|
||||
[Stage n_sheep=3] sr=100% mean_len=769 mean_min_pen=3.5m mean_act=0.72
|
||||
failure modes: SUCCESS=30
|
||||
reward/step: progress=+0.1121 alignment=+0.0078 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0390 step_cost=-0.0200 complete=+0.1301
|
||||
|
||||
[Stage n_sheep=4] training 1,500,000 steps
|
||||
... [4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [4 sheep | 4,621,992 steps | ret(last 50)=+32.50 win_sr=100% cum_sr=96%]
|
||||
... [4 sheep | 4,721,992 steps | ret(last 50)=+31.21 win_sr=100% cum_sr=98%]
|
||||
... [4 sheep | 4,821,992 steps | ret(last 50)=+34.05 win_sr=100% cum_sr=99%]
|
||||
... [4 sheep | 4,921,992 steps | ret(last 50)=+32.04 win_sr=100% cum_sr=99%]
|
||||
... [4 sheep | 5,021,992 steps | ret(last 50)=+29.20 win_sr=100% cum_sr=99%]
|
||||
... [4 sheep | 5,121,992 steps | ret(last 50)=+31.56 win_sr=100% cum_sr=99%]
|
||||
... [4 sheep | 5,221,992 steps | ret(last 50)=+31.25 win_sr=100% cum_sr=100%]
|
||||
... [4 sheep | 5,321,992 steps | ret(last 50)=+30.62 win_sr=100% cum_sr=100%]
|
||||
... [4 sheep | 5,421,992 steps | ret(last 50)=+30.44 win_sr=100% cum_sr=100%]
|
||||
... [4 sheep | 5,521,992 steps | ret(last 50)=+32.84 win_sr=100% cum_sr=100%]
|
||||
... [4 sheep | 5,621,992 steps | ret(last 50)=+30.98 win_sr=100% cum_sr=100%]
|
||||
... [4 sheep | 5,721,992 steps | ret(last 50)=+28.77 win_sr=98% cum_sr=100%]
|
||||
... [4 sheep | 5,821,992 steps | ret(last 50)=+29.24 win_sr=100% cum_sr=100%]
|
||||
... [4 sheep | 5,921,992 steps | ret(last 50)=+30.83 win_sr=100% cum_sr=100%]
|
||||
... [4 sheep | 6,021,992 steps | ret(last 50)=+30.06 win_sr=100% cum_sr=100%]
|
||||
[Stage n_sheep=4] evaluating 30 eps
|
||||
[Stage n_sheep=4] sr=100% mean_len=750 mean_min_pen=3.5m mean_act=1.23
|
||||
failure modes: SUCCESS=30
|
||||
reward/step: progress=+0.1586 alignment=+0.0113 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0533 step_cost=-0.0200 complete=+0.1334
|
||||
|
||||
[Stage n_sheep=5] training 1,500,000 steps
|
||||
... [5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [5 sheep | 6,129,320 steps | ret(last 50)=+31.97 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,229,320 steps | ret(last 50)=+32.32 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,329,320 steps | ret(last 50)=+34.26 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,429,320 steps | ret(last 50)=+33.75 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,529,320 steps | ret(last 50)=+34.77 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,629,320 steps | ret(last 50)=+34.06 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,729,320 steps | ret(last 50)=+32.39 win_sr=96% cum_sr=100%]
|
||||
... [5 sheep | 6,829,320 steps | ret(last 50)=+32.33 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 6,929,320 steps | ret(last 50)=+33.29 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 7,029,320 steps | ret(last 50)=+32.12 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 7,129,320 steps | ret(last 50)=+32.58 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 7,229,320 steps | ret(last 50)=+33.27 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 7,329,320 steps | ret(last 50)=+33.64 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 7,429,320 steps | ret(last 50)=+32.67 win_sr=100% cum_sr=100%]
|
||||
... [5 sheep | 7,529,320 steps | ret(last 50)=+32.79 win_sr=100% cum_sr=100%]
|
||||
[Stage n_sheep=5] evaluating 30 eps
|
||||
[Stage n_sheep=5] sr=97% mean_len=921 mean_min_pen=3.2m mean_act=1.33
|
||||
failure modes: SUCCESS=29 PARTIAL_3of5=1
|
||||
reward/step: progress=+0.1565 alignment=+0.0135 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0536 step_cost=-0.0200 complete=+0.1050
|
||||
|
||||
[Stage n_sheep=6] training 1,500,000 steps
|
||||
... [6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [6 sheep | 7,636,648 steps | ret(last 50)=+35.93 win_sr=100% cum_sr=96%]
|
||||
... [6 sheep | 7,736,648 steps | ret(last 50)=+37.56 win_sr=100% cum_sr=97%]
|
||||
... [6 sheep | 7,836,648 steps | ret(last 50)=+34.93 win_sr=100% cum_sr=98%]
|
||||
... [6 sheep | 7,936,648 steps | ret(last 50)=+32.71 win_sr=98% cum_sr=98%]
|
||||
... [6 sheep | 8,036,648 steps | ret(last 50)=+36.84 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 8,136,648 steps | ret(last 50)=+35.11 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 8,236,648 steps | ret(last 50)=+36.54 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 8,336,648 steps | ret(last 50)=+34.67 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 8,436,648 steps | ret(last 50)=+36.14 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 8,536,648 steps | ret(last 50)=+36.95 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 8,636,648 steps | ret(last 50)=+35.42 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 8,736,648 steps | ret(last 50)=+33.44 win_sr=100% cum_sr=100%]
|
||||
... [6 sheep | 8,836,648 steps | ret(last 50)=+36.70 win_sr=100% cum_sr=100%]
|
||||
... [6 sheep | 8,936,648 steps | ret(last 50)=+34.03 win_sr=100% cum_sr=100%]
|
||||
... [6 sheep | 9,036,648 steps | ret(last 50)=+34.53 win_sr=100% cum_sr=100%]
|
||||
[Stage n_sheep=6] evaluating 30 eps
|
||||
[Stage n_sheep=6] sr=97% mean_len=1193 mean_min_pen=3.4m mean_act=1.36
|
||||
failure modes: SUCCESS=29 COMPACT_CANT_DRIVE=1
|
||||
reward/step: progress=+0.1597 alignment=+0.0173 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0492 step_cost=-0.0200 complete=+0.0810
|
||||
|
||||
[Stage n_sheep=7] training 1,500,000 steps
|
||||
... [7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [7 sheep | 9,143,976 steps | ret(last 50)=+40.54 win_sr=100% cum_sr=100%]
|
||||
... [7 sheep | 9,243,976 steps | ret(last 50)=+38.70 win_sr=98% cum_sr=99%]
|
||||
... [7 sheep | 9,343,976 steps | ret(last 50)=+38.13 win_sr=100% cum_sr=100%]
|
||||
... [7 sheep | 9,443,976 steps | ret(last 50)=+40.37 win_sr=100% cum_sr=100%]
|
||||
... [7 sheep | 9,543,976 steps | ret(last 50)=+39.40 win_sr=100% cum_sr=99%]
|
||||
... [7 sheep | 9,643,976 steps | ret(last 50)=+40.44 win_sr=98% cum_sr=99%]
|
||||
... [7 sheep | 9,743,976 steps | ret(last 50)=+37.74 win_sr=100% cum_sr=99%]
|
||||
... [7 sheep | 9,843,976 steps | ret(last 50)=+39.91 win_sr=98% cum_sr=99%]
|
||||
... [7 sheep | 9,943,976 steps | ret(last 50)=+40.67 win_sr=100% cum_sr=99%]
|
||||
... [7 sheep | 10,043,976 steps | ret(last 50)=+35.38 win_sr=100% cum_sr=99%]
|
||||
... [7 sheep | 10,143,976 steps | ret(last 50)=+38.31 win_sr=100% cum_sr=99%]
|
||||
... [7 sheep | 10,243,976 steps | ret(last 50)=+40.86 win_sr=100% cum_sr=99%]
|
||||
... [7 sheep | 10,343,976 steps | ret(last 50)=+40.95 win_sr=100% cum_sr=99%]
|
||||
... [7 sheep | 10,443,976 steps | ret(last 50)=+37.90 win_sr=100% cum_sr=99%]
|
||||
... [7 sheep | 10,543,976 steps | ret(last 50)=+39.07 win_sr=100% cum_sr=99%]
|
||||
[Stage n_sheep=7] evaluating 30 eps
|
||||
[Stage n_sheep=7] sr=100% mean_len=1209 mean_min_pen=3.2m mean_act=1.37
|
||||
failure modes: SUCCESS=30
|
||||
reward/step: progress=+0.1774 alignment=+0.0179 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0579 step_cost=-0.0200 complete=+0.0827
|
||||
|
||||
[Stage n_sheep=8] training 1,500,000 steps
|
||||
... [8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [8 sheep | 10,651,304 steps | ret(last 50)=+42.81 win_sr=100% cum_sr=100%]
|
||||
... [8 sheep | 10,751,304 steps | ret(last 50)=+44.59 win_sr=100% cum_sr=100%]
|
||||
... [8 sheep | 10,851,304 steps | ret(last 50)=+45.59 win_sr=98% cum_sr=99%]
|
||||
... [8 sheep | 10,951,304 steps | ret(last 50)=+42.27 win_sr=98% cum_sr=99%]
|
||||
... [8 sheep | 11,051,304 steps | ret(last 50)=+45.05 win_sr=98% cum_sr=99%]
|
||||
... [8 sheep | 11,151,304 steps | ret(last 50)=+45.50 win_sr=100% cum_sr=99%]
|
||||
... [8 sheep | 11,251,304 steps | ret(last 50)=+43.60 win_sr=100% cum_sr=99%]
|
||||
... [8 sheep | 11,351,304 steps | ret(last 50)=+40.26 win_sr=100% cum_sr=99%]
|
||||
... [8 sheep | 11,451,304 steps | ret(last 50)=+43.00 win_sr=100% cum_sr=99%]
|
||||
... [8 sheep | 11,551,304 steps | ret(last 50)=+43.16 win_sr=100% cum_sr=100%]
|
||||
... [8 sheep | 11,651,304 steps | ret(last 50)=+42.78 win_sr=100% cum_sr=100%]
|
||||
... [8 sheep | 11,751,304 steps | ret(last 50)=+42.32 win_sr=98% cum_sr=99%]
|
||||
... [8 sheep | 11,851,304 steps | ret(last 50)=+41.62 win_sr=100% cum_sr=99%]
|
||||
... [8 sheep | 11,951,304 steps | ret(last 50)=+42.56 win_sr=98% cum_sr=99%]
|
||||
... [8 sheep | 12,051,304 steps | ret(last 50)=+41.83 win_sr=100% cum_sr=99%]
|
||||
[Stage n_sheep=8] evaluating 30 eps
|
||||
[Stage n_sheep=8] sr=100% mean_len=1492 mean_min_pen=3.2m mean_act=1.38
|
||||
failure modes: SUCCESS=30
|
||||
reward/step: progress=+0.1916 alignment=+0.0190 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0536 step_cost=-0.0200 complete=+0.0670
|
||||
|
||||
[Stage n_sheep=9] training 1,500,000 steps
|
||||
... [9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [9 sheep | 12,158,632 steps | ret(last 50)=+46.03 win_sr=100% cum_sr=100%]
|
||||
... [9 sheep | 12,258,632 steps | ret(last 50)=+46.87 win_sr=96% cum_sr=97%]
|
||||
... [9 sheep | 12,358,632 steps | ret(last 50)=+45.48 win_sr=96% cum_sr=97%]
|
||||
... [9 sheep | 12,458,632 steps | ret(last 50)=+47.02 win_sr=96% cum_sr=97%]
|
||||
... [9 sheep | 12,558,632 steps | ret(last 50)=+44.66 win_sr=96% cum_sr=97%]
|
||||
... [9 sheep | 12,658,632 steps | ret(last 50)=+46.60 win_sr=96% cum_sr=97%]
|
||||
... [9 sheep | 12,758,632 steps | ret(last 50)=+41.85 win_sr=96% cum_sr=97%]
|
||||
... [9 sheep | 12,858,632 steps | ret(last 50)=+47.81 win_sr=96% cum_sr=97%]
|
||||
... [9 sheep | 12,958,632 steps | ret(last 50)=+44.92 win_sr=90% cum_sr=96%]
|
||||
... [9 sheep | 13,058,632 steps | ret(last 50)=+47.40 win_sr=90% cum_sr=96%]
|
||||
... [9 sheep | 13,158,632 steps | ret(last 50)=+47.16 win_sr=92% cum_sr=95%]
|
||||
... [9 sheep | 13,258,632 steps | ret(last 50)=+45.55 win_sr=98% cum_sr=96%]
|
||||
... [9 sheep | 13,358,632 steps | ret(last 50)=+46.87 win_sr=96% cum_sr=96%]
|
||||
... [9 sheep | 13,458,632 steps | ret(last 50)=+47.69 win_sr=98% cum_sr=96%]
|
||||
... [9 sheep | 13,558,632 steps | ret(last 50)=+45.17 win_sr=94% cum_sr=96%]
|
||||
[Stage n_sheep=9] evaluating 30 eps
|
||||
[Stage n_sheep=9] sr=90% mean_len=1628 mean_min_pen=3.2m mean_act=1.38
|
||||
failure modes: SUCCESS=27 COMPACT_CANT_DRIVE=3
|
||||
reward/step: progress=+0.1802 alignment=+0.0204 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0514 step_cost=-0.0200 complete=+0.0553
|
||||
|
||||
[Stage n_sheep=10] training 1,500,000 steps
|
||||
... [10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [10 sheep | 13,665,960 steps | ret(last 50)=+49.00 win_sr=82% cum_sr=82%]
|
||||
... [10 sheep | 13,765,960 steps | ret(last 50)=+48.55 win_sr=86% cum_sr=84%]
|
||||
... [10 sheep | 13,865,960 steps | ret(last 50)=+46.53 win_sr=80% cum_sr=83%]
|
||||
... [10 sheep | 13,965,960 steps | ret(last 50)=+44.70 win_sr=82% cum_sr=83%]
|
||||
... [10 sheep | 14,065,960 steps | ret(last 50)=+52.57 win_sr=92% cum_sr=85%]
|
||||
... [10 sheep | 14,165,960 steps | ret(last 50)=+50.20 win_sr=82% cum_sr=85%]
|
||||
... [10 sheep | 14,265,960 steps | ret(last 50)=+50.34 win_sr=90% cum_sr=85%]
|
||||
... [10 sheep | 14,365,960 steps | ret(last 50)=+50.24 win_sr=90% cum_sr=86%]
|
||||
... [10 sheep | 14,465,960 steps | ret(last 50)=+48.40 win_sr=86% cum_sr=86%]
|
||||
... [10 sheep | 14,565,960 steps | ret(last 50)=+48.74 win_sr=88% cum_sr=87%]
|
||||
... [10 sheep | 14,665,960 steps | ret(last 50)=+48.46 win_sr=80% cum_sr=86%]
|
||||
... [10 sheep | 14,765,960 steps | ret(last 50)=+51.46 win_sr=70% cum_sr=85%]
|
||||
... [10 sheep | 14,865,960 steps | ret(last 50)=+49.28 win_sr=92% cum_sr=85%]
|
||||
... [10 sheep | 14,965,960 steps | ret(last 50)=+51.12 win_sr=88% cum_sr=86%]
|
||||
... [10 sheep | 15,065,960 steps | ret(last 50)=+52.03 win_sr=84% cum_sr=85%]
|
||||
[Stage n_sheep=10] evaluating 30 eps
|
||||
[Stage n_sheep=10] sr=93% mean_len=1870 mean_min_pen=3.1m mean_act=1.38
|
||||
failure modes: SUCCESS=28 COMPACT_CANT_DRIVE=2
|
||||
reward/step: progress=+0.1727 alignment=+0.0219 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0522 step_cost=-0.0200 complete=+0.0499
|
||||
|
||||
======================================================================
|
||||
TRAINING SUMMARY
|
||||
======================================================================
|
||||
n_sheep=1 sr=100% len= 234 min_pen= 3.7m act=0.41
|
||||
n_sheep=2 sr= 87% len= 1064 min_pen= 4.1m act=0.59
|
||||
n_sheep=3 sr=100% len= 769 min_pen= 3.5m act=0.72
|
||||
n_sheep=4 sr=100% len= 750 min_pen= 3.5m act=1.23
|
||||
n_sheep=5 sr= 97% len= 921 min_pen= 3.2m act=1.33
|
||||
n_sheep=6 sr= 97% len= 1193 min_pen= 3.4m act=1.36
|
||||
n_sheep=7 sr=100% len= 1209 min_pen= 3.2m act=1.37
|
||||
n_sheep=8 sr=100% len= 1492 min_pen= 3.2m act=1.38
|
||||
n_sheep=9 sr= 90% len= 1628 min_pen= 3.2m act=1.38
|
||||
n_sheep=10 sr= 93% len= 1870 min_pen= 3.1m act=1.38
|
||||
|
||||
Total time: 92.0 min
|
||||
Artefacts: runs/v2/
|
||||
Plots: runs/v2/success_rate.png, runs/v2/eval/
|
||||
@@ -1,14 +0,0 @@
|
||||
{
|
||||
"W_PER_SHEEP": 2.0,
|
||||
"W_ALIGN": 0.05,
|
||||
"W_PEN_BONUS": 10.0,
|
||||
"W_COMPLETE": 100.0,
|
||||
"W_STEP_COST": 0.02,
|
||||
"W_COMPACT": 0.0,
|
||||
"W_WALL_TOUCH": 0.0,
|
||||
"WALL_TOUCH_BUFFER": 0.4,
|
||||
"ALIGN_SHAPE": "standoff",
|
||||
"ALIGN_GATED": true,
|
||||
"ENTRY_AWARE": true,
|
||||
"ent_coef": 0.02
|
||||
}
|
||||
|
Before Width: | Height: | Size: 2.2 MiB |
|
Before Width: | Height: | Size: 133 KiB |
|
Before Width: | Height: | Size: 1.2 MiB |
|
Before Width: | Height: | Size: 259 KiB |
|
Before Width: | Height: | Size: 453 KiB |
|
Before Width: | Height: | Size: 658 KiB |
|
Before Width: | Height: | Size: 1.4 MiB |
|
Before Width: | Height: | Size: 1.4 MiB |
|
Before Width: | Height: | Size: 2.0 MiB |
|
Before Width: | Height: | Size: 904 KiB |
|
Before Width: | Height: | Size: 202 KiB |
|
Before Width: | Height: | Size: 40 KiB |
|
Before Width: | Height: | Size: 75 KiB |
|
Before Width: | Height: | Size: 61 KiB |
|
Before Width: | Height: | Size: 101 KiB |
|
Before Width: | Height: | Size: 99 KiB |
|
Before Width: | Height: | Size: 115 KiB |
|
Before Width: | Height: | Size: 169 KiB |
|
Before Width: | Height: | Size: 172 KiB |
|
Before Width: | Height: | Size: 103 KiB |
|
Before Width: | Height: | Size: 190 KiB |
|
Before Width: | Height: | Size: 80 KiB |
|
Before Width: | Height: | Size: 192 KiB |
|
Before Width: | Height: | Size: 141 KiB |
|
Before Width: | Height: | Size: 152 KiB |
|
Before Width: | Height: | Size: 155 KiB |
|
Before Width: | Height: | Size: 176 KiB |
|
Before Width: | Height: | Size: 184 KiB |
|
Before Width: | Height: | Size: 194 KiB |
|
Before Width: | Height: | Size: 158 KiB |
@@ -1,197 +0,0 @@
|
||||
[
|
||||
{
|
||||
"sr": 1.0,
|
||||
"mean_len": 234.0,
|
||||
"mean_min_pen": 3.6668872674306234,
|
||||
"mean_act": 0.4068990752695293,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 30
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.11183513424165568,
|
||||
"alignment": 0.0002786317654047819,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.042735042735042736,
|
||||
"step_cost": -0.019999999999999716,
|
||||
"complete": 0.42735042735042733
|
||||
},
|
||||
"n_sheep": 1
|
||||
},
|
||||
{
|
||||
"sr": 0.8666666666666667,
|
||||
"mean_len": 1063.6666666666667,
|
||||
"mean_min_pen": 4.120940693219503,
|
||||
"mean_act": 0.5870139278816712,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 26,
|
||||
"COMPACT_CANT_DRIVE": 4
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.05651345582855781,
|
||||
"alignment": 0.007121706701510673,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.01629583202757756,
|
||||
"step_cost": -0.0199999999999909,
|
||||
"complete": 0.08147916013788781
|
||||
},
|
||||
"n_sheep": 2
|
||||
},
|
||||
{
|
||||
"sr": 1.0,
|
||||
"mean_len": 768.6,
|
||||
"mean_min_pen": 3.4802104949951174,
|
||||
"mean_act": 0.7173416881465967,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 30
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.11210350058336283,
|
||||
"alignment": 0.007752684222105381,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.039032006245121,
|
||||
"step_cost": -0.019999999999994387,
|
||||
"complete": 0.13010668748373666
|
||||
},
|
||||
"n_sheep": 3
|
||||
},
|
||||
{
|
||||
"sr": 1.0,
|
||||
"mean_len": 749.8666666666667,
|
||||
"mean_min_pen": 3.491257842381795,
|
||||
"mean_act": 1.2302732761302806,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 30
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.15859288932254823,
|
||||
"alignment": 0.011327628562653137,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.05334281650071124,
|
||||
"step_cost": -0.0199999999999947,
|
||||
"complete": 0.13335704125177808
|
||||
},
|
||||
"n_sheep": 4
|
||||
},
|
||||
{
|
||||
"sr": 0.9666666666666667,
|
||||
"mean_len": 920.5666666666667,
|
||||
"mean_min_pen": 3.2368871172269187,
|
||||
"mean_act": 1.329068384219205,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 29,
|
||||
"PARTIAL_3of5": 1
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.15654392868672135,
|
||||
"alignment": 0.013497823599666012,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.05359017996161784,
|
||||
"step_cost": -0.019999999999992312,
|
||||
"complete": 0.10500778505992686
|
||||
},
|
||||
"n_sheep": 5
|
||||
},
|
||||
{
|
||||
"sr": 0.9666666666666667,
|
||||
"mean_len": 1193.2333333333333,
|
||||
"mean_min_pen": 3.4217512369155885,
|
||||
"mean_act": 1.3575613093489967,
|
||||
"failure_modes": {
|
||||
"COMPACT_CANT_DRIVE": 1,
|
||||
"SUCCESS": 29
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.15969395095863717,
|
||||
"alignment": 0.017340700156353795,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.049166131240048046,
|
||||
"step_cost": -0.01999999999998991,
|
||||
"complete": 0.08101237533871554
|
||||
},
|
||||
"n_sheep": 6
|
||||
},
|
||||
{
|
||||
"sr": 1.0,
|
||||
"mean_len": 1209.4666666666667,
|
||||
"mean_min_pen": 3.2339003403981526,
|
||||
"mean_act": 1.3714931576761524,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 30
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.17738547200352864,
|
||||
"alignment": 0.017914342656107935,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.057876750082681075,
|
||||
"step_cost": -0.019999999999989804,
|
||||
"complete": 0.08268107154668725
|
||||
},
|
||||
"n_sheep": 7
|
||||
},
|
||||
{
|
||||
"sr": 1.0,
|
||||
"mean_len": 1491.7666666666667,
|
||||
"mean_min_pen": 3.216744065284729,
|
||||
"mean_act": 1.3783802580111435,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 30
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.19162546125035912,
|
||||
"alignment": 0.018971863842493202,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.05362768976381472,
|
||||
"step_cost": -0.01999999999998829,
|
||||
"complete": 0.06703461220476839
|
||||
},
|
||||
"n_sheep": 8
|
||||
},
|
||||
{
|
||||
"sr": 0.9,
|
||||
"mean_len": 1627.5666666666666,
|
||||
"mean_min_pen": 3.23857311407725,
|
||||
"mean_act": 1.3832202011732966,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 27,
|
||||
"COMPACT_CANT_DRIVE": 3
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.18015228593205654,
|
||||
"alignment": 0.020407598899987247,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.05140598439388044,
|
||||
"step_cost": -0.01999999999998775,
|
||||
"complete": 0.055297274049194094
|
||||
},
|
||||
"n_sheep": 9
|
||||
},
|
||||
{
|
||||
"sr": 0.9333333333333333,
|
||||
"mean_len": 1869.9666666666667,
|
||||
"mean_min_pen": 3.1344878753026326,
|
||||
"mean_act": 1.3841143385300063,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 28,
|
||||
"COMPACT_CANT_DRIVE": 2
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.17267533684098152,
|
||||
"alignment": 0.021850885374692264,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.05222909499278062,
|
||||
"step_cost": -0.019999999999986983,
|
||||
"complete": 0.04991176313303267
|
||||
},
|
||||
"n_sheep": 10
|
||||
}
|
||||
]
|
||||
|
Before Width: | Height: | Size: 30 KiB |
@@ -1,242 +0,0 @@
|
||||
Config loaded from config.json
|
||||
Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_SOUTH': 0.01, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.0, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02}
|
||||
Run dir: runs/v3
|
||||
Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
|
||||
|
||||
|
||||
[Stage n_sheep=1] training 1,500,000 steps
|
||||
... [1 sheep | 100,000 steps | ret(last 24)=-47.74 win_sr=12% cum_sr=12%]
|
||||
... [1 sheep | 200,000 steps | ret(last 50)=-40.77 win_sr=14% cum_sr=16%]
|
||||
... [1 sheep | 300,000 steps | ret(last 50)=-36.39 win_sr=16% cum_sr=16%]
|
||||
... [1 sheep | 400,000 steps | ret(last 50)=-40.04 win_sr=14% cum_sr=15%]
|
||||
... [1 sheep | 500,000 steps | ret(last 50)=+7.09 win_sr=80% cum_sr=36%]
|
||||
... [1 sheep | 600,000 steps | ret(last 50)=+15.87 win_sr=100% cum_sr=71%]
|
||||
... [1 sheep | 700,000 steps | ret(last 50)=+14.78 win_sr=100% cum_sr=84%]
|
||||
... [1 sheep | 800,000 steps | ret(last 50)=+14.04 win_sr=100% cum_sr=90%]
|
||||
... [1 sheep | 900,000 steps | ret(last 50)=+14.08 win_sr=100% cum_sr=92%]
|
||||
... [1 sheep | 1,000,000 steps | ret(last 50)=+13.33 win_sr=100% cum_sr=94%]
|
||||
... [1 sheep | 1,100,000 steps | ret(last 50)=+13.99 win_sr=100% cum_sr=95%]
|
||||
... [1 sheep | 1,200,000 steps | ret(last 50)=+13.38 win_sr=100% cum_sr=96%]
|
||||
... [1 sheep | 1,300,000 steps | ret(last 50)=+13.18 win_sr=100% cum_sr=96%]
|
||||
... [1 sheep | 1,400,000 steps | ret(last 50)=+13.53 win_sr=100% cum_sr=97%]
|
||||
... [1 sheep | 1,500,000 steps | ret(last 50)=+13.46 win_sr=100% cum_sr=97%]
|
||||
[Stage n_sheep=1] evaluating 30 eps
|
||||
[Stage n_sheep=1] sr=100% mean_len=264 mean_min_pen=3.7m mean_act=0.45
|
||||
failure modes: SUCCESS=30
|
||||
reward/step: progress=+0.1156 alignment=+0.0001 south=-0.0005 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0378 step_cost=-0.0200 complete=+0.3784
|
||||
|
||||
[Stage n_sheep=2] training 1,500,000 steps
|
||||
... [2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [2 sheep | 1,607,336 steps | ret(last 35)=-3.04 win_sr=49% cum_sr=49%]
|
||||
... [2 sheep | 1,707,336 steps | ret(last 50)=-11.13 win_sr=20% cum_sr=33%]
|
||||
... [2 sheep | 1,807,336 steps | ret(last 50)=-11.83 win_sr=18% cum_sr=31%]
|
||||
... [2 sheep | 1,907,336 steps | ret(last 50)=-8.76 win_sr=30% cum_sr=31%]
|
||||
... [2 sheep | 2,007,336 steps | ret(last 50)=-8.95 win_sr=30% cum_sr=30%]
|
||||
... [2 sheep | 2,107,336 steps | ret(last 50)=-9.06 win_sr=32% cum_sr=30%]
|
||||
... [2 sheep | 2,207,336 steps | ret(last 50)=-9.48 win_sr=32% cum_sr=30%]
|
||||
... [2 sheep | 2,307,336 steps | ret(last 50)=-1.70 win_sr=44% cum_sr=33%]
|
||||
... [2 sheep | 2,407,336 steps | ret(last 50)=+5.02 win_sr=64% cum_sr=38%]
|
||||
... [2 sheep | 2,507,336 steps | ret(last 50)=+13.32 win_sr=88% cum_sr=46%]
|
||||
... [2 sheep | 2,607,336 steps | ret(last 50)=+12.15 win_sr=90% cum_sr=54%]
|
||||
... [2 sheep | 2,707,336 steps | ret(last 50)=+17.13 win_sr=98% cum_sr=63%]
|
||||
... [2 sheep | 2,807,336 steps | ret(last 50)=+18.81 win_sr=98% cum_sr=69%]
|
||||
... [2 sheep | 2,907,336 steps | ret(last 50)=+16.23 win_sr=92% cum_sr=73%]
|
||||
... [2 sheep | 3,007,336 steps | ret(last 50)=+18.83 win_sr=100% cum_sr=76%]
|
||||
[Stage n_sheep=2] evaluating 30 eps
|
||||
[Stage n_sheep=2] sr=77% mean_len=1398 mean_min_pen=3.3m mean_act=0.97
|
||||
failure modes: SUCCESS=23 PARTIAL_1of2=6 COMPACT_CANT_DRIVE=1
|
||||
reward/step: progress=+0.0401 alignment=+0.0045 south=-0.0039 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0126 step_cost=-0.0200 complete=+0.0549
|
||||
|
||||
[Stage n_sheep=3] training 1,500,000 steps
|
||||
... [3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [3 sheep | 3,114,664 steps | ret(last 50)=+13.79 win_sr=82% cum_sr=84%]
|
||||
... [3 sheep | 3,214,664 steps | ret(last 50)=+21.64 win_sr=96% cum_sr=88%]
|
||||
... [3 sheep | 3,314,664 steps | ret(last 50)=+23.45 win_sr=98% cum_sr=92%]
|
||||
... [3 sheep | 3,414,664 steps | ret(last 50)=+22.18 win_sr=98% cum_sr=94%]
|
||||
... [3 sheep | 3,514,664 steps | ret(last 50)=+24.83 win_sr=100% cum_sr=96%]
|
||||
... [3 sheep | 3,614,664 steps | ret(last 50)=+19.77 win_sr=94% cum_sr=96%]
|
||||
... [3 sheep | 3,714,664 steps | ret(last 50)=+25.53 win_sr=100% cum_sr=96%]
|
||||
... [3 sheep | 3,814,664 steps | ret(last 50)=+25.24 win_sr=100% cum_sr=97%]
|
||||
... [3 sheep | 3,914,664 steps | ret(last 50)=+24.43 win_sr=100% cum_sr=97%]
|
||||
... [3 sheep | 4,014,664 steps | ret(last 50)=+24.59 win_sr=100% cum_sr=97%]
|
||||
... [3 sheep | 4,114,664 steps | ret(last 50)=+22.18 win_sr=98% cum_sr=98%]
|
||||
... [3 sheep | 4,214,664 steps | ret(last 50)=+23.11 win_sr=96% cum_sr=97%]
|
||||
... [3 sheep | 4,314,664 steps | ret(last 50)=+23.06 win_sr=98% cum_sr=97%]
|
||||
... [3 sheep | 4,414,664 steps | ret(last 50)=+23.35 win_sr=100% cum_sr=97%]
|
||||
... [3 sheep | 4,514,664 steps | ret(last 50)=+22.50 win_sr=100% cum_sr=98%]
|
||||
[Stage n_sheep=3] evaluating 30 eps
|
||||
[Stage n_sheep=3] sr=97% mean_len=1095 mean_min_pen=2.5m mean_act=0.95
|
||||
failure modes: SUCCESS=29 COMPACT_CANT_DRIVE=1
|
||||
reward/step: progress=+0.0821 alignment=+0.0113 south=-0.0087 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0265 step_cost=-0.0200 complete=+0.0883
|
||||
|
||||
[Stage n_sheep=4] training 1,500,000 steps
|
||||
... [4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [4 sheep | 4,621,992 steps | ret(last 50)=+22.17 win_sr=92% cum_sr=94%]
|
||||
... [4 sheep | 4,721,992 steps | ret(last 50)=+25.81 win_sr=94% cum_sr=93%]
|
||||
... [4 sheep | 4,821,992 steps | ret(last 50)=+21.80 win_sr=90% cum_sr=93%]
|
||||
... [4 sheep | 4,921,992 steps | ret(last 50)=+26.38 win_sr=98% cum_sr=94%]
|
||||
... [4 sheep | 5,021,992 steps | ret(last 50)=+26.65 win_sr=98% cum_sr=95%]
|
||||
... [4 sheep | 5,121,992 steps | ret(last 50)=+26.07 win_sr=98% cum_sr=95%]
|
||||
... [4 sheep | 5,221,992 steps | ret(last 50)=+27.08 win_sr=98% cum_sr=96%]
|
||||
... [4 sheep | 5,321,992 steps | ret(last 50)=+27.87 win_sr=100% cum_sr=96%]
|
||||
... [4 sheep | 5,421,992 steps | ret(last 50)=+27.53 win_sr=100% cum_sr=97%]
|
||||
... [4 sheep | 5,521,992 steps | ret(last 50)=+25.91 win_sr=100% cum_sr=97%]
|
||||
... [4 sheep | 5,621,992 steps | ret(last 50)=+27.75 win_sr=100% cum_sr=97%]
|
||||
... [4 sheep | 5,721,992 steps | ret(last 50)=+25.63 win_sr=100% cum_sr=97%]
|
||||
... [4 sheep | 5,821,992 steps | ret(last 50)=+24.43 win_sr=98% cum_sr=97%]
|
||||
... [4 sheep | 5,921,992 steps | ret(last 50)=+22.52 win_sr=94% cum_sr=97%]
|
||||
... [4 sheep | 6,021,992 steps | ret(last 50)=+27.28 win_sr=100% cum_sr=98%]
|
||||
[Stage n_sheep=4] evaluating 30 eps
|
||||
[Stage n_sheep=4] sr=57% mean_len=2572 mean_min_pen=2.2m mean_act=1.28
|
||||
failure modes: SUCCESS=17 PARTIAL_1of4=6 PARTIAL_2of4=5 DROVE_NO_SHEEP=1 NEVER_COMPACT=1
|
||||
reward/step: progress=+0.0455 alignment=+0.0040 south=-0.0454 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0109 step_cost=-0.0200 complete=+0.0220
|
||||
|
||||
[Stage n_sheep=5] training 1,500,000 steps
|
||||
... [5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [5 sheep | 6,129,320 steps | ret(last 50)=+28.06 win_sr=96% cum_sr=96%]
|
||||
... [5 sheep | 6,229,320 steps | ret(last 50)=+31.40 win_sr=98% cum_sr=96%]
|
||||
... [5 sheep | 6,329,320 steps | ret(last 50)=+27.81 win_sr=96% cum_sr=96%]
|
||||
... [5 sheep | 6,429,320 steps | ret(last 50)=+22.08 win_sr=88% cum_sr=95%]
|
||||
... [5 sheep | 6,529,320 steps | ret(last 50)=+26.99 win_sr=94% cum_sr=95%]
|
||||
... [5 sheep | 6,629,320 steps | ret(last 50)=+21.24 win_sr=86% cum_sr=93%]
|
||||
... [5 sheep | 6,729,320 steps | ret(last 50)=+24.58 win_sr=94% cum_sr=93%]
|
||||
... [5 sheep | 6,829,320 steps | ret(last 50)=+29.66 win_sr=96% cum_sr=93%]
|
||||
... [5 sheep | 6,929,320 steps | ret(last 50)=+27.53 win_sr=96% cum_sr=93%]
|
||||
... [5 sheep | 7,029,320 steps | ret(last 50)=+28.99 win_sr=100% cum_sr=94%]
|
||||
... [5 sheep | 7,129,320 steps | ret(last 50)=+27.59 win_sr=98% cum_sr=94%]
|
||||
... [5 sheep | 7,229,320 steps | ret(last 50)=+30.79 win_sr=100% cum_sr=95%]
|
||||
... [5 sheep | 7,329,320 steps | ret(last 50)=+30.56 win_sr=98% cum_sr=95%]
|
||||
... [5 sheep | 7,429,320 steps | ret(last 50)=+31.55 win_sr=100% cum_sr=95%]
|
||||
... [5 sheep | 7,529,320 steps | ret(last 50)=+29.95 win_sr=100% cum_sr=96%]
|
||||
[Stage n_sheep=5] evaluating 30 eps
|
||||
[Stage n_sheep=5] sr=0% mean_len=4000 mean_min_pen=1.7m mean_act=1.36
|
||||
failure modes: PARTIAL_4of5=17 PARTIAL_1of5=9 PARTIAL_3of5=2 PARTIAL_2of5=2
|
||||
reward/step: progress=+0.0396 alignment=+0.0034 south=-0.0393 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0073 step_cost=-0.0200 complete=+0.0000
|
||||
|
||||
[Stage n_sheep=6] training 1,500,000 steps
|
||||
... [6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [6 sheep | 7,636,648 steps | ret(last 50)=+34.50 win_sr=100% cum_sr=100%]
|
||||
... [6 sheep | 7,736,648 steps | ret(last 50)=+31.01 win_sr=100% cum_sr=100%]
|
||||
... [6 sheep | 7,836,648 steps | ret(last 50)=+33.27 win_sr=100% cum_sr=100%]
|
||||
... [6 sheep | 7,936,648 steps | ret(last 50)=+34.81 win_sr=100% cum_sr=100%]
|
||||
... [6 sheep | 8,036,648 steps | ret(last 50)=+32.69 win_sr=100% cum_sr=100%]
|
||||
... [6 sheep | 8,136,648 steps | ret(last 50)=+31.36 win_sr=96% cum_sr=99%]
|
||||
... [6 sheep | 8,236,648 steps | ret(last 50)=+33.71 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 8,336,648 steps | ret(last 50)=+34.71 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 8,436,648 steps | ret(last 50)=+31.89 win_sr=96% cum_sr=99%]
|
||||
... [6 sheep | 8,536,648 steps | ret(last 50)=+35.63 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 8,636,648 steps | ret(last 50)=+35.92 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 8,736,648 steps | ret(last 50)=+33.70 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 8,836,648 steps | ret(last 50)=+33.46 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 8,936,648 steps | ret(last 50)=+35.12 win_sr=100% cum_sr=99%]
|
||||
... [6 sheep | 9,036,648 steps | ret(last 50)=+34.21 win_sr=100% cum_sr=100%]
|
||||
[Stage n_sheep=6] evaluating 30 eps
|
||||
[Stage n_sheep=6] sr=37% mean_len=3137 mean_min_pen=1.8m mean_act=1.37
|
||||
failure modes: PARTIAL_4of6=14 SUCCESS=11 PARTIAL_3of6=5
|
||||
reward/step: progress=+0.0654 alignment=+0.0085 south=-0.0392 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0146 step_cost=-0.0200 complete=+0.0117
|
||||
|
||||
[Stage n_sheep=7] training 1,500,000 steps
|
||||
... [7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [7 sheep | 9,143,976 steps | ret(last 50)=+36.14 win_sr=100% cum_sr=100%]
|
||||
... [7 sheep | 9,243,976 steps | ret(last 50)=+33.77 win_sr=98% cum_sr=99%]
|
||||
... [7 sheep | 9,343,976 steps | ret(last 50)=+37.14 win_sr=100% cum_sr=100%]
|
||||
... [7 sheep | 9,443,976 steps | ret(last 50)=+39.90 win_sr=100% cum_sr=100%]
|
||||
... [7 sheep | 9,543,976 steps | ret(last 50)=+37.52 win_sr=100% cum_sr=100%]
|
||||
... [7 sheep | 9,643,976 steps | ret(last 50)=+37.31 win_sr=100% cum_sr=100%]
|
||||
... [7 sheep | 9,743,976 steps | ret(last 50)=+36.24 win_sr=100% cum_sr=100%]
|
||||
... [7 sheep | 9,843,976 steps | ret(last 50)=+39.67 win_sr=100% cum_sr=100%]
|
||||
... [7 sheep | 9,943,976 steps | ret(last 50)=+39.12 win_sr=100% cum_sr=100%]
|
||||
... [7 sheep | 10,043,976 steps | ret(last 50)=+37.82 win_sr=100% cum_sr=100%]
|
||||
... [7 sheep | 10,143,976 steps | ret(last 50)=+37.38 win_sr=100% cum_sr=100%]
|
||||
... [7 sheep | 10,243,976 steps | ret(last 50)=+37.47 win_sr=98% cum_sr=100%]
|
||||
... [7 sheep | 10,343,976 steps | ret(last 50)=+36.04 win_sr=98% cum_sr=99%]
|
||||
... [7 sheep | 10,443,976 steps | ret(last 50)=+31.71 win_sr=98% cum_sr=99%]
|
||||
... [7 sheep | 10,543,976 steps | ret(last 50)=+32.50 win_sr=96% cum_sr=99%]
|
||||
[Stage n_sheep=7] evaluating 30 eps
|
||||
[Stage n_sheep=7] sr=0% mean_len=4000 mean_min_pen=1.8m mean_act=1.38
|
||||
failure modes: PARTIAL_5of7=18 PARTIAL_6of7=7 PARTIAL_3of7=3 PARTIAL_4of7=2
|
||||
reward/step: progress=+0.0533 alignment=+0.0069 south=-0.0356 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0124 step_cost=-0.0200 complete=+0.0000
|
||||
|
||||
[Stage n_sheep=8] training 1,500,000 steps
|
||||
... [8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [8 sheep | 10,651,304 steps | ret(last 50)=+36.01 win_sr=96% cum_sr=96%]
|
||||
... [8 sheep | 10,751,304 steps | ret(last 50)=+37.97 win_sr=96% cum_sr=96%]
|
||||
... [8 sheep | 10,851,304 steps | ret(last 50)=+39.12 win_sr=100% cum_sr=98%]
|
||||
... [8 sheep | 10,951,304 steps | ret(last 50)=+36.54 win_sr=96% cum_sr=97%]
|
||||
... [8 sheep | 11,051,304 steps | ret(last 50)=+40.58 win_sr=100% cum_sr=98%]
|
||||
... [8 sheep | 11,151,304 steps | ret(last 50)=+39.00 win_sr=98% cum_sr=98%]
|
||||
... [8 sheep | 11,251,304 steps | ret(last 50)=+38.54 win_sr=98% cum_sr=98%]
|
||||
... [8 sheep | 11,351,304 steps | ret(last 50)=+39.29 win_sr=100% cum_sr=98%]
|
||||
... [8 sheep | 11,451,304 steps | ret(last 50)=+38.36 win_sr=100% cum_sr=98%]
|
||||
... [8 sheep | 11,551,304 steps | ret(last 50)=+40.04 win_sr=100% cum_sr=98%]
|
||||
... [8 sheep | 11,651,304 steps | ret(last 50)=+37.92 win_sr=100% cum_sr=99%]
|
||||
... [8 sheep | 11,751,304 steps | ret(last 50)=+40.01 win_sr=98% cum_sr=99%]
|
||||
... [8 sheep | 11,851,304 steps | ret(last 50)=+39.06 win_sr=100% cum_sr=99%]
|
||||
... [8 sheep | 11,951,304 steps | ret(last 50)=+41.39 win_sr=100% cum_sr=99%]
|
||||
... [8 sheep | 12,051,304 steps | ret(last 50)=+40.05 win_sr=100% cum_sr=99%]
|
||||
[Stage n_sheep=8] evaluating 30 eps
|
||||
[Stage n_sheep=8] sr=60% mean_len=2472 mean_min_pen=1.6m mean_act=1.39
|
||||
failure modes: SUCCESS=18 PARTIAL_6of8=9 PARTIAL_4of8=3
|
||||
reward/step: progress=+0.0956 alignment=+0.0106 south=-0.0508 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0283 step_cost=-0.0200 complete=+0.0243
|
||||
|
||||
[Stage n_sheep=9] training 1,500,000 steps
|
||||
... [9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [9 sheep | 12,158,632 steps | ret(last 50)=+41.35 win_sr=98% cum_sr=98%]
|
||||
... [9 sheep | 12,258,632 steps | ret(last 50)=+41.63 win_sr=100% cum_sr=99%]
|
||||
... [9 sheep | 12,358,632 steps | ret(last 50)=+41.85 win_sr=100% cum_sr=99%]
|
||||
... [9 sheep | 12,458,632 steps | ret(last 50)=+42.49 win_sr=100% cum_sr=100%]
|
||||
... [9 sheep | 12,558,632 steps | ret(last 50)=+40.87 win_sr=100% cum_sr=100%]
|
||||
... [9 sheep | 12,658,632 steps | ret(last 50)=+39.09 win_sr=100% cum_sr=100%]
|
||||
... [9 sheep | 12,758,632 steps | ret(last 50)=+42.23 win_sr=100% cum_sr=100%]
|
||||
... [9 sheep | 12,858,632 steps | ret(last 50)=+41.00 win_sr=100% cum_sr=100%]
|
||||
... [9 sheep | 12,958,632 steps | ret(last 50)=+43.02 win_sr=100% cum_sr=100%]
|
||||
... [9 sheep | 13,058,632 steps | ret(last 50)=+41.13 win_sr=100% cum_sr=100%]
|
||||
... [9 sheep | 13,158,632 steps | ret(last 50)=+41.02 win_sr=100% cum_sr=100%]
|
||||
... [9 sheep | 13,258,632 steps | ret(last 50)=+42.88 win_sr=100% cum_sr=100%]
|
||||
... [9 sheep | 13,358,632 steps | ret(last 50)=+46.16 win_sr=100% cum_sr=100%]
|
||||
... [9 sheep | 13,458,632 steps | ret(last 50)=+44.69 win_sr=100% cum_sr=100%]
|
||||
... [9 sheep | 13,558,632 steps | ret(last 50)=+44.49 win_sr=100% cum_sr=100%]
|
||||
[Stage n_sheep=9] evaluating 30 eps
|
||||
[Stage n_sheep=9] sr=0% mean_len=4000 mean_min_pen=1.5m mean_act=1.39
|
||||
failure modes: PARTIAL_8of9=26 PARTIAL_7of9=4
|
||||
reward/step: progress=+0.0787 alignment=+0.0079 south=-0.0184 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0197 step_cost=-0.0200 complete=+0.0000
|
||||
|
||||
[Stage n_sheep=10] training 1,500,000 steps
|
||||
... [10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%]
|
||||
... [10 sheep | 13,665,960 steps | ret(last 50)=+43.38 win_sr=100% cum_sr=100%]
|
||||
... [10 sheep | 13,765,960 steps | ret(last 50)=+43.26 win_sr=100% cum_sr=100%]
|
||||
... [10 sheep | 13,865,960 steps | ret(last 50)=+46.91 win_sr=100% cum_sr=100%]
|
||||
... [10 sheep | 13,965,960 steps | ret(last 50)=+45.36 win_sr=100% cum_sr=100%]
|
||||
... [10 sheep | 14,065,960 steps | ret(last 50)=+45.37 win_sr=100% cum_sr=100%]
|
||||
... [10 sheep | 14,165,960 steps | ret(last 50)=+44.30 win_sr=100% cum_sr=100%]
|
||||
... [10 sheep | 14,265,960 steps | ret(last 50)=+43.83 win_sr=100% cum_sr=100%]
|
||||
... [10 sheep | 14,365,960 steps | ret(last 50)=+47.09 win_sr=100% cum_sr=100%]
|
||||
... [10 sheep | 14,465,960 steps | ret(last 50)=+41.32 win_sr=100% cum_sr=100%]
|
||||
... [10 sheep | 14,565,960 steps | ret(last 50)=+45.30 win_sr=100% cum_sr=100%]
|
||||
... [10 sheep | 14,665,960 steps | ret(last 50)=+45.36 win_sr=98% cum_sr=100%]
|
||||
... [10 sheep | 14,765,960 steps | ret(last 50)=+41.83 win_sr=100% cum_sr=100%]
|
||||
... [10 sheep | 14,865,960 steps | ret(last 50)=+44.40 win_sr=100% cum_sr=100%]
|
||||
... [10 sheep | 14,965,960 steps | ret(last 50)=+45.89 win_sr=100% cum_sr=100%]
|
||||
... [10 sheep | 15,065,960 steps | ret(last 50)=+42.49 win_sr=100% cum_sr=100%]
|
||||
[Stage n_sheep=10] evaluating 30 eps
|
||||
[Stage n_sheep=10] sr=83% mean_len=2243 mean_min_pen=1.5m mean_act=1.40
|
||||
failure modes: SUCCESS=25 PARTIAL_8of10=3 PARTIAL_7of10=2
|
||||
reward/step: progress=+0.1387 alignment=+0.0150 south=-0.0437 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0428 step_cost=-0.0200 complete=+0.0372
|
||||
|
||||
======================================================================
|
||||
TRAINING SUMMARY
|
||||
======================================================================
|
||||
n_sheep=1 sr=100% len= 264 min_pen= 3.7m act=0.45
|
||||
n_sheep=2 sr= 77% len= 1398 min_pen= 3.3m act=0.97
|
||||
n_sheep=3 sr= 97% len= 1095 min_pen= 2.5m act=0.95
|
||||
n_sheep=4 sr= 57% len= 2572 min_pen= 2.2m act=1.28
|
||||
n_sheep=5 sr= 0% len= 4000 min_pen= 1.7m act=1.36
|
||||
n_sheep=6 sr= 37% len= 3137 min_pen= 1.8m act=1.37
|
||||
n_sheep=7 sr= 0% len= 4000 min_pen= 1.8m act=1.38
|
||||
n_sheep=8 sr= 60% len= 2472 min_pen= 1.6m act=1.39
|
||||
n_sheep=9 sr= 0% len= 4000 min_pen= 1.5m act=1.39
|
||||
n_sheep=10 sr= 83% len= 2243 min_pen= 1.5m act=1.40
|
||||
|
||||
Total time: 94.3 min
|
||||
Artefacts: runs/v3/
|
||||
Plots: runs/v3/success_rate.png, runs/v3/eval/
|
||||
@@ -1,15 +0,0 @@
|
||||
{
|
||||
"W_PER_SHEEP": 2.0,
|
||||
"W_ALIGN": 0.05,
|
||||
"W_PEN_BONUS": 10.0,
|
||||
"W_COMPLETE": 100.0,
|
||||
"W_STEP_COST": 0.02,
|
||||
"W_SOUTH": 0.01,
|
||||
"W_COMPACT": 0.0,
|
||||
"W_WALL_TOUCH": 0.0,
|
||||
"WALL_TOUCH_BUFFER": 0.4,
|
||||
"ALIGN_SHAPE": "standoff",
|
||||
"ALIGN_GATED": true,
|
||||
"ENTRY_AWARE": true,
|
||||
"ent_coef": 0.02
|
||||
}
|
||||
|
Before Width: | Height: | Size: 2.5 MiB |
|
Before Width: | Height: | Size: 135 KiB |
|
Before Width: | Height: | Size: 1.2 MiB |
|
Before Width: | Height: | Size: 248 KiB |
|
Before Width: | Height: | Size: 3.6 MiB |
|
Before Width: | Height: | Size: 3.8 MiB |
|
Before Width: | Height: | Size: 1.3 MiB |
|
Before Width: | Height: | Size: 3.9 MiB |
|
Before Width: | Height: | Size: 4.0 MiB |
|
Before Width: | Height: | Size: 3.9 MiB |
|
Before Width: | Height: | Size: 224 KiB |
|
Before Width: | Height: | Size: 40 KiB |
|
Before Width: | Height: | Size: 82 KiB |
|
Before Width: | Height: | Size: 61 KiB |
|
Before Width: | Height: | Size: 73 KiB |
|
Before Width: | Height: | Size: 85 KiB |
|
Before Width: | Height: | Size: 143 KiB |
|
Before Width: | Height: | Size: 144 KiB |
|
Before Width: | Height: | Size: 172 KiB |
|
Before Width: | Height: | Size: 148 KiB |
|
Before Width: | Height: | Size: 186 KiB |
|
Before Width: | Height: | Size: 82 KiB |
|
Before Width: | Height: | Size: 190 KiB |
|
Before Width: | Height: | Size: 126 KiB |
|
Before Width: | Height: | Size: 109 KiB |
|
Before Width: | Height: | Size: 120 KiB |
|
Before Width: | Height: | Size: 163 KiB |
|
Before Width: | Height: | Size: 170 KiB |
|
Before Width: | Height: | Size: 172 KiB |
|
Before Width: | Height: | Size: 132 KiB |
@@ -1,222 +0,0 @@
|
||||
[
|
||||
{
|
||||
"sr": 1.0,
|
||||
"mean_len": 264.3,
|
||||
"mean_min_pen": 3.6947483142217,
|
||||
"mean_act": 0.4488927691353647,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 30
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.11562251145796992,
|
||||
"alignment": 0.00012847888517811197,
|
||||
"south": -0.00046327802870008703,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.037835792659856225,
|
||||
"step_cost": -0.020000000000000923,
|
||||
"complete": 0.37835792659856227
|
||||
},
|
||||
"n_sheep": 1
|
||||
},
|
||||
{
|
||||
"sr": 0.7666666666666667,
|
||||
"mean_len": 1397.6333333333334,
|
||||
"mean_min_pen": 3.3354002753893535,
|
||||
"mean_act": 0.9679237489606706,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 23,
|
||||
"PARTIAL_1of2": 6,
|
||||
"COMPACT_CANT_DRIVE": 1
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.04012407340533507,
|
||||
"alignment": 0.004549029322963513,
|
||||
"south": -0.003855391958439705,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.01264041594123399,
|
||||
"step_cost": -0.019999999999988728,
|
||||
"complete": 0.05485463521667581
|
||||
},
|
||||
"n_sheep": 2
|
||||
},
|
||||
{
|
||||
"sr": 0.9666666666666667,
|
||||
"mean_len": 1095.3666666666666,
|
||||
"mean_min_pen": 2.4724439779917398,
|
||||
"mean_act": 0.950618689999602,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 29,
|
||||
"COMPACT_CANT_DRIVE": 1
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.08207998032411863,
|
||||
"alignment": 0.011342550088712133,
|
||||
"south": -0.008689572376747992,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.0264751529168315,
|
||||
"step_cost": -0.019999999999990636,
|
||||
"complete": 0.08825050972277168
|
||||
},
|
||||
"n_sheep": 3
|
||||
},
|
||||
{
|
||||
"sr": 0.5666666666666667,
|
||||
"mean_len": 2571.866666666667,
|
||||
"mean_min_pen": 2.1761705835660297,
|
||||
"mean_act": 1.2794624905502197,
|
||||
"failure_modes": {
|
||||
"PARTIAL_2of4": 5,
|
||||
"SUCCESS": 17,
|
||||
"DROVE_NO_SHEEP": 1,
|
||||
"NEVER_COMPACT": 1,
|
||||
"PARTIAL_1of4": 6
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.04547638401556759,
|
||||
"alignment": 0.003989776116242459,
|
||||
"south": -0.04544084245355691,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.010887034060863705,
|
||||
"step_cost": -0.01999999999998557,
|
||||
"complete": 0.02203328321841464
|
||||
},
|
||||
"n_sheep": 4
|
||||
},
|
||||
{
|
||||
"sr": 0.0,
|
||||
"mean_len": 4000.0,
|
||||
"mean_min_pen": 1.7023075381914774,
|
||||
"mean_act": 1.3590981605617019,
|
||||
"failure_modes": {
|
||||
"PARTIAL_1of5": 9,
|
||||
"PARTIAL_3of5": 2,
|
||||
"PARTIAL_2of5": 2,
|
||||
"PARTIAL_4of5": 17
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.039584031492471694,
|
||||
"alignment": 0.003391631218188155,
|
||||
"south": -0.03930825256315925,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.00725,
|
||||
"step_cost": -0.01999999999998423,
|
||||
"complete": 0.0
|
||||
},
|
||||
"n_sheep": 5
|
||||
},
|
||||
{
|
||||
"sr": 0.36666666666666664,
|
||||
"mean_len": 3136.766666666667,
|
||||
"mean_min_pen": 1.7896055857340494,
|
||||
"mean_act": 1.3694271957435262,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 11,
|
||||
"PARTIAL_3of6": 5,
|
||||
"PARTIAL_4of6": 14
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.06539200159542725,
|
||||
"alignment": 0.00849681660918308,
|
||||
"south": -0.03917853851677538,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.014558515669000988,
|
||||
"step_cost": -0.019999999999984894,
|
||||
"complete": 0.011689319150292764
|
||||
},
|
||||
"n_sheep": 6
|
||||
},
|
||||
{
|
||||
"sr": 0.0,
|
||||
"mean_len": 4000.0,
|
||||
"mean_min_pen": 1.8426543315251669,
|
||||
"mean_act": 1.383490810132896,
|
||||
"failure_modes": {
|
||||
"PARTIAL_5of7": 18,
|
||||
"PARTIAL_3of7": 3,
|
||||
"PARTIAL_6of7": 7,
|
||||
"PARTIAL_4of7": 2
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.05331589305400848,
|
||||
"alignment": 0.00686667034524816,
|
||||
"south": -0.03559404498259062,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.012416666666666666,
|
||||
"step_cost": -0.01999999999998423,
|
||||
"complete": 0.0
|
||||
},
|
||||
"n_sheep": 7
|
||||
},
|
||||
{
|
||||
"sr": 0.6,
|
||||
"mean_len": 2472.0666666666666,
|
||||
"mean_min_pen": 1.609976100921631,
|
||||
"mean_act": 1.3901071324053385,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 18,
|
||||
"PARTIAL_4of8": 3,
|
||||
"PARTIAL_6of8": 9
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.09555249268374479,
|
||||
"alignment": 0.010622170754243947,
|
||||
"south": -0.050827120587952045,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.028316388446913515,
|
||||
"step_cost": -0.019999999999985724,
|
||||
"complete": 0.024271190097354442
|
||||
},
|
||||
"n_sheep": 8
|
||||
},
|
||||
{
|
||||
"sr": 0.0,
|
||||
"mean_len": 4000.0,
|
||||
"mean_min_pen": 1.5165573159853618,
|
||||
"mean_act": 1.3936255563423037,
|
||||
"failure_modes": {
|
||||
"PARTIAL_8of9": 26,
|
||||
"PARTIAL_7of9": 4
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.07870613823334376,
|
||||
"alignment": 0.007913931652916581,
|
||||
"south": -0.01837508026464782,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.019666666666666666,
|
||||
"step_cost": -0.01999999999998423,
|
||||
"complete": 0.0
|
||||
},
|
||||
"n_sheep": 9
|
||||
},
|
||||
{
|
||||
"sr": 0.8333333333333334,
|
||||
"mean_len": 2243.0,
|
||||
"mean_min_pen": 1.5175361116727193,
|
||||
"mean_act": 1.3979439154633806,
|
||||
"failure_modes": {
|
||||
"SUCCESS": 25,
|
||||
"PARTIAL_7of10": 2,
|
||||
"PARTIAL_8of10": 3
|
||||
},
|
||||
"reward_per_step": {
|
||||
"progress": 0.13872242361851903,
|
||||
"alignment": 0.015031396471697371,
|
||||
"south": -0.04365405077614671,
|
||||
"compact": 0.0,
|
||||
"wall_touch": 0.0,
|
||||
"pen_bonus": 0.04279982166740972,
|
||||
"step_cost": -0.019999999999986123,
|
||||
"complete": 0.03715262297518205
|
||||
},
|
||||
"n_sheep": 10
|
||||
}
|
||||
]
|
||||
|
Before Width: | Height: | Size: 32 KiB |