diff --git a/training/runs/replay_20260425_152857/config.json b/training/runs/replay_20260425_152857/config.json new file mode 100644 index 0000000..b2d15fe --- /dev/null +++ b/training/runs/replay_20260425_152857/config.json @@ -0,0 +1,11 @@ +{ + "W_PER_SHEEP": 1.0, + "W_ALIGN": 0.0, + "W_PEN_BONUS": 5.0, + "W_STEP_COST": 0.02, + "W_COMPLETE": 200.0, + "W_COMPACT": 1.5, + "ALIGN_SHAPE": "standoff", + "ALIGN_GATED": false, + "ent_coef": 0.02 +} \ No newline at end of file diff --git a/training/runs/replay_20260425_152857/final_model.zip b/training/runs/replay_20260425_152857/final_model.zip new file mode 100644 index 0000000..b326e4c Binary files /dev/null and b/training/runs/replay_20260425_152857/final_model.zip differ diff --git a/training/runs/replay_20260425_152857/stage_results.json b/training/runs/replay_20260425_152857/stage_results.json new file mode 100644 index 0000000..c4e1ec0 --- /dev/null +++ b/training/runs/replay_20260425_152857/stage_results.json @@ -0,0 +1,23 @@ +[ + { + "n_sheep": 1, + "sr": 1.0, + "mean_len": 267.6333333333333, + "mean_min_pen": 3.7235233147939044, + "mean_act": 0.3746675180125346 + }, + { + "n_sheep": 2, + "sr": 0.06666666666666667, + "mean_len": 1458.6666666666667, + "mean_min_pen": 14.14484707514445, + "mean_act": 0.284232099657656 + }, + { + "n_sheep": 3, + "sr": 0.0, + "mean_len": 1500.0, + "mean_min_pen": 12.514182837804158, + "mean_act": 1.2590703022670828 + } +] \ No newline at end of file diff --git a/training/runs/replay_20260425_152857/vecnorm.pkl b/training/runs/replay_20260425_152857/vecnorm.pkl new file mode 100644 index 0000000..0a57434 Binary files /dev/null and b/training/runs/replay_20260425_152857/vecnorm.pkl differ diff --git a/training/runs/replay_best.log b/training/runs/replay_best.log new file mode 100644 index 0000000..5fd9f21 --- /dev/null +++ b/training/runs/replay_best.log @@ -0,0 +1,72 @@ +Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02} +Run dir: runs/replay_20260425_152857 +Curriculum: 1 → 3 sheep, 1,500,000 steps/stage + +[Stage n_sheep=1] training 1,500,000 steps + ... [trial 1 | 1 sheep | 100,000 steps | ret(last 50)=-20.83 sr=6%] + ... [trial 1 | 1 sheep | 200,000 steps | ret(last 50)=-21.40 sr=4%] + ... [trial 1 | 1 sheep | 300,000 steps | ret(last 50)=-22.31 sr=0%] + ... [trial 1 | 1 sheep | 400,000 steps | ret(last 50)=-19.13 sr=4%] + ... [trial 1 | 1 sheep | 500,000 steps | ret(last 50)=-18.79 sr=8%] + ... [trial 1 | 1 sheep | 600,000 steps | ret(last 50)=-10.15 sr=8%] + ... [trial 1 | 1 sheep | 700,000 steps | ret(last 50)=+10.14 sr=82%] + ... [trial 1 | 1 sheep | 800,000 steps | ret(last 50)=+11.90 sr=100%] + ... [trial 1 | 1 sheep | 900,000 steps | ret(last 50)=+11.32 sr=100%] + ... [trial 1 | 1 sheep | 1,000,000 steps | ret(last 50)=+11.36 sr=100%] + ... [trial 1 | 1 sheep | 1,100,000 steps | ret(last 50)=+11.18 sr=100%] + ... [trial 1 | 1 sheep | 1,200,000 steps | ret(last 50)=+11.08 sr=100%] + ... [trial 1 | 1 sheep | 1,300,000 steps | ret(last 50)=+11.14 sr=100%] + ... [trial 1 | 1 sheep | 1,400,000 steps | ret(last 50)=+11.10 sr=100%] + ... [trial 1 | 1 sheep | 1,500,000 steps | ret(last 50)=+10.99 sr=100%] +[Stage n_sheep=1] evaluating 30 eps +[Stage n_sheep=1] sr=100% mean_len=268 mean_min_pen=3.7m mean_act=0.37 + +[Stage n_sheep=2] training 1,500,000 steps + ... [trial 1 | 2 sheep | 1,507,336 steps | ret(last 0)=+nan sr=nan%] + ... [trial 1 | 2 sheep | 1,607,336 steps | ret(last 50)=-3.10 sr=2%] + ... [trial 1 | 2 sheep | 1,707,336 steps | ret(last 50)=-3.41 sr=2%] + ... [trial 1 | 2 sheep | 1,807,336 steps | ret(last 50)=-3.11 sr=6%] + ... [trial 1 | 2 sheep | 1,907,336 steps | ret(last 50)=-2.65 sr=8%] + ... [trial 1 | 2 sheep | 2,007,336 steps | ret(last 50)=-4.11 sr=2%] + ... [trial 1 | 2 sheep | 2,107,336 steps | ret(last 50)=-3.19 sr=6%] + ... [trial 1 | 2 sheep | 2,207,336 steps | ret(last 50)=-3.45 sr=4%] + ... [trial 1 | 2 sheep | 2,307,336 steps | ret(last 50)=-4.13 sr=0%] + ... [trial 1 | 2 sheep | 2,407,336 steps | ret(last 50)=-3.47 sr=8%] + ... [trial 1 | 2 sheep | 2,507,336 steps | ret(last 50)=-3.83 sr=4%] + ... [trial 1 | 2 sheep | 2,607,336 steps | ret(last 50)=-4.58 sr=0%] + ... [trial 1 | 2 sheep | 2,707,336 steps | ret(last 50)=-3.94 sr=2%] + ... [trial 1 | 2 sheep | 2,807,336 steps | ret(last 50)=-4.15 sr=2%] + ... [trial 1 | 2 sheep | 2,907,336 steps | ret(last 50)=-3.95 sr=4%] + ... [trial 1 | 2 sheep | 3,007,336 steps | ret(last 50)=-4.44 sr=0%] +[Stage n_sheep=2] evaluating 30 eps +[Stage n_sheep=2] sr=7% mean_len=1459 mean_min_pen=14.1m mean_act=0.28 + +[Stage n_sheep=3] training 1,500,000 steps + ... [trial 1 | 3 sheep | 3,014,664 steps | ret(last 0)=+nan sr=nan%] + ... [trial 1 | 3 sheep | 3,114,664 steps | ret(last 50)=-4.16 sr=0%] + ... [trial 1 | 3 sheep | 3,214,664 steps | ret(last 50)=-4.94 sr=0%] + ... [trial 1 | 3 sheep | 3,314,664 steps | ret(last 50)=-4.42 sr=0%] + ... [trial 1 | 3 sheep | 3,414,664 steps | ret(last 50)=-4.69 sr=0%] + ... [trial 1 | 3 sheep | 3,514,664 steps | ret(last 50)=-3.72 sr=0%] + ... [trial 1 | 3 sheep | 3,614,664 steps | ret(last 50)=-5.04 sr=0%] + ... [trial 1 | 3 sheep | 3,714,664 steps | ret(last 50)=-4.26 sr=0%] + ... [trial 1 | 3 sheep | 3,814,664 steps | ret(last 50)=-4.70 sr=0%] + ... [trial 1 | 3 sheep | 3,914,664 steps | ret(last 50)=-4.61 sr=0%] + ... [trial 1 | 3 sheep | 4,014,664 steps | ret(last 50)=-4.19 sr=0%] + ... [trial 1 | 3 sheep | 4,114,664 steps | ret(last 50)=-4.35 sr=0%] + ... [trial 1 | 3 sheep | 4,214,664 steps | ret(last 50)=-4.41 sr=0%] + ... [trial 1 | 3 sheep | 4,314,664 steps | ret(last 50)=-4.42 sr=0%] + ... [trial 1 | 3 sheep | 4,414,664 steps | ret(last 50)=-4.77 sr=0%] + ... [trial 1 | 3 sheep | 4,514,664 steps | ret(last 50)=-4.49 sr=0%] +[Stage n_sheep=3] evaluating 30 eps +[Stage n_sheep=3] sr=0% mean_len=1500 mean_min_pen=12.5m mean_act=1.26 + +============================================================ + REPLAY SUMMARY +============================================================ + n_sheep=1 sr=100% len= 268 min_pen= 3.7m act=0.37 + n_sheep=2 sr= 7% len= 1459 min_pen= 14.1m act=0.28 + n_sheep=3 sr= 0% len= 1500 min_pen= 12.5m act=1.26 + + Total time: 26.9 min + Artefacts: runs/replay_20260425_152857/