Test25_1600

This commit is contained in:
Johnny Fernandes
2026-04-25 15:06:06 +00:00
parent cd7e62b1b2
commit 4350c7d320
6 changed files with 832 additions and 0 deletions
+681
View File
@@ -0,0 +1,681 @@
Sweep dir: runs/sweep_20260425_124630
Search space: ['W_PER_SHEEP', 'W_ALIGN', 'W_PEN_BONUS', 'W_STEP_COST', 'W_COMPLETE', 'W_COMPACT', 'ALIGN_SHAPE', 'ALIGN_GATED', 'ent_coef']
Per-trial: 1,000,000 steps train + 30 eval eps
Time budget: 7.5h
[Trial 1] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 100.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.005}
... [trial 1 | 1 sheep | 50,000 steps | ret(last 33)=-7.72 sr=6%]
... [trial 1 | 1 sheep | 100,000 steps | ret(last 50)=-10.07 sr=2%]
... [trial 1 | 1 sheep | 150,000 steps | ret(last 50)=-9.89 sr=2%]
... [trial 1 | 1 sheep | 200,000 steps | ret(last 50)=-7.94 sr=4%]
... [trial 1 | 1 sheep | 250,000 steps | ret(last 50)=+2.69 sr=2%]
... [trial 1 | 1 sheep | 300,000 steps | ret(last 50)=+18.25 sr=24%]
... [trial 1 | 1 sheep | 350,000 steps | ret(last 50)=+24.63 sr=20%]
... [trial 1 | 1 sheep | 400,000 steps | ret(last 50)=+24.83 sr=26%]
... [trial 1 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 1 | 2 sheep | 459,608 steps | ret(last 32)=+10.08 sr=0%]
... [trial 1 | 2 sheep | 509,608 steps | ret(last 50)=+11.51 sr=0%]
... [trial 1 | 2 sheep | 559,608 steps | ret(last 50)=+12.82 sr=0%]
... [trial 1 | 2 sheep | 609,608 steps | ret(last 50)=+14.39 sr=0%]
... [trial 1 | 2 sheep | 659,608 steps | ret(last 50)=+14.14 sr=0%]
... [trial 1 | 2 sheep | 709,608 steps | ret(last 50)=+12.36 sr=2%]
... [trial 1 | 2 sheep | 759,608 steps | ret(last 50)=+13.08 sr=0%]
... [trial 1 | 2 sheep | 809,608 steps | ret(last 50)=+13.24 sr=0%]
... [trial 1 | 2 sheep | 859,608 steps | ret(last 50)=+13.23 sr=0%]
... [trial 1 | 2 sheep | 909,608 steps | ret(last 50)=+14.23 sr=2%]
... [trial 1 | 2 sheep | 959,608 steps | ret(last 50)=+14.69 sr=0%]
... [trial 1 | 2 sheep | 1,009,608 steps | ret(last 50)=+20.23 sr=0%]
... [trial 1 | eval n=1]
... [trial 1 | eval n=2]
... [trial 1 | eval n=3]
→ score=0.150 sr1=0.50 sr2=0.10 sr3=0.00 [317s]
[Trial 2] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.005}
... [trial 2 | 1 sheep | 50,000 steps | ret(last 34)=-24.61 sr=9%]
... [trial 2 | 1 sheep | 100,000 steps | ret(last 50)=-28.20 sr=10%]
... [trial 2 | 1 sheep | 150,000 steps | ret(last 50)=-28.14 sr=8%]
... [trial 2 | 1 sheep | 200,000 steps | ret(last 50)=-31.36 sr=2%]
... [trial 2 | 1 sheep | 250,000 steps | ret(last 50)=-31.38 sr=6%]
... [trial 2 | 1 sheep | 300,000 steps | ret(last 50)=-32.89 sr=4%]
... [trial 2 | 1 sheep | 350,000 steps | ret(last 50)=-29.11 sr=8%]
... [trial 2 | 1 sheep | 400,000 steps | ret(last 50)=-19.16 sr=30%]
... [trial 2 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 2 | 2 sheep | 459,608 steps | ret(last 34)=-17.61 sr=9%]
... [trial 2 | 2 sheep | 509,608 steps | ret(last 50)=-18.59 sr=2%]
... [trial 2 | 2 sheep | 559,608 steps | ret(last 50)=-16.92 sr=0%]
... [trial 2 | 2 sheep | 609,608 steps | ret(last 50)=-17.40 sr=0%]
... [trial 2 | 2 sheep | 659,608 steps | ret(last 50)=-18.13 sr=0%]
... [trial 2 | 2 sheep | 709,608 steps | ret(last 50)=-17.45 sr=0%]
... [trial 2 | 2 sheep | 759,608 steps | ret(last 50)=-16.06 sr=0%]
... [trial 2 | 2 sheep | 809,608 steps | ret(last 50)=-15.35 sr=0%]
... [trial 2 | 2 sheep | 859,608 steps | ret(last 50)=-12.63 sr=0%]
... [trial 2 | 2 sheep | 909,608 steps | ret(last 50)=-12.41 sr=0%]
... [trial 2 | 2 sheep | 959,608 steps | ret(last 50)=-12.91 sr=0%]
... [trial 2 | 2 sheep | 1,009,608 steps | ret(last 50)=-10.94 sr=0%]
... [trial 2 | eval n=1]
... [trial 2 | eval n=2]
... [trial 2 | eval n=3]
→ score=0.060 sr1=0.30 sr2=0.00 sr3=0.00 [311s]
[Trial 3] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.025, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 50.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.01}
... [trial 3 | 1 sheep | 50,000 steps | ret(last 32)=-1.75 sr=0%]
... [trial 3 | 1 sheep | 100,000 steps | ret(last 50)=-3.70 sr=0%]
... [trial 3 | 1 sheep | 150,000 steps | ret(last 50)=-6.09 sr=2%]
... [trial 3 | 1 sheep | 200,000 steps | ret(last 50)=-3.44 sr=4%]
... [trial 3 | 1 sheep | 250,000 steps | ret(last 50)=+6.68 sr=8%]
... [trial 3 | 1 sheep | 300,000 steps | ret(last 50)=+14.58 sr=22%]
... [trial 3 | 1 sheep | 350,000 steps | ret(last 50)=+15.28 sr=64%]
... [trial 3 | 1 sheep | 400,000 steps | ret(last 50)=+14.70 sr=74%]
... [trial 3 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 3 | 2 sheep | 459,608 steps | ret(last 35)=+0.82 sr=9%]
... [trial 3 | 2 sheep | 509,608 steps | ret(last 50)=-0.66 sr=2%]
... [trial 3 | 2 sheep | 559,608 steps | ret(last 50)=-0.02 sr=0%]
... [trial 3 | 2 sheep | 609,608 steps | ret(last 50)=-0.02 sr=0%]
... [trial 3 | 2 sheep | 659,608 steps | ret(last 50)=+1.37 sr=4%]
... [trial 3 | 2 sheep | 709,608 steps | ret(last 50)=+2.75 sr=8%]
... [trial 3 | 2 sheep | 759,608 steps | ret(last 50)=+1.25 sr=6%]
... [trial 3 | 2 sheep | 809,608 steps | ret(last 50)=+4.20 sr=10%]
... [trial 3 | 2 sheep | 859,608 steps | ret(last 50)=+2.14 sr=4%]
... [trial 3 | 2 sheep | 909,608 steps | ret(last 50)=+3.13 sr=8%]
... [trial 3 | 2 sheep | 959,608 steps | ret(last 50)=+5.16 sr=6%]
... [trial 3 | 2 sheep | 1,009,608 steps | ret(last 50)=+5.95 sr=8%]
... [trial 3 | eval n=1]
... [trial 3 | eval n=2]
... [trial 3 | eval n=3]
→ score=0.270 sr1=0.70 sr2=0.20 sr3=0.10 [304s]
[Trial 4] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 50.0, 'W_COMPACT': 0.0, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.005}
... [trial 4 | 1 sheep | 50,000 steps | ret(last 33)=-2.86 sr=9%]
... [trial 4 | 1 sheep | 100,000 steps | ret(last 50)=-3.54 sr=6%]
... [trial 4 | 1 sheep | 150,000 steps | ret(last 50)=-2.76 sr=8%]
... [trial 4 | 1 sheep | 200,000 steps | ret(last 50)=-1.56 sr=8%]
... [trial 4 | 1 sheep | 250,000 steps | ret(last 50)=+9.18 sr=24%]
... [trial 4 | 1 sheep | 300,000 steps | ret(last 50)=+18.46 sr=46%]
... [trial 4 | 1 sheep | 350,000 steps | ret(last 50)=+15.01 sr=34%]
... [trial 4 | 1 sheep | 400,000 steps | ret(last 50)=+14.44 sr=42%]
... [trial 4 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 4 | 2 sheep | 459,608 steps | ret(last 35)=+6.77 sr=9%]
... [trial 4 | 2 sheep | 509,608 steps | ret(last 50)=+5.50 sr=6%]
... [trial 4 | 2 sheep | 559,608 steps | ret(last 50)=+4.39 sr=0%]
... [trial 4 | 2 sheep | 609,608 steps | ret(last 50)=+4.54 sr=0%]
... [trial 4 | 2 sheep | 659,608 steps | ret(last 50)=+6.97 sr=0%]
... [trial 4 | 2 sheep | 709,608 steps | ret(last 50)=+4.28 sr=4%]
... [trial 4 | 2 sheep | 759,608 steps | ret(last 50)=+4.30 sr=2%]
... [trial 4 | 2 sheep | 809,608 steps | ret(last 50)=+6.34 sr=4%]
... [trial 4 | 2 sheep | 859,608 steps | ret(last 50)=+7.27 sr=2%]
... [trial 4 | 2 sheep | 909,608 steps | ret(last 50)=+8.22 sr=4%]
... [trial 4 | 2 sheep | 959,608 steps | ret(last 50)=+7.23 sr=6%]
... [trial 4 | 2 sheep | 1,009,608 steps | ret(last 50)=+7.24 sr=2%]
... [trial 4 | eval n=1]
... [trial 4 | eval n=2]
... [trial 4 | eval n=3]
→ score=0.060 sr1=0.30 sr2=0.00 sr3=0.00 [302s]
[Trial 5] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 50.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': True, 'ent_coef': 0.005}
... [trial 5 | 1 sheep | 50,000 steps | ret(last 33)=+3.70 sr=6%]
... [trial 5 | 1 sheep | 100,000 steps | ret(last 50)=-2.32 sr=0%]
... [trial 5 | 1 sheep | 150,000 steps | ret(last 50)=-4.36 sr=4%]
... [trial 5 | 1 sheep | 200,000 steps | ret(last 50)=-4.30 sr=6%]
... [trial 5 | 1 sheep | 250,000 steps | ret(last 50)=-0.15 sr=14%]
... [trial 5 | 1 sheep | 300,000 steps | ret(last 50)=+1.39 sr=8%]
... [trial 5 | 1 sheep | 350,000 steps | ret(last 50)=+11.40 sr=36%]
... [trial 5 | 1 sheep | 400,000 steps | ret(last 50)=+11.08 sr=24%]
... [trial 5 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 5 | 2 sheep | 459,608 steps | ret(last 34)=+6.85 sr=6%]
... [trial 5 | 2 sheep | 509,608 steps | ret(last 50)=+7.35 sr=8%]
... [trial 5 | 2 sheep | 559,608 steps | ret(last 50)=+7.57 sr=4%]
... [trial 5 | 2 sheep | 609,608 steps | ret(last 50)=+6.64 sr=2%]
... [trial 5 | 2 sheep | 659,608 steps | ret(last 50)=+9.15 sr=10%]
... [trial 5 | 2 sheep | 709,608 steps | ret(last 50)=+14.27 sr=10%]
... [trial 5 | 2 sheep | 759,608 steps | ret(last 50)=+10.93 sr=6%]
... [trial 5 | 2 sheep | 809,608 steps | ret(last 50)=+10.17 sr=12%]
... [trial 5 | 2 sheep | 859,608 steps | ret(last 50)=+8.20 sr=8%]
... [trial 5 | 2 sheep | 909,608 steps | ret(last 50)=+9.61 sr=14%]
... [trial 5 | 2 sheep | 959,608 steps | ret(last 50)=+11.14 sr=10%]
... [trial 5 | 2 sheep | 1,009,608 steps | ret(last 50)=+10.75 sr=12%]
... [trial 5 | eval n=1]
... [trial 5 | eval n=2]
... [trial 5 | eval n=3]
→ score=0.200 sr1=1.00 sr2=0.00 sr3=0.00 [314s]
[Trial 6] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.025, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 200.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ent_coef': 0.01}
... [trial 6 | 1 sheep | 50,000 steps | ret(last 32)=-13.18 sr=9%]
... [trial 6 | 1 sheep | 100,000 steps | ret(last 50)=-10.28 sr=16%]
... [trial 6 | 1 sheep | 150,000 steps | ret(last 50)=+5.28 sr=44%]
... [trial 6 | 1 sheep | 200,000 steps | ret(last 50)=+9.40 sr=38%]
... [trial 6 | 1 sheep | 250,000 steps | ret(last 50)=+8.62 sr=32%]
... [trial 6 | 1 sheep | 300,000 steps | ret(last 50)=+9.14 sr=34%]
... [trial 6 | 1 sheep | 350,000 steps | ret(last 50)=+12.59 sr=60%]
... [trial 6 | 1 sheep | 400,000 steps | ret(last 50)=+14.10 sr=72%]
... [trial 6 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 6 | 2 sheep | 459,608 steps | ret(last 34)=+0.12 sr=9%]
... [trial 6 | 2 sheep | 509,608 steps | ret(last 50)=-2.84 sr=4%]
... [trial 6 | 2 sheep | 559,608 steps | ret(last 50)=-2.11 sr=10%]
... [trial 6 | 2 sheep | 609,608 steps | ret(last 50)=-1.91 sr=14%]
... [trial 6 | 2 sheep | 659,608 steps | ret(last 50)=-2.14 sr=14%]
... [trial 6 | 2 sheep | 709,608 steps | ret(last 50)=-4.30 sr=6%]
... [trial 6 | 2 sheep | 759,608 steps | ret(last 50)=-1.89 sr=10%]
... [trial 6 | 2 sheep | 809,608 steps | ret(last 50)=-3.47 sr=8%]
... [trial 6 | 2 sheep | 859,608 steps | ret(last 50)=-1.45 sr=8%]
... [trial 6 | 2 sheep | 909,608 steps | ret(last 50)=-3.55 sr=2%]
... [trial 6 | 2 sheep | 959,608 steps | ret(last 50)=-2.93 sr=4%]
... [trial 6 | 2 sheep | 1,009,608 steps | ret(last 50)=-1.45 sr=10%]
... [trial 6 | eval n=1]
... [trial 6 | eval n=2]
... [trial 6 | eval n=3]
→ score=0.160 sr1=0.80 sr2=0.00 sr3=0.00 [312s]
[Trial 7] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.005, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.01}
... [trial 7 | 1 sheep | 50,000 steps | ret(last 32)=-8.47 sr=0%]
... [trial 7 | 1 sheep | 100,000 steps | ret(last 50)=-5.40 sr=4%]
... [trial 7 | 1 sheep | 150,000 steps | ret(last 50)=-2.72 sr=10%]
... [trial 7 | 1 sheep | 200,000 steps | ret(last 50)=-1.59 sr=10%]
... [trial 7 | 1 sheep | 250,000 steps | ret(last 50)=-1.58 sr=6%]
... [trial 7 | 1 sheep | 300,000 steps | ret(last 50)=-3.68 sr=2%]
... [trial 7 | 1 sheep | 350,000 steps | ret(last 50)=+4.82 sr=10%]
... [trial 7 | 1 sheep | 400,000 steps | ret(last 50)=+15.81 sr=54%]
... [trial 7 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 7 | 2 sheep | 459,608 steps | ret(last 32)=-2.50 sr=6%]
... [trial 7 | 2 sheep | 509,608 steps | ret(last 50)=-2.32 sr=2%]
... [trial 7 | 2 sheep | 559,608 steps | ret(last 50)=+0.76 sr=4%]
... [trial 7 | 2 sheep | 609,608 steps | ret(last 50)=+0.45 sr=0%]
... [trial 7 | 2 sheep | 659,608 steps | ret(last 50)=+1.03 sr=8%]
... [trial 7 | 2 sheep | 709,608 steps | ret(last 50)=+0.62 sr=6%]
... [trial 7 | 2 sheep | 759,608 steps | ret(last 50)=+0.36 sr=8%]
... [trial 7 | 2 sheep | 809,608 steps | ret(last 50)=+2.27 sr=10%]
... [trial 7 | 2 sheep | 859,608 steps | ret(last 50)=+2.31 sr=6%]
... [trial 7 | 2 sheep | 909,608 steps | ret(last 50)=+3.78 sr=4%]
... [trial 7 | 2 sheep | 959,608 steps | ret(last 50)=+2.21 sr=10%]
... [trial 7 | 2 sheep | 1,009,608 steps | ret(last 50)=+2.66 sr=4%]
... [trial 7 | eval n=1]
... [trial 7 | eval n=2]
... [trial 7 | eval n=3]
→ score=0.080 sr1=0.40 sr2=0.00 sr3=0.00 [338s]
[Trial 8] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.025, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 100.0, 'W_COMPACT': 0.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.005}
... [trial 8 | 1 sheep | 50,000 steps | ret(last 32)=-7.73 sr=6%]
... [trial 8 | 1 sheep | 100,000 steps | ret(last 50)=-9.58 sr=8%]
... [trial 8 | 1 sheep | 150,000 steps | ret(last 50)=-10.87 sr=8%]
... [trial 8 | 1 sheep | 200,000 steps | ret(last 50)=-9.79 sr=6%]
... [trial 8 | 1 sheep | 250,000 steps | ret(last 50)=-7.19 sr=8%]
... [trial 8 | 1 sheep | 300,000 steps | ret(last 50)=-3.84 sr=18%]
... [trial 8 | 1 sheep | 350,000 steps | ret(last 50)=-0.03 sr=26%]
... [trial 8 | 1 sheep | 400,000 steps | ret(last 50)=+6.80 sr=44%]
... [trial 8 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 8 | 2 sheep | 459,608 steps | ret(last 35)=-3.00 sr=9%]
... [trial 8 | 2 sheep | 509,608 steps | ret(last 50)=-4.26 sr=4%]
... [trial 8 | 2 sheep | 559,608 steps | ret(last 50)=+1.91 sr=14%]
... [trial 8 | 2 sheep | 609,608 steps | ret(last 50)=-0.57 sr=16%]
... [trial 8 | 2 sheep | 659,608 steps | ret(last 50)=+1.65 sr=14%]
... [trial 8 | 2 sheep | 709,608 steps | ret(last 50)=+2.90 sr=8%]
... [trial 8 | 2 sheep | 759,608 steps | ret(last 50)=+0.98 sr=2%]
... [trial 8 | 2 sheep | 809,608 steps | ret(last 50)=-2.52 sr=4%]
... [trial 8 | 2 sheep | 859,608 steps | ret(last 50)=-1.11 sr=2%]
... [trial 8 | 2 sheep | 909,608 steps | ret(last 50)=+2.74 sr=2%]
... [trial 8 | 2 sheep | 959,608 steps | ret(last 50)=+2.94 sr=0%]
... [trial 8 | 2 sheep | 1,009,608 steps | ret(last 50)=+5.13 sr=0%]
... [trial 8 | eval n=1]
... [trial 8 | eval n=2]
... [trial 8 | eval n=3]
→ score=0.110 sr1=0.30 sr2=0.10 sr3=0.00 [451s]
[Trial 9] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 50.0, 'W_COMPACT': 0.0, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.05}
... [trial 9 | 1 sheep | 50,000 steps | ret(last 34)=-11.25 sr=15%]
... [trial 9 | 1 sheep | 100,000 steps | ret(last 50)=-11.98 sr=8%]
... [trial 9 | 1 sheep | 150,000 steps | ret(last 50)=-10.46 sr=14%]
... [trial 9 | 1 sheep | 200,000 steps | ret(last 50)=-2.86 sr=14%]
... [trial 9 | 1 sheep | 250,000 steps | ret(last 50)=+8.65 sr=60%]
... [trial 9 | 1 sheep | 300,000 steps | ret(last 50)=+10.48 sr=58%]
... [trial 9 | 1 sheep | 350,000 steps | ret(last 50)=+8.65 sr=56%]
... [trial 9 | 1 sheep | 400,000 steps | ret(last 50)=+10.25 sr=68%]
... [trial 9 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 9 | 2 sheep | 459,608 steps | ret(last 35)=-0.75 sr=20%]
... [trial 9 | 2 sheep | 509,608 steps | ret(last 50)=-6.64 sr=2%]
... [trial 9 | 2 sheep | 559,608 steps | ret(last 50)=-7.43 sr=4%]
... [trial 9 | 2 sheep | 609,608 steps | ret(last 50)=-4.32 sr=6%]
... [trial 9 | 2 sheep | 659,608 steps | ret(last 50)=-3.64 sr=6%]
... [trial 9 | 2 sheep | 709,608 steps | ret(last 50)=-7.09 sr=0%]
... [trial 9 | 2 sheep | 759,608 steps | ret(last 50)=-5.60 sr=4%]
... [trial 9 | 2 sheep | 809,608 steps | ret(last 50)=-5.70 sr=6%]
... [trial 9 | 2 sheep | 859,608 steps | ret(last 50)=-4.99 sr=4%]
... [trial 9 | 2 sheep | 909,608 steps | ret(last 50)=-4.60 sr=6%]
... [trial 9 | 2 sheep | 959,608 steps | ret(last 50)=-6.53 sr=4%]
... [trial 9 | 2 sheep | 1,009,608 steps | ret(last 50)=-7.46 sr=2%]
... [trial 9 | eval n=1]
... [trial 9 | eval n=2]
... [trial 9 | eval n=3]
→ score=0.190 sr1=0.70 sr2=0.10 sr3=0.00 [349s]
[Trial 10] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 0.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.05}
... [trial 10 | 1 sheep | 50,000 steps | ret(last 32)=-13.35 sr=3%]
... [trial 10 | 1 sheep | 100,000 steps | ret(last 50)=-12.49 sr=4%]
... [trial 10 | 1 sheep | 150,000 steps | ret(last 50)=-13.24 sr=8%]
... [trial 10 | 1 sheep | 200,000 steps | ret(last 50)=-12.73 sr=10%]
... [trial 10 | 1 sheep | 250,000 steps | ret(last 50)=-15.27 sr=4%]
... [trial 10 | 1 sheep | 300,000 steps | ret(last 50)=-9.43 sr=8%]
... [trial 10 | 1 sheep | 350,000 steps | ret(last 50)=-2.65 sr=22%]
... [trial 10 | 1 sheep | 400,000 steps | ret(last 50)=+5.12 sr=46%]
... [trial 10 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 10 | 2 sheep | 459,608 steps | ret(last 34)=-4.93 sr=6%]
... [trial 10 | 2 sheep | 509,608 steps | ret(last 50)=-6.25 sr=2%]
... [trial 10 | 2 sheep | 559,608 steps | ret(last 50)=-5.57 sr=4%]
... [trial 10 | 2 sheep | 609,608 steps | ret(last 50)=-6.24 sr=4%]
... [trial 10 | 2 sheep | 659,608 steps | ret(last 50)=-9.34 sr=0%]
... [trial 10 | 2 sheep | 709,608 steps | ret(last 50)=-8.23 sr=0%]
... [trial 10 | 2 sheep | 759,608 steps | ret(last 50)=-8.34 sr=0%]
... [trial 10 | 2 sheep | 809,608 steps | ret(last 50)=-5.27 sr=0%]
... [trial 10 | 2 sheep | 859,608 steps | ret(last 50)=-8.24 sr=0%]
... [trial 10 | 2 sheep | 909,608 steps | ret(last 50)=-8.75 sr=0%]
... [trial 10 | 2 sheep | 959,608 steps | ret(last 50)=-9.15 sr=0%]
... [trial 10 | 2 sheep | 1,009,608 steps | ret(last 50)=-9.75 sr=0%]
... [trial 10 | eval n=1]
... [trial 10 | eval n=2]
... [trial 10 | eval n=3]
→ score=0.000 sr1=0.00 sr2=0.00 sr3=0.00 [319s]
[Trial 11] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 0.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
... [trial 11 | 1 sheep | 50,000 steps | ret(last 32)=-3.50 sr=12%]
... [trial 11 | 1 sheep | 100,000 steps | ret(last 50)=-5.79 sr=6%]
... [trial 11 | 1 sheep | 150,000 steps | ret(last 50)=-2.10 sr=18%]
... [trial 11 | 1 sheep | 200,000 steps | ret(last 50)=+2.60 sr=8%]
... [trial 11 | 1 sheep | 250,000 steps | ret(last 50)=+11.49 sr=8%]
... [trial 11 | 1 sheep | 300,000 steps | ret(last 50)=+21.73 sr=26%]
... [trial 11 | 1 sheep | 350,000 steps | ret(last 50)=+20.73 sr=36%]
... [trial 11 | 1 sheep | 400,000 steps | ret(last 50)=+19.77 sr=62%]
... [trial 11 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 11 | 2 sheep | 459,608 steps | ret(last 36)=+10.19 sr=11%]
... [trial 11 | 2 sheep | 509,608 steps | ret(last 50)=+11.56 sr=6%]
... [trial 11 | 2 sheep | 559,608 steps | ret(last 50)=+13.61 sr=2%]
... [trial 11 | 2 sheep | 609,608 steps | ret(last 50)=+15.44 sr=4%]
... [trial 11 | 2 sheep | 659,608 steps | ret(last 50)=+15.61 sr=10%]
... [trial 11 | 2 sheep | 709,608 steps | ret(last 50)=+16.30 sr=6%]
... [trial 11 | 2 sheep | 759,608 steps | ret(last 50)=+17.33 sr=4%]
... [trial 11 | 2 sheep | 809,608 steps | ret(last 50)=+18.36 sr=2%]
... [trial 11 | 2 sheep | 859,608 steps | ret(last 50)=+19.78 sr=8%]
... [trial 11 | 2 sheep | 909,608 steps | ret(last 50)=+20.12 sr=14%]
... [trial 11 | 2 sheep | 959,608 steps | ret(last 50)=+18.93 sr=8%]
... [trial 11 | 2 sheep | 1,009,608 steps | ret(last 50)=+18.16 sr=2%]
... [trial 11 | eval n=1]
... [trial 11 | eval n=2]
... [trial 11 | eval n=3]
→ score=0.160 sr1=0.80 sr2=0.00 sr3=0.00 [310s]
[Trial 12] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 50.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.02}
... [trial 12 | 1 sheep | 50,000 steps | ret(last 32)=-42.77 sr=0%]
... [trial 12 | 1 sheep | 100,000 steps | ret(last 50)=-39.16 sr=2%]
... [trial 12 | 1 sheep | 150,000 steps | ret(last 50)=-35.02 sr=6%]
... [trial 12 | 1 sheep | 200,000 steps | ret(last 50)=-31.49 sr=4%]
... [trial 12 | 1 sheep | 250,000 steps | ret(last 50)=-8.31 sr=16%]
... [trial 12 | 1 sheep | 300,000 steps | ret(last 50)=+7.97 sr=36%]
... [trial 12 | 1 sheep | 350,000 steps | ret(last 50)=+11.77 sr=68%]
... [trial 12 | 1 sheep | 400,000 steps | ret(last 50)=+12.47 sr=74%]
... [trial 12 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 12 | 2 sheep | 459,608 steps | ret(last 34)=-9.76 sr=0%]
... [trial 12 | 2 sheep | 509,608 steps | ret(last 50)=-4.85 sr=0%]
... [trial 12 | 2 sheep | 559,608 steps | ret(last 50)=-2.81 sr=8%]
... [trial 12 | 2 sheep | 609,608 steps | ret(last 50)=+2.27 sr=10%]
... [trial 12 | 2 sheep | 659,608 steps | ret(last 50)=+1.66 sr=6%]
... [trial 12 | 2 sheep | 709,608 steps | ret(last 50)=+3.42 sr=4%]
... [trial 12 | 2 sheep | 759,608 steps | ret(last 50)=+4.08 sr=2%]
... [trial 12 | 2 sheep | 809,608 steps | ret(last 50)=+5.49 sr=2%]
... [trial 12 | 2 sheep | 859,608 steps | ret(last 50)=+7.12 sr=10%]
... [trial 12 | 2 sheep | 909,608 steps | ret(last 50)=+7.91 sr=6%]
... [trial 12 | 2 sheep | 959,608 steps | ret(last 50)=+6.87 sr=2%]
... [trial 12 | 2 sheep | 1,009,608 steps | ret(last 50)=+5.83 sr=2%]
... [trial 12 | eval n=1]
... [trial 12 | eval n=2]
... [trial 12 | eval n=3]
→ score=0.240 sr1=0.70 sr2=0.20 sr3=0.00 [330s]
[Trial 13] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 100.0, 'W_COMPACT': 0.5, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.005}
... [trial 13 | 1 sheep | 50,000 steps | ret(last 34)=-31.15 sr=9%]
... [trial 13 | 1 sheep | 100,000 steps | ret(last 50)=-32.34 sr=4%]
... [trial 13 | 1 sheep | 150,000 steps | ret(last 50)=-33.16 sr=0%]
... [trial 13 | 1 sheep | 200,000 steps | ret(last 50)=-29.98 sr=6%]
... [trial 13 | 1 sheep | 250,000 steps | ret(last 50)=-28.64 sr=4%]
... [trial 13 | 1 sheep | 300,000 steps | ret(last 50)=-17.91 sr=14%]
... [trial 13 | 1 sheep | 350,000 steps | ret(last 50)=-15.27 sr=22%]
... [trial 13 | 1 sheep | 400,000 steps | ret(last 50)=-11.36 sr=16%]
... [trial 13 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 13 | 2 sheep | 459,608 steps | ret(last 34)=-16.78 sr=0%]
... [trial 13 | 2 sheep | 509,608 steps | ret(last 50)=-16.84 sr=2%]
... [trial 13 | 2 sheep | 559,608 steps | ret(last 50)=-14.28 sr=0%]
... [trial 13 | 2 sheep | 609,608 steps | ret(last 50)=-12.35 sr=6%]
... [trial 13 | 2 sheep | 659,608 steps | ret(last 50)=-14.50 sr=2%]
... [trial 13 | 2 sheep | 709,608 steps | ret(last 50)=-12.96 sr=2%]
... [trial 13 | 2 sheep | 759,608 steps | ret(last 50)=-9.86 sr=4%]
... [trial 13 | 2 sheep | 809,608 steps | ret(last 50)=-13.88 sr=2%]
... [trial 13 | 2 sheep | 859,608 steps | ret(last 50)=-14.76 sr=0%]
... [trial 13 | 2 sheep | 909,608 steps | ret(last 50)=-12.79 sr=0%]
... [trial 13 | 2 sheep | 959,608 steps | ret(last 50)=-12.54 sr=0%]
... [trial 13 | 2 sheep | 1,009,608 steps | ret(last 50)=-12.11 sr=8%]
... [trial 13 | eval n=1]
... [trial 13 | eval n=2]
... [trial 13 | eval n=3]
→ score=0.060 sr1=0.30 sr2=0.00 sr3=0.00 [323s]
[Trial 14] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
... [trial 14 | 1 sheep | 50,000 steps | ret(last 32)=-20.15 sr=9%]
... [trial 14 | 1 sheep | 100,000 steps | ret(last 50)=-15.28 sr=8%]
... [trial 14 | 1 sheep | 150,000 steps | ret(last 50)=-8.87 sr=26%]
... [trial 14 | 1 sheep | 200,000 steps | ret(last 50)=-9.94 sr=8%]
... [trial 14 | 1 sheep | 250,000 steps | ret(last 50)=-9.04 sr=8%]
... [trial 14 | 1 sheep | 300,000 steps | ret(last 50)=-7.40 sr=14%]
... [trial 14 | 1 sheep | 350,000 steps | ret(last 50)=+2.22 sr=50%]
... [trial 14 | 1 sheep | 400,000 steps | ret(last 50)=+4.06 sr=58%]
... [trial 14 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 14 | 2 sheep | 459,608 steps | ret(last 33)=-5.93 sr=3%]
... [trial 14 | 2 sheep | 509,608 steps | ret(last 50)=-6.85 sr=4%]
... [trial 14 | 2 sheep | 559,608 steps | ret(last 50)=-6.81 sr=6%]
... [trial 14 | 2 sheep | 609,608 steps | ret(last 50)=-4.80 sr=4%]
... [trial 14 | 2 sheep | 659,608 steps | ret(last 50)=-6.55 sr=4%]
... [trial 14 | 2 sheep | 709,608 steps | ret(last 50)=-4.81 sr=12%]
... [trial 14 | 2 sheep | 759,608 steps | ret(last 50)=-5.41 sr=10%]
... [trial 14 | 2 sheep | 809,608 steps | ret(last 50)=-0.00 sr=30%]
... [trial 14 | 2 sheep | 859,608 steps | ret(last 50)=+1.17 sr=26%]
... [trial 14 | 2 sheep | 909,608 steps | ret(last 50)=+0.17 sr=20%]
... [trial 14 | 2 sheep | 959,608 steps | ret(last 50)=-0.96 sr=18%]
... [trial 14 | 2 sheep | 1,009,608 steps | ret(last 50)=-1.33 sr=20%]
... [trial 14 | eval n=1]
... [trial 14 | eval n=2]
... [trial 14 | eval n=3]
→ score=0.350 sr1=1.00 sr2=0.30 sr3=0.00 [314s]
[Trial 15] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 100.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.05}
... [trial 15 | 1 sheep | 50,000 steps | ret(last 32)=-6.83 sr=3%]
... [trial 15 | 1 sheep | 100,000 steps | ret(last 50)=-7.59 sr=4%]
... [trial 15 | 1 sheep | 150,000 steps | ret(last 50)=-5.74 sr=6%]
... [trial 15 | 1 sheep | 200,000 steps | ret(last 50)=-5.92 sr=6%]
... [trial 15 | 1 sheep | 250,000 steps | ret(last 50)=+8.14 sr=22%]
... [trial 15 | 1 sheep | 300,000 steps | ret(last 50)=+15.51 sr=22%]
... [trial 15 | 1 sheep | 350,000 steps | ret(last 50)=+21.46 sr=20%]
... [trial 15 | 1 sheep | 400,000 steps | ret(last 50)=+22.52 sr=16%]
... [trial 15 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 15 | 2 sheep | 459,608 steps | ret(last 35)=+6.28 sr=0%]
... [trial 15 | 2 sheep | 509,608 steps | ret(last 50)=+13.19 sr=2%]
... [trial 15 | 2 sheep | 559,608 steps | ret(last 50)=+15.58 sr=4%]
... [trial 15 | 2 sheep | 609,608 steps | ret(last 50)=+18.78 sr=10%]
... [trial 15 | 2 sheep | 659,608 steps | ret(last 50)=+22.71 sr=10%]
... [trial 15 | 2 sheep | 709,608 steps | ret(last 50)=+23.95 sr=6%]
... [trial 15 | 2 sheep | 759,608 steps | ret(last 50)=+24.84 sr=14%]
... [trial 15 | 2 sheep | 809,608 steps | ret(last 50)=+24.00 sr=8%]
... [trial 15 | 2 sheep | 859,608 steps | ret(last 50)=+23.91 sr=2%]
... [trial 15 | 2 sheep | 909,608 steps | ret(last 50)=+23.73 sr=4%]
... [trial 15 | 2 sheep | 959,608 steps | ret(last 50)=+24.23 sr=4%]
... [trial 15 | 2 sheep | 1,009,608 steps | ret(last 50)=+24.77 sr=4%]
... [trial 15 | eval n=1]
... [trial 15 | eval n=2]
... [trial 15 | eval n=3]
→ score=0.140 sr1=0.70 sr2=0.00 sr3=0.00 [323s]
[Trial 16] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 100.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ent_coef': 0.01}
... [trial 16 | 1 sheep | 50,000 steps | ret(last 32)=-7.14 sr=9%]
... [trial 16 | 1 sheep | 100,000 steps | ret(last 50)=-5.58 sr=12%]
... [trial 16 | 1 sheep | 150,000 steps | ret(last 50)=+5.93 sr=26%]
... [trial 16 | 1 sheep | 200,000 steps | ret(last 50)=+15.53 sr=68%]
... [trial 16 | 1 sheep | 250,000 steps | ret(last 50)=+14.88 sr=56%]
... [trial 16 | 1 sheep | 300,000 steps | ret(last 50)=+13.86 sr=36%]
... [trial 16 | 1 sheep | 350,000 steps | ret(last 50)=+14.84 sr=54%]
... [trial 16 | 1 sheep | 400,000 steps | ret(last 50)=+15.15 sr=70%]
... [trial 16 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 16 | 2 sheep | 459,608 steps | ret(last 34)=-1.47 sr=6%]
... [trial 16 | 2 sheep | 509,608 steps | ret(last 50)=-1.63 sr=2%]
... [trial 16 | 2 sheep | 559,608 steps | ret(last 50)=-3.78 sr=2%]
... [trial 16 | 2 sheep | 609,608 steps | ret(last 50)=-2.17 sr=4%]
... [trial 16 | 2 sheep | 659,608 steps | ret(last 50)=+1.25 sr=6%]
... [trial 16 | 2 sheep | 709,608 steps | ret(last 50)=+0.28 sr=4%]
... [trial 16 | 2 sheep | 759,608 steps | ret(last 50)=+2.74 sr=4%]
... [trial 16 | 2 sheep | 809,608 steps | ret(last 50)=+7.19 sr=6%]
... [trial 16 | 2 sheep | 859,608 steps | ret(last 50)=+7.68 sr=4%]
... [trial 16 | 2 sheep | 909,608 steps | ret(last 50)=+2.38 sr=0%]
... [trial 16 | 2 sheep | 959,608 steps | ret(last 50)=+3.43 sr=0%]
... [trial 16 | 2 sheep | 1,009,608 steps | ret(last 50)=+11.11 sr=0%]
... [trial 16 | eval n=1]
... [trial 16 | eval n=2]
... [trial 16 | eval n=3]
→ score=0.060 sr1=0.30 sr2=0.00 sr3=0.00 [312s]
[Trial 17] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.005, 'W_COMPLETE': 200.0, 'W_COMPACT': 0.0, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': True, 'ent_coef': 0.05}
... [trial 17 | 1 sheep | 50,000 steps | ret(last 32)=+2.15 sr=6%]
... [trial 17 | 1 sheep | 100,000 steps | ret(last 50)=-0.51 sr=2%]
... [trial 17 | 1 sheep | 150,000 steps | ret(last 50)=+0.84 sr=6%]
... [trial 17 | 1 sheep | 200,000 steps | ret(last 50)=+2.96 sr=6%]
... [trial 17 | 1 sheep | 250,000 steps | ret(last 50)=+3.04 sr=4%]
... [trial 17 | 1 sheep | 300,000 steps | ret(last 50)=+10.58 sr=10%]
... [trial 17 | 1 sheep | 350,000 steps | ret(last 50)=+21.95 sr=36%]
... [trial 17 | 1 sheep | 400,000 steps | ret(last 50)=+19.20 sr=16%]
... [trial 17 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 17 | 2 sheep | 459,608 steps | ret(last 32)=+10.27 sr=16%]
... [trial 17 | 2 sheep | 509,608 steps | ret(last 50)=+12.25 sr=6%]
... [trial 17 | 2 sheep | 559,608 steps | ret(last 50)=+12.94 sr=6%]
... [trial 17 | 2 sheep | 609,608 steps | ret(last 50)=+11.82 sr=4%]
... [trial 17 | 2 sheep | 659,608 steps | ret(last 50)=+13.45 sr=4%]
... [trial 17 | 2 sheep | 709,608 steps | ret(last 50)=+13.03 sr=4%]
... [trial 17 | 2 sheep | 759,608 steps | ret(last 50)=+10.69 sr=6%]
... [trial 17 | 2 sheep | 809,608 steps | ret(last 50)=+7.79 sr=6%]
... [trial 17 | 2 sheep | 859,608 steps | ret(last 50)=+12.16 sr=16%]
... [trial 17 | 2 sheep | 909,608 steps | ret(last 50)=+11.75 sr=12%]
... [trial 17 | 2 sheep | 959,608 steps | ret(last 50)=+13.65 sr=16%]
... [trial 17 | 2 sheep | 1,009,608 steps | ret(last 50)=+12.43 sr=10%]
... [trial 17 | eval n=1]
... [trial 17 | eval n=2]
... [trial 17 | eval n=3]
→ score=0.240 sr1=0.70 sr2=0.20 sr3=0.00 [304s]
[Trial 18] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.025, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.005, 'W_COMPLETE': 100.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': False, 'ent_coef': 0.02}
... [trial 18 | 1 sheep | 50,000 steps | ret(last 32)=-3.63 sr=3%]
... [trial 18 | 1 sheep | 100,000 steps | ret(last 50)=-2.28 sr=12%]
... [trial 18 | 1 sheep | 150,000 steps | ret(last 50)=-3.15 sr=10%]
... [trial 18 | 1 sheep | 200,000 steps | ret(last 50)=-3.31 sr=6%]
... [trial 18 | 1 sheep | 250,000 steps | ret(last 50)=-3.23 sr=2%]
... [trial 18 | 1 sheep | 300,000 steps | ret(last 50)=+3.55 sr=22%]
... [trial 18 | 1 sheep | 350,000 steps | ret(last 50)=+8.15 sr=28%]
... [trial 18 | 1 sheep | 400,000 steps | ret(last 50)=+10.56 sr=18%]
... [trial 18 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 18 | 2 sheep | 459,608 steps | ret(last 34)=+3.80 sr=0%]
... [trial 18 | 2 sheep | 509,608 steps | ret(last 50)=+7.30 sr=4%]
... [trial 18 | 2 sheep | 559,608 steps | ret(last 50)=+9.61 sr=10%]
... [trial 18 | 2 sheep | 609,608 steps | ret(last 50)=+7.70 sr=8%]
... [trial 18 | 2 sheep | 659,608 steps | ret(last 50)=+6.01 sr=2%]
... [trial 18 | 2 sheep | 709,608 steps | ret(last 50)=+8.28 sr=6%]
... [trial 18 | 2 sheep | 759,608 steps | ret(last 50)=+6.74 sr=0%]
... [trial 18 | 2 sheep | 809,608 steps | ret(last 50)=+10.61 sr=0%]
... [trial 18 | 2 sheep | 859,608 steps | ret(last 50)=+12.20 sr=0%]
... [trial 18 | 2 sheep | 909,608 steps | ret(last 50)=+11.25 sr=2%]
... [trial 18 | 2 sheep | 959,608 steps | ret(last 50)=+13.58 sr=4%]
... [trial 18 | 2 sheep | 1,009,608 steps | ret(last 50)=+16.61 sr=20%]
... [trial 18 | eval n=1]
... [trial 18 | eval n=2]
... [trial 18 | eval n=3]
→ score=0.160 sr1=0.30 sr2=0.20 sr3=0.00 [316s]
[Trial 19] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.025, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 50.0, 'W_COMPACT': 0.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ent_coef': 0.005}
... [trial 19 | 1 sheep | 50,000 steps | ret(last 32)=-36.89 sr=3%]
... [trial 19 | 1 sheep | 100,000 steps | ret(last 50)=-30.93 sr=4%]
... [trial 19 | 1 sheep | 150,000 steps | ret(last 50)=-28.35 sr=12%]
... [trial 19 | 1 sheep | 200,000 steps | ret(last 50)=-30.73 sr=8%]
... [trial 19 | 1 sheep | 250,000 steps | ret(last 50)=-29.54 sr=4%]
... [trial 19 | 1 sheep | 300,000 steps | ret(last 50)=-20.15 sr=20%]
... [trial 19 | 1 sheep | 350,000 steps | ret(last 50)=-0.07 sr=68%]
... [trial 19 | 1 sheep | 400,000 steps | ret(last 50)=+1.66 sr=52%]
... [trial 19 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 19 | 2 sheep | 459,608 steps | ret(last 36)=-12.82 sr=19%]
... [trial 19 | 2 sheep | 509,608 steps | ret(last 50)=-20.66 sr=0%]
... [trial 19 | 2 sheep | 559,608 steps | ret(last 50)=-16.54 sr=4%]
... [trial 19 | 2 sheep | 609,608 steps | ret(last 50)=-17.11 sr=4%]
... [trial 19 | 2 sheep | 659,608 steps | ret(last 50)=-19.32 sr=0%]
... [trial 19 | 2 sheep | 709,608 steps | ret(last 50)=-16.20 sr=0%]
... [trial 19 | 2 sheep | 759,608 steps | ret(last 50)=-13.12 sr=2%]
... [trial 19 | 2 sheep | 809,608 steps | ret(last 50)=-17.18 sr=4%]
... [trial 19 | 2 sheep | 859,608 steps | ret(last 50)=-18.16 sr=2%]
... [trial 19 | 2 sheep | 909,608 steps | ret(last 50)=-18.12 sr=4%]
... [trial 19 | 2 sheep | 959,608 steps | ret(last 50)=-17.79 sr=2%]
... [trial 19 | 2 sheep | 1,009,608 steps | ret(last 50)=-17.58 sr=0%]
... [trial 19 | eval n=1]
... [trial 19 | eval n=2]
... [trial 19 | eval n=3]
→ score=0.160 sr1=0.80 sr2=0.00 sr3=0.00 [318s]
[Trial 20] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 100.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02}
... [trial 20 | 1 sheep | 50,000 steps | ret(last 33)=-15.83 sr=9%]
... [trial 20 | 1 sheep | 100,000 steps | ret(last 50)=-18.74 sr=10%]
... [trial 20 | 1 sheep | 150,000 steps | ret(last 50)=-22.88 sr=6%]
... [trial 20 | 1 sheep | 200,000 steps | ret(last 50)=-23.86 sr=4%]
... [trial 20 | 1 sheep | 250,000 steps | ret(last 50)=-21.10 sr=6%]
... [trial 20 | 1 sheep | 300,000 steps | ret(last 50)=-18.42 sr=6%]
... [trial 20 | 1 sheep | 350,000 steps | ret(last 50)=+1.74 sr=14%]
... [trial 20 | 1 sheep | 400,000 steps | ret(last 50)=+7.62 sr=34%]
... [trial 20 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 20 | 2 sheep | 459,608 steps | ret(last 34)=-2.63 sr=3%]
... [trial 20 | 2 sheep | 509,608 steps | ret(last 50)=+1.10 sr=2%]
... [trial 20 | 2 sheep | 559,608 steps | ret(last 50)=+5.57 sr=4%]
... [trial 20 | 2 sheep | 609,608 steps | ret(last 50)=+8.54 sr=8%]
... [trial 20 | 2 sheep | 659,608 steps | ret(last 50)=+12.02 sr=8%]
... [trial 20 | 2 sheep | 709,608 steps | ret(last 50)=+11.28 sr=4%]
... [trial 20 | 2 sheep | 759,608 steps | ret(last 50)=+11.45 sr=2%]
... [trial 20 | 2 sheep | 809,608 steps | ret(last 50)=+9.52 sr=0%]
... [trial 20 | 2 sheep | 859,608 steps | ret(last 50)=+9.07 sr=2%]
... [trial 20 | 2 sheep | 909,608 steps | ret(last 50)=+12.06 sr=8%]
... [trial 20 | 2 sheep | 959,608 steps | ret(last 50)=+12.77 sr=8%]
... [trial 20 | 2 sheep | 1,009,608 steps | ret(last 50)=+11.55 sr=2%]
... [trial 20 | eval n=1]
... [trial 20 | eval n=2]
... [trial 20 | eval n=3]
→ score=0.130 sr1=0.40 sr2=0.10 sr3=0.00 [315s]
[Trial 21] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 100.0, 'W_COMPACT': 0.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ent_coef': 0.005}
... [trial 21 | 1 sheep | 50,000 steps | ret(last 32)=-14.94 sr=6%]
... [trial 21 | 1 sheep | 100,000 steps | ret(last 50)=-12.47 sr=4%]
... [trial 21 | 1 sheep | 150,000 steps | ret(last 50)=-12.65 sr=6%]
... [trial 21 | 1 sheep | 200,000 steps | ret(last 50)=-12.44 sr=2%]
... [trial 21 | 1 sheep | 250,000 steps | ret(last 50)=-12.95 sr=6%]
... [trial 21 | 1 sheep | 300,000 steps | ret(last 50)=-13.04 sr=6%]
... [trial 21 | 1 sheep | 350,000 steps | ret(last 50)=-5.14 sr=8%]
... [trial 21 | 1 sheep | 400,000 steps | ret(last 50)=-0.46 sr=8%]
... [trial 21 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 21 | 2 sheep | 459,608 steps | ret(last 33)=-7.10 sr=0%]
... [trial 21 | 2 sheep | 509,608 steps | ret(last 50)=-8.26 sr=0%]
... [trial 21 | 2 sheep | 559,608 steps | ret(last 50)=-6.17 sr=4%]
... [trial 21 | 2 sheep | 609,608 steps | ret(last 50)=-4.23 sr=4%]
... [trial 21 | 2 sheep | 659,608 steps | ret(last 50)=-5.62 sr=0%]
... [trial 21 | 2 sheep | 709,608 steps | ret(last 50)=-3.72 sr=0%]
... [trial 21 | 2 sheep | 759,608 steps | ret(last 50)=-2.06 sr=0%]
... [trial 21 | 2 sheep | 809,608 steps | ret(last 50)=-1.23 sr=0%]
... [trial 21 | 2 sheep | 859,608 steps | ret(last 50)=-0.14 sr=0%]
... [trial 21 | 2 sheep | 909,608 steps | ret(last 50)=+1.30 sr=2%]
... [trial 21 | 2 sheep | 959,608 steps | ret(last 50)=+0.64 sr=2%]
... [trial 21 | 2 sheep | 1,009,608 steps | ret(last 50)=+2.62 sr=6%]
... [trial 21 | eval n=1]
... [trial 21 | eval n=2]
... [trial 21 | eval n=3]
→ score=0.050 sr1=0.00 sr2=0.10 sr3=0.00 [310s]
[Trial 22] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 100.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ent_coef': 0.005}
... [trial 22 | 1 sheep | 50,000 steps | ret(last 32)=-11.10 sr=6%]
... [trial 22 | 1 sheep | 100,000 steps | ret(last 50)=-10.61 sr=8%]
... [trial 22 | 1 sheep | 150,000 steps | ret(last 50)=-11.16 sr=4%]
... [trial 22 | 1 sheep | 200,000 steps | ret(last 50)=-11.15 sr=4%]
... [trial 22 | 1 sheep | 250,000 steps | ret(last 50)=-10.56 sr=6%]
... [trial 22 | 1 sheep | 300,000 steps | ret(last 50)=-14.90 sr=0%]
... [trial 22 | 1 sheep | 350,000 steps | ret(last 50)=-5.11 sr=14%]
... [trial 22 | 1 sheep | 400,000 steps | ret(last 50)=+2.22 sr=24%]
... [trial 22 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 22 | 2 sheep | 459,608 steps | ret(last 35)=-4.69 sr=6%]
... [trial 22 | 2 sheep | 509,608 steps | ret(last 50)=-3.17 sr=0%]
... [trial 22 | 2 sheep | 559,608 steps | ret(last 50)=+2.18 sr=2%]
... [trial 22 | 2 sheep | 609,608 steps | ret(last 50)=+4.53 sr=8%]
... [trial 22 | 2 sheep | 659,608 steps | ret(last 50)=+4.97 sr=10%]
... [trial 22 | 2 sheep | 709,608 steps | ret(last 50)=+5.06 sr=8%]
... [trial 22 | 2 sheep | 759,608 steps | ret(last 50)=+6.04 sr=4%]
... [trial 22 | 2 sheep | 809,608 steps | ret(last 50)=+5.95 sr=4%]
... [trial 22 | 2 sheep | 859,608 steps | ret(last 50)=+3.34 sr=2%]
... [trial 22 | 2 sheep | 909,608 steps | ret(last 50)=+6.80 sr=8%]
... [trial 22 | 2 sheep | 959,608 steps | ret(last 50)=+4.13 sr=8%]
... [trial 22 | 2 sheep | 1,009,608 steps | ret(last 50)=+4.17 sr=2%]
... [trial 22 | eval n=1]
... [trial 22 | eval n=2]
... [trial 22 | eval n=3]
→ score=0.110 sr1=0.30 sr2=0.10 sr3=0.00 [316s]
[Trial 23] {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.1, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 100.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ent_coef': 0.05}
... [trial 23 | 1 sheep | 50,000 steps | ret(last 32)=-22.59 sr=9%]
... [trial 23 | 1 sheep | 100,000 steps | ret(last 50)=-21.14 sr=6%]
... [trial 23 | 1 sheep | 150,000 steps | ret(last 50)=-20.75 sr=6%]
... [trial 23 | 1 sheep | 200,000 steps | ret(last 50)=-20.37 sr=8%]
... [trial 23 | 1 sheep | 250,000 steps | ret(last 50)=-5.04 sr=18%]
... [trial 23 | 1 sheep | 300,000 steps | ret(last 50)=+7.25 sr=12%]
... [trial 23 | 1 sheep | 350,000 steps | ret(last 50)=+11.34 sr=32%]
... [trial 23 | 1 sheep | 400,000 steps | ret(last 50)=+13.02 sr=24%]
... [trial 23 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 23 | 2 sheep | 459,608 steps | ret(last 32)=+0.29 sr=3%]
... [trial 23 | 2 sheep | 509,608 steps | ret(last 50)=-0.39 sr=4%]
... [trial 23 | 2 sheep | 559,608 steps | ret(last 50)=+6.56 sr=2%]
... [trial 23 | 2 sheep | 609,608 steps | ret(last 50)=+10.45 sr=2%]
... [trial 23 | 2 sheep | 659,608 steps | ret(last 50)=+9.75 sr=2%]
... [trial 23 | 2 sheep | 709,608 steps | ret(last 50)=+7.98 sr=6%]
... [trial 23 | 2 sheep | 759,608 steps | ret(last 50)=+9.20 sr=4%]
... [trial 23 | 2 sheep | 809,608 steps | ret(last 50)=+11.03 sr=6%]
... [trial 23 | 2 sheep | 859,608 steps | ret(last 50)=+12.53 sr=6%]
... [trial 23 | 2 sheep | 909,608 steps | ret(last 50)=+10.86 sr=6%]
... [trial 23 | 2 sheep | 959,608 steps | ret(last 50)=+13.16 sr=14%]
... [trial 23 | 2 sheep | 1,009,608 steps | ret(last 50)=+12.36 sr=12%]
... [trial 23 | eval n=1]
... [trial 23 | eval n=2]
... [trial 23 | eval n=3]
→ score=0.060 sr1=0.30 sr2=0.00 sr3=0.00 [472s]
[Trial 24] {'W_PER_SHEEP': 6.0, 'W_ALIGN': 0.025, 'W_PEN_BONUS': 20.0, 'W_STEP_COST': 0.005, 'W_COMPLETE': 200.0, 'W_COMPACT': 3.0, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.01}
... [trial 24 | 1 sheep | 50,000 steps | ret(last 32)=-1.97 sr=0%]
... [trial 24 | 1 sheep | 100,000 steps | ret(last 50)=-1.86 sr=2%]
... [trial 24 | 1 sheep | 150,000 steps | ret(last 50)=-2.97 sr=4%]
... [trial 24 | 1 sheep | 200,000 steps | ret(last 50)=-0.45 sr=8%]
... [trial 24 | 1 sheep | 250,000 steps | ret(last 50)=-1.73 sr=4%]
... [trial 24 | 1 sheep | 300,000 steps | ret(last 50)=+0.64 sr=4%]
... [trial 24 | 1 sheep | 350,000 steps | ret(last 50)=+1.35 sr=2%]
... [trial 24 | 1 sheep | 400,000 steps | ret(last 50)=+0.95 sr=4%]
... [trial 24 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 24 | 2 sheep | 459,608 steps | ret(last 33)=+1.34 sr=0%]
... [trial 24 | 2 sheep | 509,608 steps | ret(last 50)=+1.48 sr=0%]
... [trial 24 | 2 sheep | 559,608 steps | ret(last 50)=+6.05 sr=0%]
... [trial 24 | 2 sheep | 609,608 steps | ret(last 50)=+3.58 sr=0%]
... [trial 24 | 2 sheep | 659,608 steps | ret(last 50)=+2.33 sr=0%]
... [trial 24 | 2 sheep | 709,608 steps | ret(last 50)=+4.05 sr=2%]
... [trial 24 | 2 sheep | 759,608 steps | ret(last 50)=+0.93 sr=0%]
... [trial 24 | 2 sheep | 809,608 steps | ret(last 50)=-0.39 sr=0%]
... [trial 24 | 2 sheep | 859,608 steps | ret(last 50)=-2.68 sr=0%]
... [trial 24 | 2 sheep | 909,608 steps | ret(last 50)=+0.90 sr=0%]
... [trial 24 | 2 sheep | 959,608 steps | ret(last 50)=+2.63 sr=0%]
... [trial 24 | 2 sheep | 1,009,608 steps | ret(last 50)=+2.88 sr=0%]
... [trial 24 | eval n=1]
... [trial 24 | eval n=2]
... [trial 24 | eval n=3]
→ score=0.060 sr1=0.30 sr2=0.00 sr3=0.00 [335s]
[Trial 25] {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 10.0, 'W_STEP_COST': 0.05, 'W_COMPLETE': 50.0, 'W_COMPACT': 0.5, 'ALIGN_SHAPE': 'near', 'ALIGN_GATED': True, 'ent_coef': 0.02}
... [trial 25 | 1 sheep | 50,000 steps | ret(last 32)=-56.03 sr=3%]
... [trial 25 | 1 sheep | 100,000 steps | ret(last 50)=-53.61 sr=4%]
... [trial 25 | 1 sheep | 150,000 steps | ret(last 50)=-54.50 sr=4%]
... [trial 25 | 1 sheep | 200,000 steps | ret(last 50)=-57.55 sr=4%]
... [trial 25 | 1 sheep | 250,000 steps | ret(last 50)=-54.77 sr=8%]
... [trial 25 | 1 sheep | 300,000 steps | ret(last 50)=-55.53 sr=4%]
... [trial 25 | 1 sheep | 350,000 steps | ret(last 50)=-55.26 sr=4%]
... [trial 25 | 1 sheep | 400,000 steps | ret(last 50)=-56.11 sr=4%]
... [trial 25 | 2 sheep | 409,608 steps | ret(last 0)=+nan sr=nan%]
... [trial 25 | 2 sheep | 459,608 steps | ret(last 32)=-48.36 sr=0%]
... [trial 25 | 2 sheep | 509,608 steps | ret(last 50)=-54.87 sr=0%]
... [trial 25 | 2 sheep | 559,608 steps | ret(last 50)=-56.08 sr=0%]
... [trial 25 | 2 sheep | 609,608 steps | ret(last 50)=-54.86 sr=0%]
... [trial 25 | 2 sheep | 659,608 steps | ret(last 50)=-50.62 sr=0%]
... [trial 25 | 2 sheep | 709,608 steps | ret(last 50)=-49.92 sr=0%]
... [trial 25 | 2 sheep | 759,608 steps | ret(last 50)=-50.11 sr=0%]
... [trial 25 | 2 sheep | 809,608 steps | ret(last 50)=-51.41 sr=0%]
... [trial 25 | 2 sheep | 859,608 steps | ret(last 50)=-51.02 sr=0%]
... [trial 25 | 2 sheep | 909,608 steps | ret(last 50)=-50.80 sr=0%]
... [trial 25 | 2 sheep | 959,608 steps | ret(last 50)=-50.01 sr=0%]
... [trial 25 | 2 sheep | 1,009,608 steps | ret(last 50)=-49.71 sr=0%]
... [trial 25 | eval n=1]
... [trial 25 | eval n=2]
... [trial 25 | eval n=3]
→ score=0.000 sr1=0.00 sr2=0.00 sr3=0.00 [306s]
============================================================================================
LEADERBOARD
============================================================================================
rank score sr1 sr2 sr3 config
----------------------------------------------------------------------------------------
1 0.350 1.00 0.30 0.00 W_PER_SHEEP=1.0 W_ALIGN=0.0 W_PEN_BONUS=5.0 W_STEP_COST=0.02 W_COMPLETE=200.0 W_COMPACT=1.5 ALIGN_SHAPE=standoff ALIGN_GATED=False ent_coef=0.02
2 0.270 0.70 0.20 0.10 W_PER_SHEEP=6.0 W_ALIGN=0.025 W_PEN_BONUS=10.0 W_STEP_COST=0.02 W_COMPLETE=50.0 W_COMPACT=3.0 ALIGN_SHAPE=near ALIGN_GATED=False ent_coef=0.01
3 0.240 0.70 0.20 0.00 W_PER_SHEEP=1.0 W_ALIGN=0.1 W_PEN_BONUS=5.0 W_STEP_COST=0.05 W_COMPLETE=50.0 W_COMPACT=3.0 ALIGN_SHAPE=near ALIGN_GATED=False ent_coef=0.02
4 0.240 0.70 0.20 0.00 W_PER_SHEEP=6.0 W_ALIGN=0.1 W_PEN_BONUS=5.0 W_STEP_COST=0.005 W_COMPLETE=200.0 W_COMPACT=0.0 ALIGN_SHAPE=near ALIGN_GATED=True ent_coef=0.05
5 0.200 1.00 0.00 0.00 W_PER_SHEEP=6.0 W_ALIGN=0.1 W_PEN_BONUS=5.0 W_STEP_COST=0.02 W_COMPLETE=50.0 W_COMPACT=3.0 ALIGN_SHAPE=near ALIGN_GATED=True ent_coef=0.005
6 0.190 0.70 0.10 0.00 W_PER_SHEEP=2.0 W_ALIGN=0.0 W_PEN_BONUS=20.0 W_STEP_COST=0.02 W_COMPLETE=50.0 W_COMPACT=0.0 ALIGN_SHAPE=near ALIGN_GATED=False ent_coef=0.05
7 0.160 0.80 0.00 0.00 W_PER_SHEEP=6.0 W_ALIGN=0.025 W_PEN_BONUS=20.0 W_STEP_COST=0.05 W_COMPLETE=200.0 W_COMPACT=3.0 ALIGN_SHAPE=standoff ALIGN_GATED=True ent_coef=0.01
8 0.160 0.80 0.00 0.00 W_PER_SHEEP=2.0 W_ALIGN=0.1 W_PEN_BONUS=20.0 W_STEP_COST=0.02 W_COMPLETE=200.0 W_COMPACT=0.5 ALIGN_SHAPE=standoff ALIGN_GATED=False ent_coef=0.02
9 0.160 0.80 0.00 0.00 W_PER_SHEEP=2.0 W_ALIGN=0.025 W_PEN_BONUS=10.0 W_STEP_COST=0.05 W_COMPLETE=50.0 W_COMPACT=0.0 ALIGN_SHAPE=standoff ALIGN_GATED=True ent_coef=0.005
10 0.160 0.30 0.20 0.00 W_PER_SHEEP=2.0 W_ALIGN=0.025 W_PEN_BONUS=10.0 W_STEP_COST=0.005 W_COMPLETE=100.0 W_COMPACT=1.5 ALIGN_SHAPE=near ALIGN_GATED=False ent_coef=0.02
11 0.150 0.50 0.10 0.00 W_PER_SHEEP=1.0 W_ALIGN=0.1 W_PEN_BONUS=10.0 W_STEP_COST=0.02 W_COMPLETE=100.0 W_COMPACT=3.0 ALIGN_SHAPE=standoff ALIGN_GATED=False ent_coef=0.005
12 0.140 0.70 0.00 0.00 W_PER_SHEEP=1.0 W_ALIGN=0.1 W_PEN_BONUS=10.0 W_STEP_COST=0.02 W_COMPLETE=100.0 W_COMPACT=1.5 ALIGN_SHAPE=standoff ALIGN_GATED=False ent_coef=0.05
13 0.130 0.40 0.10 0.00 W_PER_SHEEP=1.0 W_ALIGN=0.1 W_PEN_BONUS=20.0 W_STEP_COST=0.05 W_COMPLETE=100.0 W_COMPACT=1.5 ALIGN_SHAPE=standoff ALIGN_GATED=False ent_coef=0.02
14 0.110 0.30 0.10 0.00 W_PER_SHEEP=6.0 W_ALIGN=0.025 W_PEN_BONUS=5.0 W_STEP_COST=0.05 W_COMPLETE=100.0 W_COMPACT=0.0 ALIGN_SHAPE=standoff ALIGN_GATED=False ent_coef=0.005
15 0.110 0.30 0.10 0.00 W_PER_SHEEP=2.0 W_ALIGN=0.05 W_PEN_BONUS=10.0 W_STEP_COST=0.02 W_COMPLETE=100.0 W_COMPACT=3.0 ALIGN_SHAPE=standoff ALIGN_GATED=True ent_coef=0.005
Best config saved to runs/sweep_20260425_124630/best.json
Total trials: 25 (25 successful, 0 failed)
Total time: 2.28h