diff --git a/training/runs/final_v2.log b/training/runs/final_v2.log new file mode 100644 index 0000000..39cf38e --- /dev/null +++ b/training/runs/final_v2.log @@ -0,0 +1,219 @@ +Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02} +Run dir: runs/final_v2 +Curriculum: 1 → 10 sheep, 1,500,000 steps/stage + +[Stage n_sheep=1] training 1,500,000 steps + ... [trial 1 | 1 sheep | 100,000 steps | ret(last 41)=-38.49 win_sr=10% cum_sr=10%] + ... [trial 1 | 1 sheep | 200,000 steps | ret(last 50)=-32.87 win_sr=8% cum_sr=9%] + ... [trial 1 | 1 sheep | 300,000 steps | ret(last 50)=-33.60 win_sr=4% cum_sr=7%] + ... [trial 1 | 1 sheep | 400,000 steps | ret(last 50)=-34.78 win_sr=8% cum_sr=7%] + ... [trial 1 | 1 sheep | 500,000 steps | ret(last 50)=-31.25 win_sr=12% cum_sr=8%] + ... [trial 1 | 1 sheep | 600,000 steps | ret(last 50)=-32.87 win_sr=2% cum_sr=7%] + ... [trial 1 | 1 sheep | 700,000 steps | ret(last 50)=-33.25 win_sr=6% cum_sr=7%] + ... [trial 1 | 1 sheep | 800,000 steps | ret(last 50)=-27.80 win_sr=16% cum_sr=8%] + ... [trial 1 | 1 sheep | 900,000 steps | ret(last 50)=-27.44 win_sr=14% cum_sr=9%] + ... [trial 1 | 1 sheep | 1,000,000 steps | ret(last 50)=-30.52 win_sr=6% cum_sr=9%] + ... [trial 1 | 1 sheep | 1,100,000 steps | ret(last 50)=-24.75 win_sr=20% cum_sr=10%] + ... [trial 1 | 1 sheep | 1,200,000 steps | ret(last 50)=-29.94 win_sr=4% cum_sr=10%] + ... [trial 1 | 1 sheep | 1,300,000 steps | ret(last 50)=-22.72 win_sr=22% cum_sr=11%] + ... [trial 1 | 1 sheep | 1,400,000 steps | ret(last 50)=-9.84 win_sr=46% cum_sr=14%] + ... [trial 1 | 1 sheep | 1,500,000 steps | ret(last 50)=+10.01 win_sr=96% cum_sr=24%] +[Stage n_sheep=1] evaluating 30 eps +[Stage n_sheep=1] sr=97% mean_len=351 mean_min_pen=3.9m mean_act=0.28 + +[Stage n_sheep=2] training 1,500,000 steps + ... [trial 1 | 2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [trial 1 | 2 sheep | 1,607,336 steps | ret(last 43)=-4.11 win_sr=33% cum_sr=33%] + ... [trial 1 | 2 sheep | 1,707,336 steps | ret(last 50)=-0.34 win_sr=36% cum_sr=34%] + ... [trial 1 | 2 sheep | 1,807,336 steps | ret(last 50)=+14.73 win_sr=92% cum_sr=62%] + ... [trial 1 | 2 sheep | 1,907,336 steps | ret(last 50)=+17.38 win_sr=100% cum_sr=76%] + ... [trial 1 | 2 sheep | 2,007,336 steps | ret(last 50)=+16.80 win_sr=100% cum_sr=83%] + ... [trial 1 | 2 sheep | 2,107,336 steps | ret(last 50)=+15.67 win_sr=100% cum_sr=87%] + ... [trial 1 | 2 sheep | 2,207,336 steps | ret(last 50)=+15.39 win_sr=100% cum_sr=90%] + ... [trial 1 | 2 sheep | 2,307,336 steps | ret(last 50)=+15.58 win_sr=100% cum_sr=92%] + ... [trial 1 | 2 sheep | 2,407,336 steps | ret(last 50)=+15.01 win_sr=100% cum_sr=93%] + ... [trial 1 | 2 sheep | 2,507,336 steps | ret(last 50)=+15.50 win_sr=100% cum_sr=94%] + ... [trial 1 | 2 sheep | 2,607,336 steps | ret(last 50)=+15.21 win_sr=100% cum_sr=95%] + ... [trial 1 | 2 sheep | 2,707,336 steps | ret(last 50)=+15.22 win_sr=100% cum_sr=95%] + ... [trial 1 | 2 sheep | 2,807,336 steps | ret(last 50)=+15.05 win_sr=100% cum_sr=96%] + ... [trial 1 | 2 sheep | 2,907,336 steps | ret(last 50)=+14.37 win_sr=100% cum_sr=96%] + ... [trial 1 | 2 sheep | 3,007,336 steps | ret(last 50)=+14.70 win_sr=100% cum_sr=97%] +[Stage n_sheep=2] evaluating 30 eps +[Stage n_sheep=2] sr=100% mean_len=421 mean_min_pen=3.5m mean_act=1.01 + +[Stage n_sheep=3] training 1,500,000 steps + ... [trial 1 | 3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [trial 1 | 3 sheep | 3,114,664 steps | ret(last 50)=+16.52 win_sr=100% cum_sr=99%] + ... [trial 1 | 3 sheep | 3,214,664 steps | ret(last 50)=+16.74 win_sr=100% cum_sr=100%] + ... [trial 1 | 3 sheep | 3,314,664 steps | ret(last 50)=+17.09 win_sr=100% cum_sr=100%] + ... [trial 1 | 3 sheep | 3,414,664 steps | ret(last 50)=+16.90 win_sr=100% cum_sr=100%] + ... [trial 1 | 3 sheep | 3,514,664 steps | ret(last 50)=+16.97 win_sr=100% cum_sr=100%] + ... [trial 1 | 3 sheep | 3,614,664 steps | ret(last 50)=+17.20 win_sr=100% cum_sr=100%] + ... [trial 1 | 3 sheep | 3,714,664 steps | ret(last 50)=+17.09 win_sr=100% cum_sr=100%] + ... [trial 1 | 3 sheep | 3,814,664 steps | ret(last 50)=+17.12 win_sr=98% cum_sr=100%] + ... [trial 1 | 3 sheep | 3,914,664 steps | ret(last 50)=+17.17 win_sr=100% cum_sr=100%] + ... [trial 1 | 3 sheep | 4,014,664 steps | ret(last 50)=+16.25 win_sr=98% cum_sr=100%] + ... [trial 1 | 3 sheep | 4,114,664 steps | ret(last 50)=+17.04 win_sr=100% cum_sr=100%] + ... [trial 1 | 3 sheep | 4,214,664 steps | ret(last 50)=+16.31 win_sr=98% cum_sr=100%] + ... [trial 1 | 3 sheep | 4,314,664 steps | ret(last 50)=+16.82 win_sr=100% cum_sr=100%] + ... [trial 1 | 3 sheep | 4,414,664 steps | ret(last 50)=+16.49 win_sr=100% cum_sr=100%] + ... [trial 1 | 3 sheep | 4,514,664 steps | ret(last 50)=+16.54 win_sr=100% cum_sr=100%] +[Stage n_sheep=3] evaluating 30 eps +[Stage n_sheep=3] sr=100% mean_len=608 mean_min_pen=3.5m mean_act=1.06 + +[Stage n_sheep=4] training 1,500,000 steps + ... [trial 1 | 4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [trial 1 | 4 sheep | 4,621,992 steps | ret(last 50)=+18.55 win_sr=98% cum_sr=94%] + ... [trial 1 | 4 sheep | 4,721,992 steps | ret(last 50)=+19.17 win_sr=100% cum_sr=97%] + ... [trial 1 | 4 sheep | 4,821,992 steps | ret(last 50)=+18.64 win_sr=100% cum_sr=98%] + ... [trial 1 | 4 sheep | 4,921,992 steps | ret(last 50)=+19.06 win_sr=100% cum_sr=99%] + ... [trial 1 | 4 sheep | 5,021,992 steps | ret(last 50)=+19.01 win_sr=100% cum_sr=99%] + ... [trial 1 | 4 sheep | 5,121,992 steps | ret(last 50)=+19.23 win_sr=100% cum_sr=99%] + ... [trial 1 | 4 sheep | 5,221,992 steps | ret(last 50)=+18.71 win_sr=100% cum_sr=99%] + ... [trial 1 | 4 sheep | 5,321,992 steps | ret(last 50)=+18.81 win_sr=100% cum_sr=99%] + ... [trial 1 | 4 sheep | 5,421,992 steps | ret(last 50)=+19.51 win_sr=100% cum_sr=99%] + ... [trial 1 | 4 sheep | 5,521,992 steps | ret(last 50)=+19.01 win_sr=100% cum_sr=100%] + ... [trial 1 | 4 sheep | 5,621,992 steps | ret(last 50)=+19.21 win_sr=100% cum_sr=100%] + ... [trial 1 | 4 sheep | 5,721,992 steps | ret(last 50)=+18.62 win_sr=100% cum_sr=100%] + ... [trial 1 | 4 sheep | 5,821,992 steps | ret(last 50)=+18.57 win_sr=100% cum_sr=100%] + ... [trial 1 | 4 sheep | 5,921,992 steps | ret(last 50)=+19.22 win_sr=100% cum_sr=100%] + ... [trial 1 | 4 sheep | 6,021,992 steps | ret(last 50)=+18.73 win_sr=100% cum_sr=100%] +[Stage n_sheep=4] evaluating 30 eps +[Stage n_sheep=4] sr=100% mean_len=874 mean_min_pen=3.3m mean_act=1.23 + +[Stage n_sheep=5] training 1,500,000 steps + ... [trial 1 | 5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [trial 1 | 5 sheep | 6,129,320 steps | ret(last 50)=+22.70 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 6,229,320 steps | ret(last 50)=+20.82 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 6,329,320 steps | ret(last 50)=+20.84 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 6,429,320 steps | ret(last 50)=+21.70 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 6,529,320 steps | ret(last 50)=+21.25 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 6,629,320 steps | ret(last 50)=+20.61 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 6,729,320 steps | ret(last 50)=+21.10 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 6,829,320 steps | ret(last 50)=+21.42 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 6,929,320 steps | ret(last 50)=+21.39 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 7,029,320 steps | ret(last 50)=+20.80 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 7,129,320 steps | ret(last 50)=+21.19 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 7,229,320 steps | ret(last 50)=+20.92 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 7,329,320 steps | ret(last 50)=+20.97 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 7,429,320 steps | ret(last 50)=+20.48 win_sr=100% cum_sr=100%] + ... [trial 1 | 5 sheep | 7,529,320 steps | ret(last 50)=+21.36 win_sr=100% cum_sr=100%] +[Stage n_sheep=5] evaluating 30 eps +[Stage n_sheep=5] sr=97% mean_len=945 mean_min_pen=3.4m mean_act=1.33 + +[Stage n_sheep=6] training 1,500,000 steps + ... [trial 1 | 6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [trial 1 | 6 sheep | 7,636,648 steps | ret(last 50)=+22.41 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 7,736,648 steps | ret(last 50)=+23.84 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 7,836,648 steps | ret(last 50)=+22.95 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 7,936,648 steps | ret(last 50)=+23.97 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 8,036,648 steps | ret(last 50)=+24.02 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 8,136,648 steps | ret(last 50)=+23.42 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 8,236,648 steps | ret(last 50)=+24.15 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 8,336,648 steps | ret(last 50)=+23.32 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 8,436,648 steps | ret(last 50)=+23.46 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 8,536,648 steps | ret(last 50)=+23.80 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 8,636,648 steps | ret(last 50)=+24.41 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 8,736,648 steps | ret(last 50)=+23.86 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 8,836,648 steps | ret(last 50)=+23.57 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 8,936,648 steps | ret(last 50)=+23.74 win_sr=100% cum_sr=100%] + ... [trial 1 | 6 sheep | 9,036,648 steps | ret(last 50)=+22.87 win_sr=100% cum_sr=100%] +[Stage n_sheep=6] evaluating 30 eps +[Stage n_sheep=6] sr=100% mean_len=1162 mean_min_pen=3.1m mean_act=1.36 + +[Stage n_sheep=7] training 1,500,000 steps + ... [trial 1 | 7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [trial 1 | 7 sheep | 9,143,976 steps | ret(last 50)=+24.46 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 9,243,976 steps | ret(last 50)=+25.47 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 9,343,976 steps | ret(last 50)=+25.10 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 9,443,976 steps | ret(last 50)=+24.85 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 9,543,976 steps | ret(last 50)=+26.01 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 9,643,976 steps | ret(last 50)=+26.26 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 9,743,976 steps | ret(last 50)=+26.44 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 9,843,976 steps | ret(last 50)=+26.08 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 9,943,976 steps | ret(last 50)=+25.00 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 10,043,976 steps | ret(last 50)=+26.22 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 10,143,976 steps | ret(last 50)=+24.79 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 10,243,976 steps | ret(last 50)=+26.33 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 10,343,976 steps | ret(last 50)=+26.36 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 10,443,976 steps | ret(last 50)=+25.68 win_sr=100% cum_sr=100%] + ... [trial 1 | 7 sheep | 10,543,976 steps | ret(last 50)=+26.75 win_sr=100% cum_sr=100%] +[Stage n_sheep=7] evaluating 30 eps +[Stage n_sheep=7] sr=100% mean_len=1253 mean_min_pen=2.7m mean_act=1.38 + +[Stage n_sheep=8] training 1,500,000 steps + ... [trial 1 | 8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [trial 1 | 8 sheep | 10,651,304 steps | ret(last 50)=+28.19 win_sr=100% cum_sr=100%] + ... [trial 1 | 8 sheep | 10,751,304 steps | ret(last 50)=+28.80 win_sr=100% cum_sr=100%] + ... [trial 1 | 8 sheep | 10,851,304 steps | ret(last 50)=+27.81 win_sr=100% cum_sr=100%] + ... [trial 1 | 8 sheep | 10,951,304 steps | ret(last 50)=+27.31 win_sr=100% cum_sr=100%] + ... [trial 1 | 8 sheep | 11,051,304 steps | ret(last 50)=+27.67 win_sr=100% cum_sr=100%] + ... [trial 1 | 8 sheep | 11,151,304 steps | ret(last 50)=+27.14 win_sr=100% cum_sr=100%] + ... [trial 1 | 8 sheep | 11,251,304 steps | ret(last 50)=+29.60 win_sr=98% cum_sr=100%] + ... [trial 1 | 8 sheep | 11,351,304 steps | ret(last 50)=+28.81 win_sr=98% cum_sr=100%] + ... [trial 1 | 8 sheep | 11,451,304 steps | ret(last 50)=+27.76 win_sr=100% cum_sr=100%] + ... [trial 1 | 8 sheep | 11,551,304 steps | ret(last 50)=+27.28 win_sr=100% cum_sr=100%] + ... [trial 1 | 8 sheep | 11,651,304 steps | ret(last 50)=+29.04 win_sr=98% cum_sr=99%] + ... [trial 1 | 8 sheep | 11,751,304 steps | ret(last 50)=+28.75 win_sr=100% cum_sr=100%] + ... [trial 1 | 8 sheep | 11,851,304 steps | ret(last 50)=+29.04 win_sr=100% cum_sr=100%] + ... [trial 1 | 8 sheep | 11,951,304 steps | ret(last 50)=+28.27 win_sr=100% cum_sr=100%] + ... [trial 1 | 8 sheep | 12,051,304 steps | ret(last 50)=+27.90 win_sr=100% cum_sr=100%] +[Stage n_sheep=8] evaluating 30 eps +[Stage n_sheep=8] sr=93% mean_len=1495 mean_min_pen=2.6m mean_act=1.39 + +[Stage n_sheep=9] training 1,500,000 steps + ... [trial 1 | 9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [trial 1 | 9 sheep | 12,158,632 steps | ret(last 50)=+30.67 win_sr=98% cum_sr=98%] + ... [trial 1 | 9 sheep | 12,258,632 steps | ret(last 50)=+28.78 win_sr=100% cum_sr=99%] + ... [trial 1 | 9 sheep | 12,358,632 steps | ret(last 50)=+30.08 win_sr=100% cum_sr=99%] + ... [trial 1 | 9 sheep | 12,458,632 steps | ret(last 50)=+29.61 win_sr=100% cum_sr=99%] + ... [trial 1 | 9 sheep | 12,558,632 steps | ret(last 50)=+30.34 win_sr=98% cum_sr=99%] + ... [trial 1 | 9 sheep | 12,658,632 steps | ret(last 50)=+29.48 win_sr=98% cum_sr=99%] + ... [trial 1 | 9 sheep | 12,758,632 steps | ret(last 50)=+29.92 win_sr=98% cum_sr=99%] + ... [trial 1 | 9 sheep | 12,858,632 steps | ret(last 50)=+29.26 win_sr=100% cum_sr=99%] + ... [trial 1 | 9 sheep | 12,958,632 steps | ret(last 50)=+30.36 win_sr=96% cum_sr=98%] + ... [trial 1 | 9 sheep | 13,058,632 steps | ret(last 50)=+30.19 win_sr=100% cum_sr=98%] + ... [trial 1 | 9 sheep | 13,158,632 steps | ret(last 50)=+29.24 win_sr=100% cum_sr=99%] + ... [trial 1 | 9 sheep | 13,258,632 steps | ret(last 50)=+30.40 win_sr=100% cum_sr=99%] + ... [trial 1 | 9 sheep | 13,358,632 steps | ret(last 50)=+31.65 win_sr=100% cum_sr=99%] + ... [trial 1 | 9 sheep | 13,458,632 steps | ret(last 50)=+30.77 win_sr=98% cum_sr=99%] + ... [trial 1 | 9 sheep | 13,558,632 steps | ret(last 50)=+30.21 win_sr=94% cum_sr=98%] +[Stage n_sheep=9] evaluating 30 eps +[Stage n_sheep=9] sr=97% mean_len=1625 mean_min_pen=2.1m mean_act=1.39 + +[Stage n_sheep=10] training 1,500,000 steps + ... [trial 1 | 10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [trial 1 | 10 sheep | 13,665,960 steps | ret(last 50)=+30.13 win_sr=90% cum_sr=92%] + ... [trial 1 | 10 sheep | 13,765,960 steps | ret(last 50)=+31.84 win_sr=96% cum_sr=92%] + ... [trial 1 | 10 sheep | 13,865,960 steps | ret(last 50)=+32.66 win_sr=88% cum_sr=91%] + ... [trial 1 | 10 sheep | 13,965,960 steps | ret(last 50)=+32.56 win_sr=90% cum_sr=91%] + ... [trial 1 | 10 sheep | 14,065,960 steps | ret(last 50)=+31.29 win_sr=98% cum_sr=93%] + ... [trial 1 | 10 sheep | 14,165,960 steps | ret(last 50)=+32.72 win_sr=94% cum_sr=93%] + ... [trial 1 | 10 sheep | 14,265,960 steps | ret(last 50)=+32.42 win_sr=96% cum_sr=93%] + ... [trial 1 | 10 sheep | 14,365,960 steps | ret(last 50)=+33.96 win_sr=92% cum_sr=93%] + ... [trial 1 | 10 sheep | 14,465,960 steps | ret(last 50)=+33.17 win_sr=98% cum_sr=94%] + ... [trial 1 | 10 sheep | 14,565,960 steps | ret(last 50)=+31.48 win_sr=96% cum_sr=94%] + ... [trial 1 | 10 sheep | 14,665,960 steps | ret(last 50)=+31.19 win_sr=90% cum_sr=94%] + ... [trial 1 | 10 sheep | 14,765,960 steps | ret(last 50)=+32.87 win_sr=98% cum_sr=94%] + ... [trial 1 | 10 sheep | 14,865,960 steps | ret(last 50)=+32.36 win_sr=94% cum_sr=94%] + ... [trial 1 | 10 sheep | 14,965,960 steps | ret(last 50)=+31.14 win_sr=94% cum_sr=94%] + ... [trial 1 | 10 sheep | 15,065,960 steps | ret(last 50)=+32.18 win_sr=96% cum_sr=94%] +[Stage n_sheep=10] evaluating 30 eps +[Stage n_sheep=10] sr=97% mean_len=1816 mean_min_pen=2.0m mean_act=1.39 + +============================================================ + REPLAY SUMMARY +============================================================ + n_sheep=1 sr= 97% len= 351 min_pen= 3.9m act=0.28 + n_sheep=2 sr=100% len= 421 min_pen= 3.5m act=1.01 + n_sheep=3 sr=100% len= 608 min_pen= 3.5m act=1.06 + n_sheep=4 sr=100% len= 874 min_pen= 3.3m act=1.23 + n_sheep=5 sr= 97% len= 945 min_pen= 3.4m act=1.33 + n_sheep=6 sr=100% len= 1162 min_pen= 3.1m act=1.36 + n_sheep=7 sr=100% len= 1253 min_pen= 2.7m act=1.38 + n_sheep=8 sr= 93% len= 1495 min_pen= 2.6m act=1.39 + n_sheep=9 sr= 97% len= 1625 min_pen= 2.1m act=1.39 + n_sheep=10 sr= 97% len= 1816 min_pen= 2.0m act=1.39 + + Total time: 90.3 min + Artefacts: runs/final_v2/ diff --git a/training/runs/final_v2/config.json b/training/runs/final_v2/config.json new file mode 100644 index 0000000..b2d15fe --- /dev/null +++ b/training/runs/final_v2/config.json @@ -0,0 +1,11 @@ +{ + "W_PER_SHEEP": 1.0, + "W_ALIGN": 0.0, + "W_PEN_BONUS": 5.0, + "W_STEP_COST": 0.02, + "W_COMPLETE": 200.0, + "W_COMPACT": 1.5, + "ALIGN_SHAPE": "standoff", + "ALIGN_GATED": false, + "ent_coef": 0.02 +} \ No newline at end of file diff --git a/training/runs/final_v2/final_model.zip b/training/runs/final_v2/final_model.zip new file mode 100644 index 0000000..41dc86d Binary files /dev/null and b/training/runs/final_v2/final_model.zip differ diff --git a/training/runs/final_v2/stage_results.json b/training/runs/final_v2/stage_results.json new file mode 100644 index 0000000..a8f3266 --- /dev/null +++ b/training/runs/final_v2/stage_results.json @@ -0,0 +1,72 @@ +[ + { + "n_sheep": 1, + "sr": 0.9666666666666667, + "mean_len": 350.96666666666664, + "mean_min_pen": 3.913520161310832, + "mean_act": 0.2797267940386975 + }, + { + "n_sheep": 2, + "sr": 1.0, + "mean_len": 421.46666666666664, + "mean_min_pen": 3.485754116376241, + "mean_act": 1.0053067604365706 + }, + { + "n_sheep": 3, + "sr": 1.0, + "mean_len": 608.5, + "mean_min_pen": 3.52824010848999, + "mean_act": 1.0576287743527575 + }, + { + "n_sheep": 4, + "sr": 1.0, + "mean_len": 874.1333333333333, + "mean_min_pen": 3.2648465514183043, + "mean_act": 1.2302308682249101 + }, + { + "n_sheep": 5, + "sr": 0.9666666666666667, + "mean_len": 945.1333333333333, + "mean_min_pen": 3.390091093381246, + "mean_act": 1.328577256075333 + }, + { + "n_sheep": 6, + "sr": 1.0, + "mean_len": 1162.1, + "mean_min_pen": 3.0996540347735086, + "mean_act": 1.3581346810990618 + }, + { + "n_sheep": 7, + "sr": 1.0, + "mean_len": 1252.6, + "mean_min_pen": 2.6753984689712524, + "mean_act": 1.3753795162019462 + }, + { + "n_sheep": 8, + "sr": 0.9333333333333333, + "mean_len": 1495.2333333333333, + "mean_min_pen": 2.560386610031128, + "mean_act": 1.3861974064434042 + }, + { + "n_sheep": 9, + "sr": 0.9666666666666667, + "mean_len": 1624.9, + "mean_min_pen": 2.130835851033529, + "mean_act": 1.387693840600181 + }, + { + "n_sheep": 10, + "sr": 0.9666666666666667, + "mean_len": 1816.5, + "mean_min_pen": 1.9940622925758362, + "mean_act": 1.3946097864970635 + } +] \ No newline at end of file diff --git a/training/runs/final_v2/vecnorm.pkl b/training/runs/final_v2/vecnorm.pkl new file mode 100644 index 0000000..44319c8 Binary files /dev/null and b/training/runs/final_v2/vecnorm.pkl differ