diff --git a/training/runs/v3.log b/training/runs/v3.log new file mode 100644 index 0000000..568177a --- /dev/null +++ b/training/runs/v3.log @@ -0,0 +1,242 @@ +Config loaded from config.json +Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_SOUTH': 0.01, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.0, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02} +Run dir: runs/v3 +Curriculum: 1 → 10 sheep, 1,500,000 steps/stage + + +[Stage n_sheep=1] training 1,500,000 steps + ... [1 sheep | 100,000 steps | ret(last 24)=-47.74 win_sr=12% cum_sr=12%] + ... [1 sheep | 200,000 steps | ret(last 50)=-40.77 win_sr=14% cum_sr=16%] + ... [1 sheep | 300,000 steps | ret(last 50)=-36.39 win_sr=16% cum_sr=16%] + ... [1 sheep | 400,000 steps | ret(last 50)=-40.04 win_sr=14% cum_sr=15%] + ... [1 sheep | 500,000 steps | ret(last 50)=+7.09 win_sr=80% cum_sr=36%] + ... [1 sheep | 600,000 steps | ret(last 50)=+15.87 win_sr=100% cum_sr=71%] + ... [1 sheep | 700,000 steps | ret(last 50)=+14.78 win_sr=100% cum_sr=84%] + ... [1 sheep | 800,000 steps | ret(last 50)=+14.04 win_sr=100% cum_sr=90%] + ... [1 sheep | 900,000 steps | ret(last 50)=+14.08 win_sr=100% cum_sr=92%] + ... [1 sheep | 1,000,000 steps | ret(last 50)=+13.33 win_sr=100% cum_sr=94%] + ... [1 sheep | 1,100,000 steps | ret(last 50)=+13.99 win_sr=100% cum_sr=95%] + ... [1 sheep | 1,200,000 steps | ret(last 50)=+13.38 win_sr=100% cum_sr=96%] + ... [1 sheep | 1,300,000 steps | ret(last 50)=+13.18 win_sr=100% cum_sr=96%] + ... [1 sheep | 1,400,000 steps | ret(last 50)=+13.53 win_sr=100% cum_sr=97%] + ... [1 sheep | 1,500,000 steps | ret(last 50)=+13.46 win_sr=100% cum_sr=97%] +[Stage n_sheep=1] evaluating 30 eps +[Stage n_sheep=1] sr=100% mean_len=264 mean_min_pen=3.7m mean_act=0.45 + failure modes: SUCCESS=30 + reward/step: progress=+0.1156 alignment=+0.0001 south=-0.0005 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0378 step_cost=-0.0200 complete=+0.3784 + +[Stage n_sheep=2] training 1,500,000 steps + ... [2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [2 sheep | 1,607,336 steps | ret(last 35)=-3.04 win_sr=49% cum_sr=49%] + ... [2 sheep | 1,707,336 steps | ret(last 50)=-11.13 win_sr=20% cum_sr=33%] + ... [2 sheep | 1,807,336 steps | ret(last 50)=-11.83 win_sr=18% cum_sr=31%] + ... [2 sheep | 1,907,336 steps | ret(last 50)=-8.76 win_sr=30% cum_sr=31%] + ... [2 sheep | 2,007,336 steps | ret(last 50)=-8.95 win_sr=30% cum_sr=30%] + ... [2 sheep | 2,107,336 steps | ret(last 50)=-9.06 win_sr=32% cum_sr=30%] + ... [2 sheep | 2,207,336 steps | ret(last 50)=-9.48 win_sr=32% cum_sr=30%] + ... [2 sheep | 2,307,336 steps | ret(last 50)=-1.70 win_sr=44% cum_sr=33%] + ... [2 sheep | 2,407,336 steps | ret(last 50)=+5.02 win_sr=64% cum_sr=38%] + ... [2 sheep | 2,507,336 steps | ret(last 50)=+13.32 win_sr=88% cum_sr=46%] + ... [2 sheep | 2,607,336 steps | ret(last 50)=+12.15 win_sr=90% cum_sr=54%] + ... [2 sheep | 2,707,336 steps | ret(last 50)=+17.13 win_sr=98% cum_sr=63%] + ... [2 sheep | 2,807,336 steps | ret(last 50)=+18.81 win_sr=98% cum_sr=69%] + ... [2 sheep | 2,907,336 steps | ret(last 50)=+16.23 win_sr=92% cum_sr=73%] + ... [2 sheep | 3,007,336 steps | ret(last 50)=+18.83 win_sr=100% cum_sr=76%] +[Stage n_sheep=2] evaluating 30 eps +[Stage n_sheep=2] sr=77% mean_len=1398 mean_min_pen=3.3m mean_act=0.97 + failure modes: SUCCESS=23 PARTIAL_1of2=6 COMPACT_CANT_DRIVE=1 + reward/step: progress=+0.0401 alignment=+0.0045 south=-0.0039 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0126 step_cost=-0.0200 complete=+0.0549 + +[Stage n_sheep=3] training 1,500,000 steps + ... [3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [3 sheep | 3,114,664 steps | ret(last 50)=+13.79 win_sr=82% cum_sr=84%] + ... [3 sheep | 3,214,664 steps | ret(last 50)=+21.64 win_sr=96% cum_sr=88%] + ... [3 sheep | 3,314,664 steps | ret(last 50)=+23.45 win_sr=98% cum_sr=92%] + ... [3 sheep | 3,414,664 steps | ret(last 50)=+22.18 win_sr=98% cum_sr=94%] + ... [3 sheep | 3,514,664 steps | ret(last 50)=+24.83 win_sr=100% cum_sr=96%] + ... [3 sheep | 3,614,664 steps | ret(last 50)=+19.77 win_sr=94% cum_sr=96%] + ... [3 sheep | 3,714,664 steps | ret(last 50)=+25.53 win_sr=100% cum_sr=96%] + ... [3 sheep | 3,814,664 steps | ret(last 50)=+25.24 win_sr=100% cum_sr=97%] + ... [3 sheep | 3,914,664 steps | ret(last 50)=+24.43 win_sr=100% cum_sr=97%] + ... [3 sheep | 4,014,664 steps | ret(last 50)=+24.59 win_sr=100% cum_sr=97%] + ... [3 sheep | 4,114,664 steps | ret(last 50)=+22.18 win_sr=98% cum_sr=98%] + ... [3 sheep | 4,214,664 steps | ret(last 50)=+23.11 win_sr=96% cum_sr=97%] + ... [3 sheep | 4,314,664 steps | ret(last 50)=+23.06 win_sr=98% cum_sr=97%] + ... [3 sheep | 4,414,664 steps | ret(last 50)=+23.35 win_sr=100% cum_sr=97%] + ... [3 sheep | 4,514,664 steps | ret(last 50)=+22.50 win_sr=100% cum_sr=98%] +[Stage n_sheep=3] evaluating 30 eps +[Stage n_sheep=3] sr=97% mean_len=1095 mean_min_pen=2.5m mean_act=0.95 + failure modes: SUCCESS=29 COMPACT_CANT_DRIVE=1 + reward/step: progress=+0.0821 alignment=+0.0113 south=-0.0087 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0265 step_cost=-0.0200 complete=+0.0883 + +[Stage n_sheep=4] training 1,500,000 steps + ... [4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [4 sheep | 4,621,992 steps | ret(last 50)=+22.17 win_sr=92% cum_sr=94%] + ... [4 sheep | 4,721,992 steps | ret(last 50)=+25.81 win_sr=94% cum_sr=93%] + ... [4 sheep | 4,821,992 steps | ret(last 50)=+21.80 win_sr=90% cum_sr=93%] + ... [4 sheep | 4,921,992 steps | ret(last 50)=+26.38 win_sr=98% cum_sr=94%] + ... [4 sheep | 5,021,992 steps | ret(last 50)=+26.65 win_sr=98% cum_sr=95%] + ... [4 sheep | 5,121,992 steps | ret(last 50)=+26.07 win_sr=98% cum_sr=95%] + ... [4 sheep | 5,221,992 steps | ret(last 50)=+27.08 win_sr=98% cum_sr=96%] + ... [4 sheep | 5,321,992 steps | ret(last 50)=+27.87 win_sr=100% cum_sr=96%] + ... [4 sheep | 5,421,992 steps | ret(last 50)=+27.53 win_sr=100% cum_sr=97%] + ... [4 sheep | 5,521,992 steps | ret(last 50)=+25.91 win_sr=100% cum_sr=97%] + ... [4 sheep | 5,621,992 steps | ret(last 50)=+27.75 win_sr=100% cum_sr=97%] + ... [4 sheep | 5,721,992 steps | ret(last 50)=+25.63 win_sr=100% cum_sr=97%] + ... [4 sheep | 5,821,992 steps | ret(last 50)=+24.43 win_sr=98% cum_sr=97%] + ... [4 sheep | 5,921,992 steps | ret(last 50)=+22.52 win_sr=94% cum_sr=97%] + ... [4 sheep | 6,021,992 steps | ret(last 50)=+27.28 win_sr=100% cum_sr=98%] +[Stage n_sheep=4] evaluating 30 eps +[Stage n_sheep=4] sr=57% mean_len=2572 mean_min_pen=2.2m mean_act=1.28 + failure modes: SUCCESS=17 PARTIAL_1of4=6 PARTIAL_2of4=5 DROVE_NO_SHEEP=1 NEVER_COMPACT=1 + reward/step: progress=+0.0455 alignment=+0.0040 south=-0.0454 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0109 step_cost=-0.0200 complete=+0.0220 + +[Stage n_sheep=5] training 1,500,000 steps + ... [5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [5 sheep | 6,129,320 steps | ret(last 50)=+28.06 win_sr=96% cum_sr=96%] + ... [5 sheep | 6,229,320 steps | ret(last 50)=+31.40 win_sr=98% cum_sr=96%] + ... [5 sheep | 6,329,320 steps | ret(last 50)=+27.81 win_sr=96% cum_sr=96%] + ... [5 sheep | 6,429,320 steps | ret(last 50)=+22.08 win_sr=88% cum_sr=95%] + ... [5 sheep | 6,529,320 steps | ret(last 50)=+26.99 win_sr=94% cum_sr=95%] + ... [5 sheep | 6,629,320 steps | ret(last 50)=+21.24 win_sr=86% cum_sr=93%] + ... [5 sheep | 6,729,320 steps | ret(last 50)=+24.58 win_sr=94% cum_sr=93%] + ... [5 sheep | 6,829,320 steps | ret(last 50)=+29.66 win_sr=96% cum_sr=93%] + ... [5 sheep | 6,929,320 steps | ret(last 50)=+27.53 win_sr=96% cum_sr=93%] + ... [5 sheep | 7,029,320 steps | ret(last 50)=+28.99 win_sr=100% cum_sr=94%] + ... [5 sheep | 7,129,320 steps | ret(last 50)=+27.59 win_sr=98% cum_sr=94%] + ... [5 sheep | 7,229,320 steps | ret(last 50)=+30.79 win_sr=100% cum_sr=95%] + ... [5 sheep | 7,329,320 steps | ret(last 50)=+30.56 win_sr=98% cum_sr=95%] + ... [5 sheep | 7,429,320 steps | ret(last 50)=+31.55 win_sr=100% cum_sr=95%] + ... [5 sheep | 7,529,320 steps | ret(last 50)=+29.95 win_sr=100% cum_sr=96%] +[Stage n_sheep=5] evaluating 30 eps +[Stage n_sheep=5] sr=0% mean_len=4000 mean_min_pen=1.7m mean_act=1.36 + failure modes: PARTIAL_4of5=17 PARTIAL_1of5=9 PARTIAL_3of5=2 PARTIAL_2of5=2 + reward/step: progress=+0.0396 alignment=+0.0034 south=-0.0393 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0073 step_cost=-0.0200 complete=+0.0000 + +[Stage n_sheep=6] training 1,500,000 steps + ... [6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [6 sheep | 7,636,648 steps | ret(last 50)=+34.50 win_sr=100% cum_sr=100%] + ... [6 sheep | 7,736,648 steps | ret(last 50)=+31.01 win_sr=100% cum_sr=100%] + ... [6 sheep | 7,836,648 steps | ret(last 50)=+33.27 win_sr=100% cum_sr=100%] + ... [6 sheep | 7,936,648 steps | ret(last 50)=+34.81 win_sr=100% cum_sr=100%] + ... [6 sheep | 8,036,648 steps | ret(last 50)=+32.69 win_sr=100% cum_sr=100%] + ... [6 sheep | 8,136,648 steps | ret(last 50)=+31.36 win_sr=96% cum_sr=99%] + ... [6 sheep | 8,236,648 steps | ret(last 50)=+33.71 win_sr=100% cum_sr=99%] + ... [6 sheep | 8,336,648 steps | ret(last 50)=+34.71 win_sr=100% cum_sr=99%] + ... [6 sheep | 8,436,648 steps | ret(last 50)=+31.89 win_sr=96% cum_sr=99%] + ... [6 sheep | 8,536,648 steps | ret(last 50)=+35.63 win_sr=100% cum_sr=99%] + ... [6 sheep | 8,636,648 steps | ret(last 50)=+35.92 win_sr=100% cum_sr=99%] + ... [6 sheep | 8,736,648 steps | ret(last 50)=+33.70 win_sr=100% cum_sr=99%] + ... [6 sheep | 8,836,648 steps | ret(last 50)=+33.46 win_sr=100% cum_sr=99%] + ... [6 sheep | 8,936,648 steps | ret(last 50)=+35.12 win_sr=100% cum_sr=99%] + ... [6 sheep | 9,036,648 steps | ret(last 50)=+34.21 win_sr=100% cum_sr=100%] +[Stage n_sheep=6] evaluating 30 eps +[Stage n_sheep=6] sr=37% mean_len=3137 mean_min_pen=1.8m mean_act=1.37 + failure modes: PARTIAL_4of6=14 SUCCESS=11 PARTIAL_3of6=5 + reward/step: progress=+0.0654 alignment=+0.0085 south=-0.0392 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0146 step_cost=-0.0200 complete=+0.0117 + +[Stage n_sheep=7] training 1,500,000 steps + ... [7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [7 sheep | 9,143,976 steps | ret(last 50)=+36.14 win_sr=100% cum_sr=100%] + ... [7 sheep | 9,243,976 steps | ret(last 50)=+33.77 win_sr=98% cum_sr=99%] + ... [7 sheep | 9,343,976 steps | ret(last 50)=+37.14 win_sr=100% cum_sr=100%] + ... [7 sheep | 9,443,976 steps | ret(last 50)=+39.90 win_sr=100% cum_sr=100%] + ... [7 sheep | 9,543,976 steps | ret(last 50)=+37.52 win_sr=100% cum_sr=100%] + ... [7 sheep | 9,643,976 steps | ret(last 50)=+37.31 win_sr=100% cum_sr=100%] + ... [7 sheep | 9,743,976 steps | ret(last 50)=+36.24 win_sr=100% cum_sr=100%] + ... [7 sheep | 9,843,976 steps | ret(last 50)=+39.67 win_sr=100% cum_sr=100%] + ... [7 sheep | 9,943,976 steps | ret(last 50)=+39.12 win_sr=100% cum_sr=100%] + ... [7 sheep | 10,043,976 steps | ret(last 50)=+37.82 win_sr=100% cum_sr=100%] + ... [7 sheep | 10,143,976 steps | ret(last 50)=+37.38 win_sr=100% cum_sr=100%] + ... [7 sheep | 10,243,976 steps | ret(last 50)=+37.47 win_sr=98% cum_sr=100%] + ... [7 sheep | 10,343,976 steps | ret(last 50)=+36.04 win_sr=98% cum_sr=99%] + ... [7 sheep | 10,443,976 steps | ret(last 50)=+31.71 win_sr=98% cum_sr=99%] + ... [7 sheep | 10,543,976 steps | ret(last 50)=+32.50 win_sr=96% cum_sr=99%] +[Stage n_sheep=7] evaluating 30 eps +[Stage n_sheep=7] sr=0% mean_len=4000 mean_min_pen=1.8m mean_act=1.38 + failure modes: PARTIAL_5of7=18 PARTIAL_6of7=7 PARTIAL_3of7=3 PARTIAL_4of7=2 + reward/step: progress=+0.0533 alignment=+0.0069 south=-0.0356 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0124 step_cost=-0.0200 complete=+0.0000 + +[Stage n_sheep=8] training 1,500,000 steps + ... [8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [8 sheep | 10,651,304 steps | ret(last 50)=+36.01 win_sr=96% cum_sr=96%] + ... [8 sheep | 10,751,304 steps | ret(last 50)=+37.97 win_sr=96% cum_sr=96%] + ... [8 sheep | 10,851,304 steps | ret(last 50)=+39.12 win_sr=100% cum_sr=98%] + ... [8 sheep | 10,951,304 steps | ret(last 50)=+36.54 win_sr=96% cum_sr=97%] + ... [8 sheep | 11,051,304 steps | ret(last 50)=+40.58 win_sr=100% cum_sr=98%] + ... [8 sheep | 11,151,304 steps | ret(last 50)=+39.00 win_sr=98% cum_sr=98%] + ... [8 sheep | 11,251,304 steps | ret(last 50)=+38.54 win_sr=98% cum_sr=98%] + ... [8 sheep | 11,351,304 steps | ret(last 50)=+39.29 win_sr=100% cum_sr=98%] + ... [8 sheep | 11,451,304 steps | ret(last 50)=+38.36 win_sr=100% cum_sr=98%] + ... [8 sheep | 11,551,304 steps | ret(last 50)=+40.04 win_sr=100% cum_sr=98%] + ... [8 sheep | 11,651,304 steps | ret(last 50)=+37.92 win_sr=100% cum_sr=99%] + ... [8 sheep | 11,751,304 steps | ret(last 50)=+40.01 win_sr=98% cum_sr=99%] + ... [8 sheep | 11,851,304 steps | ret(last 50)=+39.06 win_sr=100% cum_sr=99%] + ... [8 sheep | 11,951,304 steps | ret(last 50)=+41.39 win_sr=100% cum_sr=99%] + ... [8 sheep | 12,051,304 steps | ret(last 50)=+40.05 win_sr=100% cum_sr=99%] +[Stage n_sheep=8] evaluating 30 eps +[Stage n_sheep=8] sr=60% mean_len=2472 mean_min_pen=1.6m mean_act=1.39 + failure modes: SUCCESS=18 PARTIAL_6of8=9 PARTIAL_4of8=3 + reward/step: progress=+0.0956 alignment=+0.0106 south=-0.0508 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0283 step_cost=-0.0200 complete=+0.0243 + +[Stage n_sheep=9] training 1,500,000 steps + ... [9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [9 sheep | 12,158,632 steps | ret(last 50)=+41.35 win_sr=98% cum_sr=98%] + ... [9 sheep | 12,258,632 steps | ret(last 50)=+41.63 win_sr=100% cum_sr=99%] + ... [9 sheep | 12,358,632 steps | ret(last 50)=+41.85 win_sr=100% cum_sr=99%] + ... [9 sheep | 12,458,632 steps | ret(last 50)=+42.49 win_sr=100% cum_sr=100%] + ... [9 sheep | 12,558,632 steps | ret(last 50)=+40.87 win_sr=100% cum_sr=100%] + ... [9 sheep | 12,658,632 steps | ret(last 50)=+39.09 win_sr=100% cum_sr=100%] + ... [9 sheep | 12,758,632 steps | ret(last 50)=+42.23 win_sr=100% cum_sr=100%] + ... [9 sheep | 12,858,632 steps | ret(last 50)=+41.00 win_sr=100% cum_sr=100%] + ... [9 sheep | 12,958,632 steps | ret(last 50)=+43.02 win_sr=100% cum_sr=100%] + ... [9 sheep | 13,058,632 steps | ret(last 50)=+41.13 win_sr=100% cum_sr=100%] + ... [9 sheep | 13,158,632 steps | ret(last 50)=+41.02 win_sr=100% cum_sr=100%] + ... [9 sheep | 13,258,632 steps | ret(last 50)=+42.88 win_sr=100% cum_sr=100%] + ... [9 sheep | 13,358,632 steps | ret(last 50)=+46.16 win_sr=100% cum_sr=100%] + ... [9 sheep | 13,458,632 steps | ret(last 50)=+44.69 win_sr=100% cum_sr=100%] + ... [9 sheep | 13,558,632 steps | ret(last 50)=+44.49 win_sr=100% cum_sr=100%] +[Stage n_sheep=9] evaluating 30 eps +[Stage n_sheep=9] sr=0% mean_len=4000 mean_min_pen=1.5m mean_act=1.39 + failure modes: PARTIAL_8of9=26 PARTIAL_7of9=4 + reward/step: progress=+0.0787 alignment=+0.0079 south=-0.0184 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0197 step_cost=-0.0200 complete=+0.0000 + +[Stage n_sheep=10] training 1,500,000 steps + ... [10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [10 sheep | 13,665,960 steps | ret(last 50)=+43.38 win_sr=100% cum_sr=100%] + ... [10 sheep | 13,765,960 steps | ret(last 50)=+43.26 win_sr=100% cum_sr=100%] + ... [10 sheep | 13,865,960 steps | ret(last 50)=+46.91 win_sr=100% cum_sr=100%] + ... [10 sheep | 13,965,960 steps | ret(last 50)=+45.36 win_sr=100% cum_sr=100%] + ... [10 sheep | 14,065,960 steps | ret(last 50)=+45.37 win_sr=100% cum_sr=100%] + ... [10 sheep | 14,165,960 steps | ret(last 50)=+44.30 win_sr=100% cum_sr=100%] + ... [10 sheep | 14,265,960 steps | ret(last 50)=+43.83 win_sr=100% cum_sr=100%] + ... [10 sheep | 14,365,960 steps | ret(last 50)=+47.09 win_sr=100% cum_sr=100%] + ... [10 sheep | 14,465,960 steps | ret(last 50)=+41.32 win_sr=100% cum_sr=100%] + ... [10 sheep | 14,565,960 steps | ret(last 50)=+45.30 win_sr=100% cum_sr=100%] + ... [10 sheep | 14,665,960 steps | ret(last 50)=+45.36 win_sr=98% cum_sr=100%] + ... [10 sheep | 14,765,960 steps | ret(last 50)=+41.83 win_sr=100% cum_sr=100%] + ... [10 sheep | 14,865,960 steps | ret(last 50)=+44.40 win_sr=100% cum_sr=100%] + ... [10 sheep | 14,965,960 steps | ret(last 50)=+45.89 win_sr=100% cum_sr=100%] + ... [10 sheep | 15,065,960 steps | ret(last 50)=+42.49 win_sr=100% cum_sr=100%] +[Stage n_sheep=10] evaluating 30 eps +[Stage n_sheep=10] sr=83% mean_len=2243 mean_min_pen=1.5m mean_act=1.40 + failure modes: SUCCESS=25 PARTIAL_8of10=3 PARTIAL_7of10=2 + reward/step: progress=+0.1387 alignment=+0.0150 south=-0.0437 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0428 step_cost=-0.0200 complete=+0.0372 + +====================================================================== + TRAINING SUMMARY +====================================================================== + n_sheep=1 sr=100% len= 264 min_pen= 3.7m act=0.45 + n_sheep=2 sr= 77% len= 1398 min_pen= 3.3m act=0.97 + n_sheep=3 sr= 97% len= 1095 min_pen= 2.5m act=0.95 + n_sheep=4 sr= 57% len= 2572 min_pen= 2.2m act=1.28 + n_sheep=5 sr= 0% len= 4000 min_pen= 1.7m act=1.36 + n_sheep=6 sr= 37% len= 3137 min_pen= 1.8m act=1.37 + n_sheep=7 sr= 0% len= 4000 min_pen= 1.8m act=1.38 + n_sheep=8 sr= 60% len= 2472 min_pen= 1.6m act=1.39 + n_sheep=9 sr= 0% len= 4000 min_pen= 1.5m act=1.39 + n_sheep=10 sr= 83% len= 2243 min_pen= 1.5m act=1.40 + + Total time: 94.3 min + Artefacts: runs/v3/ + Plots: runs/v3/success_rate.png, runs/v3/eval/ diff --git a/training/runs/v3/config.json b/training/runs/v3/config.json new file mode 100644 index 0000000..a2a98c3 --- /dev/null +++ b/training/runs/v3/config.json @@ -0,0 +1,15 @@ +{ + "W_PER_SHEEP": 2.0, + "W_ALIGN": 0.05, + "W_PEN_BONUS": 10.0, + "W_COMPLETE": 100.0, + "W_STEP_COST": 0.02, + "W_SOUTH": 0.01, + "W_COMPACT": 0.0, + "W_WALL_TOUCH": 0.0, + "WALL_TOUCH_BUFFER": 0.4, + "ALIGN_SHAPE": "standoff", + "ALIGN_GATED": true, + "ENTRY_AWARE": true, + "ent_coef": 0.02 +} \ No newline at end of file diff --git a/training/runs/v3/eval/ep_10s_success.gif b/training/runs/v3/eval/ep_10s_success.gif new file mode 100644 index 0000000..5bcfbd1 Binary files /dev/null and b/training/runs/v3/eval/ep_10s_success.gif differ diff --git a/training/runs/v3/eval/ep_1s_success.gif b/training/runs/v3/eval/ep_1s_success.gif new file mode 100644 index 0000000..a798973 Binary files /dev/null and b/training/runs/v3/eval/ep_1s_success.gif differ diff --git a/training/runs/v3/eval/ep_2s_success.gif b/training/runs/v3/eval/ep_2s_success.gif new file mode 100644 index 0000000..c8d68dd Binary files /dev/null and b/training/runs/v3/eval/ep_2s_success.gif differ diff --git a/training/runs/v3/eval/ep_3s_success.gif b/training/runs/v3/eval/ep_3s_success.gif new file mode 100644 index 0000000..0626381 Binary files /dev/null and b/training/runs/v3/eval/ep_3s_success.gif differ diff --git a/training/runs/v3/eval/ep_4s_fail.gif b/training/runs/v3/eval/ep_4s_fail.gif new file mode 100644 index 0000000..fabc0a2 Binary files /dev/null and b/training/runs/v3/eval/ep_4s_fail.gif differ diff --git a/training/runs/v3/eval/ep_5s_fail.gif b/training/runs/v3/eval/ep_5s_fail.gif new file mode 100644 index 0000000..59bbc7f Binary files /dev/null and b/training/runs/v3/eval/ep_5s_fail.gif differ diff --git a/training/runs/v3/eval/ep_6s_success.gif b/training/runs/v3/eval/ep_6s_success.gif new file mode 100644 index 0000000..c182597 Binary files /dev/null and b/training/runs/v3/eval/ep_6s_success.gif differ diff --git a/training/runs/v3/eval/ep_7s_fail.gif b/training/runs/v3/eval/ep_7s_fail.gif new file mode 100644 index 0000000..df86225 Binary files /dev/null and b/training/runs/v3/eval/ep_7s_fail.gif differ diff --git a/training/runs/v3/eval/ep_8s_fail.gif b/training/runs/v3/eval/ep_8s_fail.gif new file mode 100644 index 0000000..ef93559 Binary files /dev/null and b/training/runs/v3/eval/ep_8s_fail.gif differ diff --git a/training/runs/v3/eval/ep_9s_fail.gif b/training/runs/v3/eval/ep_9s_fail.gif new file mode 100644 index 0000000..6b2381f Binary files /dev/null and b/training/runs/v3/eval/ep_9s_fail.gif differ diff --git a/training/runs/v3/eval/traj_10s_success.png b/training/runs/v3/eval/traj_10s_success.png new file mode 100644 index 0000000..87077f4 Binary files /dev/null and b/training/runs/v3/eval/traj_10s_success.png differ diff --git a/training/runs/v3/eval/traj_1s_success.png b/training/runs/v3/eval/traj_1s_success.png new file mode 100644 index 0000000..60388c6 Binary files /dev/null and b/training/runs/v3/eval/traj_1s_success.png differ diff --git a/training/runs/v3/eval/traj_2s_success.png b/training/runs/v3/eval/traj_2s_success.png new file mode 100644 index 0000000..6ef05e6 Binary files /dev/null and b/training/runs/v3/eval/traj_2s_success.png differ diff --git a/training/runs/v3/eval/traj_3s_success.png b/training/runs/v3/eval/traj_3s_success.png new file mode 100644 index 0000000..a2553ac Binary files /dev/null and b/training/runs/v3/eval/traj_3s_success.png differ diff --git a/training/runs/v3/eval/traj_4s_fail.png b/training/runs/v3/eval/traj_4s_fail.png new file mode 100644 index 0000000..54cd189 Binary files /dev/null and b/training/runs/v3/eval/traj_4s_fail.png differ diff --git a/training/runs/v3/eval/traj_5s_fail.png b/training/runs/v3/eval/traj_5s_fail.png new file mode 100644 index 0000000..4fabece Binary files /dev/null and b/training/runs/v3/eval/traj_5s_fail.png differ diff --git a/training/runs/v3/eval/traj_6s_success.png b/training/runs/v3/eval/traj_6s_success.png new file mode 100644 index 0000000..4207cc7 Binary files /dev/null and b/training/runs/v3/eval/traj_6s_success.png differ diff --git a/training/runs/v3/eval/traj_7s_fail.png b/training/runs/v3/eval/traj_7s_fail.png new file mode 100644 index 0000000..6bd0a80 Binary files /dev/null and b/training/runs/v3/eval/traj_7s_fail.png differ diff --git a/training/runs/v3/eval/traj_8s_fail.png b/training/runs/v3/eval/traj_8s_fail.png new file mode 100644 index 0000000..c4d3b86 Binary files /dev/null and b/training/runs/v3/eval/traj_8s_fail.png differ diff --git a/training/runs/v3/eval/traj_9s_fail.png b/training/runs/v3/eval/traj_9s_fail.png new file mode 100644 index 0000000..dede9fa Binary files /dev/null and b/training/runs/v3/eval/traj_9s_fail.png differ diff --git a/training/runs/v3/eval/ts_10s_success.png b/training/runs/v3/eval/ts_10s_success.png new file mode 100644 index 0000000..2fbc289 Binary files /dev/null and b/training/runs/v3/eval/ts_10s_success.png differ diff --git a/training/runs/v3/eval/ts_1s_success.png b/training/runs/v3/eval/ts_1s_success.png new file mode 100644 index 0000000..54492aa Binary files /dev/null and b/training/runs/v3/eval/ts_1s_success.png differ diff --git a/training/runs/v3/eval/ts_2s_success.png b/training/runs/v3/eval/ts_2s_success.png new file mode 100644 index 0000000..2147c10 Binary files /dev/null and b/training/runs/v3/eval/ts_2s_success.png differ diff --git a/training/runs/v3/eval/ts_3s_success.png b/training/runs/v3/eval/ts_3s_success.png new file mode 100644 index 0000000..61f7bd7 Binary files /dev/null and b/training/runs/v3/eval/ts_3s_success.png differ diff --git a/training/runs/v3/eval/ts_4s_fail.png b/training/runs/v3/eval/ts_4s_fail.png new file mode 100644 index 0000000..e868af0 Binary files /dev/null and b/training/runs/v3/eval/ts_4s_fail.png differ diff --git a/training/runs/v3/eval/ts_5s_fail.png b/training/runs/v3/eval/ts_5s_fail.png new file mode 100644 index 0000000..80fc56a Binary files /dev/null and b/training/runs/v3/eval/ts_5s_fail.png differ diff --git a/training/runs/v3/eval/ts_6s_success.png b/training/runs/v3/eval/ts_6s_success.png new file mode 100644 index 0000000..f9d88aa Binary files /dev/null and b/training/runs/v3/eval/ts_6s_success.png differ diff --git a/training/runs/v3/eval/ts_7s_fail.png b/training/runs/v3/eval/ts_7s_fail.png new file mode 100644 index 0000000..073daf2 Binary files /dev/null and b/training/runs/v3/eval/ts_7s_fail.png differ diff --git a/training/runs/v3/eval/ts_8s_fail.png b/training/runs/v3/eval/ts_8s_fail.png new file mode 100644 index 0000000..89fa462 Binary files /dev/null and b/training/runs/v3/eval/ts_8s_fail.png differ diff --git a/training/runs/v3/eval/ts_9s_fail.png b/training/runs/v3/eval/ts_9s_fail.png new file mode 100644 index 0000000..4223443 Binary files /dev/null and b/training/runs/v3/eval/ts_9s_fail.png differ diff --git a/training/runs/v3/final_model.zip b/training/runs/v3/final_model.zip new file mode 100644 index 0000000..0eb219d Binary files /dev/null and b/training/runs/v3/final_model.zip differ diff --git a/training/runs/v3/stage_results.json b/training/runs/v3/stage_results.json new file mode 100644 index 0000000..0f6e20b --- /dev/null +++ b/training/runs/v3/stage_results.json @@ -0,0 +1,222 @@ +[ + { + "sr": 1.0, + "mean_len": 264.3, + "mean_min_pen": 3.6947483142217, + "mean_act": 0.4488927691353647, + "failure_modes": { + "SUCCESS": 30 + }, + "reward_per_step": { + "progress": 0.11562251145796992, + "alignment": 0.00012847888517811197, + "south": -0.00046327802870008703, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.037835792659856225, + "step_cost": -0.020000000000000923, + "complete": 0.37835792659856227 + }, + "n_sheep": 1 + }, + { + "sr": 0.7666666666666667, + "mean_len": 1397.6333333333334, + "mean_min_pen": 3.3354002753893535, + "mean_act": 0.9679237489606706, + "failure_modes": { + "SUCCESS": 23, + "PARTIAL_1of2": 6, + "COMPACT_CANT_DRIVE": 1 + }, + "reward_per_step": { + "progress": 0.04012407340533507, + "alignment": 0.004549029322963513, + "south": -0.003855391958439705, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.01264041594123399, + "step_cost": -0.019999999999988728, + "complete": 0.05485463521667581 + }, + "n_sheep": 2 + }, + { + "sr": 0.9666666666666667, + "mean_len": 1095.3666666666666, + "mean_min_pen": 2.4724439779917398, + "mean_act": 0.950618689999602, + "failure_modes": { + "SUCCESS": 29, + "COMPACT_CANT_DRIVE": 1 + }, + "reward_per_step": { + "progress": 0.08207998032411863, + "alignment": 0.011342550088712133, + "south": -0.008689572376747992, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.0264751529168315, + "step_cost": -0.019999999999990636, + "complete": 0.08825050972277168 + }, + "n_sheep": 3 + }, + { + "sr": 0.5666666666666667, + "mean_len": 2571.866666666667, + "mean_min_pen": 2.1761705835660297, + "mean_act": 1.2794624905502197, + "failure_modes": { + "PARTIAL_2of4": 5, + "SUCCESS": 17, + "DROVE_NO_SHEEP": 1, + "NEVER_COMPACT": 1, + "PARTIAL_1of4": 6 + }, + "reward_per_step": { + "progress": 0.04547638401556759, + "alignment": 0.003989776116242459, + "south": -0.04544084245355691, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.010887034060863705, + "step_cost": -0.01999999999998557, + "complete": 0.02203328321841464 + }, + "n_sheep": 4 + }, + { + "sr": 0.0, + "mean_len": 4000.0, + "mean_min_pen": 1.7023075381914774, + "mean_act": 1.3590981605617019, + "failure_modes": { + "PARTIAL_1of5": 9, + "PARTIAL_3of5": 2, + "PARTIAL_2of5": 2, + "PARTIAL_4of5": 17 + }, + "reward_per_step": { + "progress": 0.039584031492471694, + "alignment": 0.003391631218188155, + "south": -0.03930825256315925, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.00725, + "step_cost": -0.01999999999998423, + "complete": 0.0 + }, + "n_sheep": 5 + }, + { + "sr": 0.36666666666666664, + "mean_len": 3136.766666666667, + "mean_min_pen": 1.7896055857340494, + "mean_act": 1.3694271957435262, + "failure_modes": { + "SUCCESS": 11, + "PARTIAL_3of6": 5, + "PARTIAL_4of6": 14 + }, + "reward_per_step": { + "progress": 0.06539200159542725, + "alignment": 0.00849681660918308, + "south": -0.03917853851677538, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.014558515669000988, + "step_cost": -0.019999999999984894, + "complete": 0.011689319150292764 + }, + "n_sheep": 6 + }, + { + "sr": 0.0, + "mean_len": 4000.0, + "mean_min_pen": 1.8426543315251669, + "mean_act": 1.383490810132896, + "failure_modes": { + "PARTIAL_5of7": 18, + "PARTIAL_3of7": 3, + "PARTIAL_6of7": 7, + "PARTIAL_4of7": 2 + }, + "reward_per_step": { + "progress": 0.05331589305400848, + "alignment": 0.00686667034524816, + "south": -0.03559404498259062, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.012416666666666666, + "step_cost": -0.01999999999998423, + "complete": 0.0 + }, + "n_sheep": 7 + }, + { + "sr": 0.6, + "mean_len": 2472.0666666666666, + "mean_min_pen": 1.609976100921631, + "mean_act": 1.3901071324053385, + "failure_modes": { + "SUCCESS": 18, + "PARTIAL_4of8": 3, + "PARTIAL_6of8": 9 + }, + "reward_per_step": { + "progress": 0.09555249268374479, + "alignment": 0.010622170754243947, + "south": -0.050827120587952045, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.028316388446913515, + "step_cost": -0.019999999999985724, + "complete": 0.024271190097354442 + }, + "n_sheep": 8 + }, + { + "sr": 0.0, + "mean_len": 4000.0, + "mean_min_pen": 1.5165573159853618, + "mean_act": 1.3936255563423037, + "failure_modes": { + "PARTIAL_8of9": 26, + "PARTIAL_7of9": 4 + }, + "reward_per_step": { + "progress": 0.07870613823334376, + "alignment": 0.007913931652916581, + "south": -0.01837508026464782, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.019666666666666666, + "step_cost": -0.01999999999998423, + "complete": 0.0 + }, + "n_sheep": 9 + }, + { + "sr": 0.8333333333333334, + "mean_len": 2243.0, + "mean_min_pen": 1.5175361116727193, + "mean_act": 1.3979439154633806, + "failure_modes": { + "SUCCESS": 25, + "PARTIAL_7of10": 2, + "PARTIAL_8of10": 3 + }, + "reward_per_step": { + "progress": 0.13872242361851903, + "alignment": 0.015031396471697371, + "south": -0.04365405077614671, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.04279982166740972, + "step_cost": -0.019999999999986123, + "complete": 0.03715262297518205 + }, + "n_sheep": 10 + } +] \ No newline at end of file diff --git a/training/runs/v3/success_rate.png b/training/runs/v3/success_rate.png new file mode 100644 index 0000000..f7b3293 Binary files /dev/null and b/training/runs/v3/success_rate.png differ diff --git a/training/runs/v3/vecnorm.pkl b/training/runs/v3/vecnorm.pkl new file mode 100644 index 0000000..1aaffb7 Binary files /dev/null and b/training/runs/v3/vecnorm.pkl differ