diff --git a/training/runs/v2.log b/training/runs/v2.log new file mode 100644 index 0000000..4fe7c39 --- /dev/null +++ b/training/runs/v2.log @@ -0,0 +1,242 @@ +Config loaded from config.json +Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.0, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02} +Run dir: runs/v2 +Curriculum: 1 → 10 sheep, 1,500,000 steps/stage + + +[Stage n_sheep=1] training 1,500,000 steps + ... [1 sheep | 100,000 steps | ret(last 40)=-23.39 win_sr=8% cum_sr=8%] + ... [1 sheep | 200,000 steps | ret(last 50)=-22.10 win_sr=10% cum_sr=9%] + ... [1 sheep | 300,000 steps | ret(last 50)=-23.02 win_sr=10% cum_sr=10%] + ... [1 sheep | 400,000 steps | ret(last 50)=-18.97 win_sr=18% cum_sr=12%] + ... [1 sheep | 500,000 steps | ret(last 50)=-20.01 win_sr=8% cum_sr=11%] + ... [1 sheep | 600,000 steps | ret(last 50)=-18.57 win_sr=14% cum_sr=12%] + ... [1 sheep | 700,000 steps | ret(last 50)=-17.55 win_sr=22% cum_sr=14%] + ... [1 sheep | 800,000 steps | ret(last 50)=+7.41 win_sr=66% cum_sr=23%] + ... [1 sheep | 900,000 steps | ret(last 50)=+17.61 win_sr=100% cum_sr=47%] + ... [1 sheep | 1,000,000 steps | ret(last 50)=+16.11 win_sr=100% cum_sr=65%] + ... [1 sheep | 1,100,000 steps | ret(last 50)=+15.82 win_sr=100% cum_sr=74%] + ... [1 sheep | 1,200,000 steps | ret(last 50)=+14.33 win_sr=100% cum_sr=80%] + ... [1 sheep | 1,300,000 steps | ret(last 50)=+14.19 win_sr=100% cum_sr=84%] + ... [1 sheep | 1,400,000 steps | ret(last 50)=+14.00 win_sr=100% cum_sr=87%] + ... [1 sheep | 1,500,000 steps | ret(last 50)=+13.96 win_sr=100% cum_sr=89%] +[Stage n_sheep=1] evaluating 30 eps +[Stage n_sheep=1] sr=100% mean_len=234 mean_min_pen=3.7m mean_act=0.41 + failure modes: SUCCESS=30 + reward/step: progress=+0.1118 alignment=+0.0003 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0427 step_cost=-0.0200 complete=+0.4274 + +[Stage n_sheep=2] training 1,500,000 steps + ... [2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [2 sheep | 1,607,336 steps | ret(last 40)=-4.45 win_sr=8% cum_sr=8%] + ... [2 sheep | 1,707,336 steps | ret(last 50)=-4.56 win_sr=8% cum_sr=9%] + ... [2 sheep | 1,807,336 steps | ret(last 50)=-2.33 win_sr=12% cum_sr=10%] + ... [2 sheep | 1,907,336 steps | ret(last 50)=+1.93 win_sr=24% cum_sr=14%] + ... [2 sheep | 2,007,336 steps | ret(last 50)=+7.32 win_sr=52% cum_sr=24%] + ... [2 sheep | 2,107,336 steps | ret(last 50)=+10.52 win_sr=58% cum_sr=30%] + ... [2 sheep | 2,207,336 steps | ret(last 50)=+15.67 win_sr=76% cum_sr=39%] + ... [2 sheep | 2,307,336 steps | ret(last 50)=+16.91 win_sr=78% cum_sr=46%] + ... [2 sheep | 2,407,336 steps | ret(last 50)=+21.91 win_sr=96% cum_sr=53%] + ... [2 sheep | 2,507,336 steps | ret(last 50)=+21.08 win_sr=94% cum_sr=60%] + ... [2 sheep | 2,607,336 steps | ret(last 50)=+20.24 win_sr=92% cum_sr=65%] + ... [2 sheep | 2,707,336 steps | ret(last 50)=+21.40 win_sr=96% cum_sr=70%] + ... [2 sheep | 2,807,336 steps | ret(last 50)=+21.95 win_sr=100% cum_sr=73%] + ... [2 sheep | 2,907,336 steps | ret(last 50)=+20.73 win_sr=100% cum_sr=76%] + ... [2 sheep | 3,007,336 steps | ret(last 50)=+21.25 win_sr=100% cum_sr=79%] +[Stage n_sheep=2] evaluating 30 eps +[Stage n_sheep=2] sr=87% mean_len=1064 mean_min_pen=4.1m mean_act=0.59 + failure modes: SUCCESS=26 COMPACT_CANT_DRIVE=4 + reward/step: progress=+0.0565 alignment=+0.0071 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0163 step_cost=-0.0200 complete=+0.0815 + +[Stage n_sheep=3] training 1,500,000 steps + ... [3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [3 sheep | 3,114,664 steps | ret(last 50)=+17.60 win_sr=72% cum_sr=73%] + ... [3 sheep | 3,214,664 steps | ret(last 50)=+25.44 win_sr=98% cum_sr=87%] + ... [3 sheep | 3,314,664 steps | ret(last 50)=+25.73 win_sr=92% cum_sr=90%] + ... [3 sheep | 3,414,664 steps | ret(last 50)=+28.01 win_sr=98% cum_sr=92%] + ... [3 sheep | 3,514,664 steps | ret(last 50)=+25.71 win_sr=94% cum_sr=93%] + ... [3 sheep | 3,614,664 steps | ret(last 50)=+24.73 win_sr=94% cum_sr=93%] + ... [3 sheep | 3,714,664 steps | ret(last 50)=+23.51 win_sr=88% cum_sr=92%] + ... [3 sheep | 3,814,664 steps | ret(last 50)=+25.11 win_sr=96% cum_sr=93%] + ... [3 sheep | 3,914,664 steps | ret(last 50)=+27.02 win_sr=100% cum_sr=93%] + ... [3 sheep | 4,014,664 steps | ret(last 50)=+24.67 win_sr=94% cum_sr=94%] + ... [3 sheep | 4,114,664 steps | ret(last 50)=+26.08 win_sr=98% cum_sr=94%] + ... [3 sheep | 4,214,664 steps | ret(last 50)=+26.69 win_sr=98% cum_sr=94%] + ... [3 sheep | 4,314,664 steps | ret(last 50)=+24.01 win_sr=92% cum_sr=94%] + ... [3 sheep | 4,414,664 steps | ret(last 50)=+25.74 win_sr=98% cum_sr=94%] + ... [3 sheep | 4,514,664 steps | ret(last 50)=+27.43 win_sr=100% cum_sr=95%] +[Stage n_sheep=3] evaluating 30 eps +[Stage n_sheep=3] sr=100% mean_len=769 mean_min_pen=3.5m mean_act=0.72 + failure modes: SUCCESS=30 + reward/step: progress=+0.1121 alignment=+0.0078 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0390 step_cost=-0.0200 complete=+0.1301 + +[Stage n_sheep=4] training 1,500,000 steps + ... [4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [4 sheep | 4,621,992 steps | ret(last 50)=+32.50 win_sr=100% cum_sr=96%] + ... [4 sheep | 4,721,992 steps | ret(last 50)=+31.21 win_sr=100% cum_sr=98%] + ... [4 sheep | 4,821,992 steps | ret(last 50)=+34.05 win_sr=100% cum_sr=99%] + ... [4 sheep | 4,921,992 steps | ret(last 50)=+32.04 win_sr=100% cum_sr=99%] + ... [4 sheep | 5,021,992 steps | ret(last 50)=+29.20 win_sr=100% cum_sr=99%] + ... [4 sheep | 5,121,992 steps | ret(last 50)=+31.56 win_sr=100% cum_sr=99%] + ... [4 sheep | 5,221,992 steps | ret(last 50)=+31.25 win_sr=100% cum_sr=100%] + ... [4 sheep | 5,321,992 steps | ret(last 50)=+30.62 win_sr=100% cum_sr=100%] + ... [4 sheep | 5,421,992 steps | ret(last 50)=+30.44 win_sr=100% cum_sr=100%] + ... [4 sheep | 5,521,992 steps | ret(last 50)=+32.84 win_sr=100% cum_sr=100%] + ... [4 sheep | 5,621,992 steps | ret(last 50)=+30.98 win_sr=100% cum_sr=100%] + ... [4 sheep | 5,721,992 steps | ret(last 50)=+28.77 win_sr=98% cum_sr=100%] + ... [4 sheep | 5,821,992 steps | ret(last 50)=+29.24 win_sr=100% cum_sr=100%] + ... [4 sheep | 5,921,992 steps | ret(last 50)=+30.83 win_sr=100% cum_sr=100%] + ... [4 sheep | 6,021,992 steps | ret(last 50)=+30.06 win_sr=100% cum_sr=100%] +[Stage n_sheep=4] evaluating 30 eps +[Stage n_sheep=4] sr=100% mean_len=750 mean_min_pen=3.5m mean_act=1.23 + failure modes: SUCCESS=30 + reward/step: progress=+0.1586 alignment=+0.0113 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0533 step_cost=-0.0200 complete=+0.1334 + +[Stage n_sheep=5] training 1,500,000 steps + ... [5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [5 sheep | 6,129,320 steps | ret(last 50)=+31.97 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,229,320 steps | ret(last 50)=+32.32 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,329,320 steps | ret(last 50)=+34.26 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,429,320 steps | ret(last 50)=+33.75 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,529,320 steps | ret(last 50)=+34.77 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,629,320 steps | ret(last 50)=+34.06 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,729,320 steps | ret(last 50)=+32.39 win_sr=96% cum_sr=100%] + ... [5 sheep | 6,829,320 steps | ret(last 50)=+32.33 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,929,320 steps | ret(last 50)=+33.29 win_sr=100% cum_sr=100%] + ... [5 sheep | 7,029,320 steps | ret(last 50)=+32.12 win_sr=100% cum_sr=100%] + ... [5 sheep | 7,129,320 steps | ret(last 50)=+32.58 win_sr=100% cum_sr=100%] + ... [5 sheep | 7,229,320 steps | ret(last 50)=+33.27 win_sr=100% cum_sr=100%] + ... [5 sheep | 7,329,320 steps | ret(last 50)=+33.64 win_sr=100% cum_sr=100%] + ... [5 sheep | 7,429,320 steps | ret(last 50)=+32.67 win_sr=100% cum_sr=100%] + ... [5 sheep | 7,529,320 steps | ret(last 50)=+32.79 win_sr=100% cum_sr=100%] +[Stage n_sheep=5] evaluating 30 eps +[Stage n_sheep=5] sr=97% mean_len=921 mean_min_pen=3.2m mean_act=1.33 + failure modes: SUCCESS=29 PARTIAL_3of5=1 + reward/step: progress=+0.1565 alignment=+0.0135 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0536 step_cost=-0.0200 complete=+0.1050 + +[Stage n_sheep=6] training 1,500,000 steps + ... [6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [6 sheep | 7,636,648 steps | ret(last 50)=+35.93 win_sr=100% cum_sr=96%] + ... [6 sheep | 7,736,648 steps | ret(last 50)=+37.56 win_sr=100% cum_sr=97%] + ... [6 sheep | 7,836,648 steps | ret(last 50)=+34.93 win_sr=100% cum_sr=98%] + ... [6 sheep | 7,936,648 steps | ret(last 50)=+32.71 win_sr=98% cum_sr=98%] + ... [6 sheep | 8,036,648 steps | ret(last 50)=+36.84 win_sr=100% cum_sr=99%] + ... [6 sheep | 8,136,648 steps | ret(last 50)=+35.11 win_sr=100% cum_sr=99%] + ... [6 sheep | 8,236,648 steps | ret(last 50)=+36.54 win_sr=100% cum_sr=99%] + ... [6 sheep | 8,336,648 steps | ret(last 50)=+34.67 win_sr=100% cum_sr=99%] + ... [6 sheep | 8,436,648 steps | ret(last 50)=+36.14 win_sr=100% cum_sr=99%] + ... [6 sheep | 8,536,648 steps | ret(last 50)=+36.95 win_sr=100% cum_sr=99%] + ... [6 sheep | 8,636,648 steps | ret(last 50)=+35.42 win_sr=100% cum_sr=99%] + ... [6 sheep | 8,736,648 steps | ret(last 50)=+33.44 win_sr=100% cum_sr=100%] + ... [6 sheep | 8,836,648 steps | ret(last 50)=+36.70 win_sr=100% cum_sr=100%] + ... [6 sheep | 8,936,648 steps | ret(last 50)=+34.03 win_sr=100% cum_sr=100%] + ... [6 sheep | 9,036,648 steps | ret(last 50)=+34.53 win_sr=100% cum_sr=100%] +[Stage n_sheep=6] evaluating 30 eps +[Stage n_sheep=6] sr=97% mean_len=1193 mean_min_pen=3.4m mean_act=1.36 + failure modes: SUCCESS=29 COMPACT_CANT_DRIVE=1 + reward/step: progress=+0.1597 alignment=+0.0173 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0492 step_cost=-0.0200 complete=+0.0810 + +[Stage n_sheep=7] training 1,500,000 steps + ... [7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [7 sheep | 9,143,976 steps | ret(last 50)=+40.54 win_sr=100% cum_sr=100%] + ... [7 sheep | 9,243,976 steps | ret(last 50)=+38.70 win_sr=98% cum_sr=99%] + ... [7 sheep | 9,343,976 steps | ret(last 50)=+38.13 win_sr=100% cum_sr=100%] + ... [7 sheep | 9,443,976 steps | ret(last 50)=+40.37 win_sr=100% cum_sr=100%] + ... [7 sheep | 9,543,976 steps | ret(last 50)=+39.40 win_sr=100% cum_sr=99%] + ... [7 sheep | 9,643,976 steps | ret(last 50)=+40.44 win_sr=98% cum_sr=99%] + ... [7 sheep | 9,743,976 steps | ret(last 50)=+37.74 win_sr=100% cum_sr=99%] + ... [7 sheep | 9,843,976 steps | ret(last 50)=+39.91 win_sr=98% cum_sr=99%] + ... [7 sheep | 9,943,976 steps | ret(last 50)=+40.67 win_sr=100% cum_sr=99%] + ... [7 sheep | 10,043,976 steps | ret(last 50)=+35.38 win_sr=100% cum_sr=99%] + ... [7 sheep | 10,143,976 steps | ret(last 50)=+38.31 win_sr=100% cum_sr=99%] + ... [7 sheep | 10,243,976 steps | ret(last 50)=+40.86 win_sr=100% cum_sr=99%] + ... [7 sheep | 10,343,976 steps | ret(last 50)=+40.95 win_sr=100% cum_sr=99%] + ... [7 sheep | 10,443,976 steps | ret(last 50)=+37.90 win_sr=100% cum_sr=99%] + ... [7 sheep | 10,543,976 steps | ret(last 50)=+39.07 win_sr=100% cum_sr=99%] +[Stage n_sheep=7] evaluating 30 eps +[Stage n_sheep=7] sr=100% mean_len=1209 mean_min_pen=3.2m mean_act=1.37 + failure modes: SUCCESS=30 + reward/step: progress=+0.1774 alignment=+0.0179 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0579 step_cost=-0.0200 complete=+0.0827 + +[Stage n_sheep=8] training 1,500,000 steps + ... [8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [8 sheep | 10,651,304 steps | ret(last 50)=+42.81 win_sr=100% cum_sr=100%] + ... [8 sheep | 10,751,304 steps | ret(last 50)=+44.59 win_sr=100% cum_sr=100%] + ... [8 sheep | 10,851,304 steps | ret(last 50)=+45.59 win_sr=98% cum_sr=99%] + ... [8 sheep | 10,951,304 steps | ret(last 50)=+42.27 win_sr=98% cum_sr=99%] + ... [8 sheep | 11,051,304 steps | ret(last 50)=+45.05 win_sr=98% cum_sr=99%] + ... [8 sheep | 11,151,304 steps | ret(last 50)=+45.50 win_sr=100% cum_sr=99%] + ... [8 sheep | 11,251,304 steps | ret(last 50)=+43.60 win_sr=100% cum_sr=99%] + ... [8 sheep | 11,351,304 steps | ret(last 50)=+40.26 win_sr=100% cum_sr=99%] + ... [8 sheep | 11,451,304 steps | ret(last 50)=+43.00 win_sr=100% cum_sr=99%] + ... [8 sheep | 11,551,304 steps | ret(last 50)=+43.16 win_sr=100% cum_sr=100%] + ... [8 sheep | 11,651,304 steps | ret(last 50)=+42.78 win_sr=100% cum_sr=100%] + ... [8 sheep | 11,751,304 steps | ret(last 50)=+42.32 win_sr=98% cum_sr=99%] + ... [8 sheep | 11,851,304 steps | ret(last 50)=+41.62 win_sr=100% cum_sr=99%] + ... [8 sheep | 11,951,304 steps | ret(last 50)=+42.56 win_sr=98% cum_sr=99%] + ... [8 sheep | 12,051,304 steps | ret(last 50)=+41.83 win_sr=100% cum_sr=99%] +[Stage n_sheep=8] evaluating 30 eps +[Stage n_sheep=8] sr=100% mean_len=1492 mean_min_pen=3.2m mean_act=1.38 + failure modes: SUCCESS=30 + reward/step: progress=+0.1916 alignment=+0.0190 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0536 step_cost=-0.0200 complete=+0.0670 + +[Stage n_sheep=9] training 1,500,000 steps + ... [9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [9 sheep | 12,158,632 steps | ret(last 50)=+46.03 win_sr=100% cum_sr=100%] + ... [9 sheep | 12,258,632 steps | ret(last 50)=+46.87 win_sr=96% cum_sr=97%] + ... [9 sheep | 12,358,632 steps | ret(last 50)=+45.48 win_sr=96% cum_sr=97%] + ... [9 sheep | 12,458,632 steps | ret(last 50)=+47.02 win_sr=96% cum_sr=97%] + ... [9 sheep | 12,558,632 steps | ret(last 50)=+44.66 win_sr=96% cum_sr=97%] + ... [9 sheep | 12,658,632 steps | ret(last 50)=+46.60 win_sr=96% cum_sr=97%] + ... [9 sheep | 12,758,632 steps | ret(last 50)=+41.85 win_sr=96% cum_sr=97%] + ... [9 sheep | 12,858,632 steps | ret(last 50)=+47.81 win_sr=96% cum_sr=97%] + ... [9 sheep | 12,958,632 steps | ret(last 50)=+44.92 win_sr=90% cum_sr=96%] + ... [9 sheep | 13,058,632 steps | ret(last 50)=+47.40 win_sr=90% cum_sr=96%] + ... [9 sheep | 13,158,632 steps | ret(last 50)=+47.16 win_sr=92% cum_sr=95%] + ... [9 sheep | 13,258,632 steps | ret(last 50)=+45.55 win_sr=98% cum_sr=96%] + ... [9 sheep | 13,358,632 steps | ret(last 50)=+46.87 win_sr=96% cum_sr=96%] + ... [9 sheep | 13,458,632 steps | ret(last 50)=+47.69 win_sr=98% cum_sr=96%] + ... [9 sheep | 13,558,632 steps | ret(last 50)=+45.17 win_sr=94% cum_sr=96%] +[Stage n_sheep=9] evaluating 30 eps +[Stage n_sheep=9] sr=90% mean_len=1628 mean_min_pen=3.2m mean_act=1.38 + failure modes: SUCCESS=27 COMPACT_CANT_DRIVE=3 + reward/step: progress=+0.1802 alignment=+0.0204 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0514 step_cost=-0.0200 complete=+0.0553 + +[Stage n_sheep=10] training 1,500,000 steps + ... [10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [10 sheep | 13,665,960 steps | ret(last 50)=+49.00 win_sr=82% cum_sr=82%] + ... [10 sheep | 13,765,960 steps | ret(last 50)=+48.55 win_sr=86% cum_sr=84%] + ... [10 sheep | 13,865,960 steps | ret(last 50)=+46.53 win_sr=80% cum_sr=83%] + ... [10 sheep | 13,965,960 steps | ret(last 50)=+44.70 win_sr=82% cum_sr=83%] + ... [10 sheep | 14,065,960 steps | ret(last 50)=+52.57 win_sr=92% cum_sr=85%] + ... [10 sheep | 14,165,960 steps | ret(last 50)=+50.20 win_sr=82% cum_sr=85%] + ... [10 sheep | 14,265,960 steps | ret(last 50)=+50.34 win_sr=90% cum_sr=85%] + ... [10 sheep | 14,365,960 steps | ret(last 50)=+50.24 win_sr=90% cum_sr=86%] + ... [10 sheep | 14,465,960 steps | ret(last 50)=+48.40 win_sr=86% cum_sr=86%] + ... [10 sheep | 14,565,960 steps | ret(last 50)=+48.74 win_sr=88% cum_sr=87%] + ... [10 sheep | 14,665,960 steps | ret(last 50)=+48.46 win_sr=80% cum_sr=86%] + ... [10 sheep | 14,765,960 steps | ret(last 50)=+51.46 win_sr=70% cum_sr=85%] + ... [10 sheep | 14,865,960 steps | ret(last 50)=+49.28 win_sr=92% cum_sr=85%] + ... [10 sheep | 14,965,960 steps | ret(last 50)=+51.12 win_sr=88% cum_sr=86%] + ... [10 sheep | 15,065,960 steps | ret(last 50)=+52.03 win_sr=84% cum_sr=85%] +[Stage n_sheep=10] evaluating 30 eps +[Stage n_sheep=10] sr=93% mean_len=1870 mean_min_pen=3.1m mean_act=1.38 + failure modes: SUCCESS=28 COMPACT_CANT_DRIVE=2 + reward/step: progress=+0.1727 alignment=+0.0219 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0522 step_cost=-0.0200 complete=+0.0499 + +====================================================================== + TRAINING SUMMARY +====================================================================== + n_sheep=1 sr=100% len= 234 min_pen= 3.7m act=0.41 + n_sheep=2 sr= 87% len= 1064 min_pen= 4.1m act=0.59 + n_sheep=3 sr=100% len= 769 min_pen= 3.5m act=0.72 + n_sheep=4 sr=100% len= 750 min_pen= 3.5m act=1.23 + n_sheep=5 sr= 97% len= 921 min_pen= 3.2m act=1.33 + n_sheep=6 sr= 97% len= 1193 min_pen= 3.4m act=1.36 + n_sheep=7 sr=100% len= 1209 min_pen= 3.2m act=1.37 + n_sheep=8 sr=100% len= 1492 min_pen= 3.2m act=1.38 + n_sheep=9 sr= 90% len= 1628 min_pen= 3.2m act=1.38 + n_sheep=10 sr= 93% len= 1870 min_pen= 3.1m act=1.38 + + Total time: 92.0 min + Artefacts: runs/v2/ + Plots: runs/v2/success_rate.png, runs/v2/eval/ diff --git a/training/runs/v2/config.json b/training/runs/v2/config.json new file mode 100644 index 0000000..89f2062 --- /dev/null +++ b/training/runs/v2/config.json @@ -0,0 +1,14 @@ +{ + "W_PER_SHEEP": 2.0, + "W_ALIGN": 0.05, + "W_PEN_BONUS": 10.0, + "W_COMPLETE": 100.0, + "W_STEP_COST": 0.02, + "W_COMPACT": 0.0, + "W_WALL_TOUCH": 0.0, + "WALL_TOUCH_BUFFER": 0.4, + "ALIGN_SHAPE": "standoff", + "ALIGN_GATED": true, + "ENTRY_AWARE": true, + "ent_coef": 0.02 +} \ No newline at end of file diff --git a/training/runs/v2/eval/ep_10s_success.gif b/training/runs/v2/eval/ep_10s_success.gif new file mode 100644 index 0000000..c6ffdd0 Binary files /dev/null and b/training/runs/v2/eval/ep_10s_success.gif differ diff --git a/training/runs/v2/eval/ep_1s_success.gif b/training/runs/v2/eval/ep_1s_success.gif new file mode 100644 index 0000000..12967b0 Binary files /dev/null and b/training/runs/v2/eval/ep_1s_success.gif differ diff --git a/training/runs/v2/eval/ep_2s_success.gif b/training/runs/v2/eval/ep_2s_success.gif new file mode 100644 index 0000000..4b35be6 Binary files /dev/null and b/training/runs/v2/eval/ep_2s_success.gif differ diff --git a/training/runs/v2/eval/ep_3s_success.gif b/training/runs/v2/eval/ep_3s_success.gif new file mode 100644 index 0000000..8695631 Binary files /dev/null and b/training/runs/v2/eval/ep_3s_success.gif differ diff --git a/training/runs/v2/eval/ep_4s_success.gif b/training/runs/v2/eval/ep_4s_success.gif new file mode 100644 index 0000000..8af261e Binary files /dev/null and b/training/runs/v2/eval/ep_4s_success.gif differ diff --git a/training/runs/v2/eval/ep_5s_success.gif b/training/runs/v2/eval/ep_5s_success.gif new file mode 100644 index 0000000..d4c1523 Binary files /dev/null and b/training/runs/v2/eval/ep_5s_success.gif differ diff --git a/training/runs/v2/eval/ep_6s_success.gif b/training/runs/v2/eval/ep_6s_success.gif new file mode 100644 index 0000000..375beda Binary files /dev/null and b/training/runs/v2/eval/ep_6s_success.gif differ diff --git a/training/runs/v2/eval/ep_7s_success.gif b/training/runs/v2/eval/ep_7s_success.gif new file mode 100644 index 0000000..f78c97b Binary files /dev/null and b/training/runs/v2/eval/ep_7s_success.gif differ diff --git a/training/runs/v2/eval/ep_8s_success.gif b/training/runs/v2/eval/ep_8s_success.gif new file mode 100644 index 0000000..c4d3e49 Binary files /dev/null and b/training/runs/v2/eval/ep_8s_success.gif differ diff --git a/training/runs/v2/eval/ep_9s_success.gif b/training/runs/v2/eval/ep_9s_success.gif new file mode 100644 index 0000000..ae6dc44 Binary files /dev/null and b/training/runs/v2/eval/ep_9s_success.gif differ diff --git a/training/runs/v2/eval/traj_10s_success.png b/training/runs/v2/eval/traj_10s_success.png new file mode 100644 index 0000000..0b6dc1a Binary files /dev/null and b/training/runs/v2/eval/traj_10s_success.png differ diff --git a/training/runs/v2/eval/traj_1s_success.png b/training/runs/v2/eval/traj_1s_success.png new file mode 100644 index 0000000..ad315fb Binary files /dev/null and b/training/runs/v2/eval/traj_1s_success.png differ diff --git a/training/runs/v2/eval/traj_2s_success.png b/training/runs/v2/eval/traj_2s_success.png new file mode 100644 index 0000000..6af9635 Binary files /dev/null and b/training/runs/v2/eval/traj_2s_success.png differ diff --git a/training/runs/v2/eval/traj_3s_success.png b/training/runs/v2/eval/traj_3s_success.png new file mode 100644 index 0000000..d8d0f25 Binary files /dev/null and b/training/runs/v2/eval/traj_3s_success.png differ diff --git a/training/runs/v2/eval/traj_4s_success.png b/training/runs/v2/eval/traj_4s_success.png new file mode 100644 index 0000000..41070d1 Binary files /dev/null and b/training/runs/v2/eval/traj_4s_success.png differ diff --git a/training/runs/v2/eval/traj_5s_success.png b/training/runs/v2/eval/traj_5s_success.png new file mode 100644 index 0000000..c052565 Binary files /dev/null and b/training/runs/v2/eval/traj_5s_success.png differ diff --git a/training/runs/v2/eval/traj_6s_success.png b/training/runs/v2/eval/traj_6s_success.png new file mode 100644 index 0000000..aa7f69b Binary files /dev/null and b/training/runs/v2/eval/traj_6s_success.png differ diff --git a/training/runs/v2/eval/traj_7s_success.png b/training/runs/v2/eval/traj_7s_success.png new file mode 100644 index 0000000..0bd27ba Binary files /dev/null and b/training/runs/v2/eval/traj_7s_success.png differ diff --git a/training/runs/v2/eval/traj_8s_success.png b/training/runs/v2/eval/traj_8s_success.png new file mode 100644 index 0000000..de89f95 Binary files /dev/null and b/training/runs/v2/eval/traj_8s_success.png differ diff --git a/training/runs/v2/eval/traj_9s_success.png b/training/runs/v2/eval/traj_9s_success.png new file mode 100644 index 0000000..d9075cc Binary files /dev/null and b/training/runs/v2/eval/traj_9s_success.png differ diff --git a/training/runs/v2/eval/ts_10s_success.png b/training/runs/v2/eval/ts_10s_success.png new file mode 100644 index 0000000..ab13e1d Binary files /dev/null and b/training/runs/v2/eval/ts_10s_success.png differ diff --git a/training/runs/v2/eval/ts_1s_success.png b/training/runs/v2/eval/ts_1s_success.png new file mode 100644 index 0000000..63eb7d9 Binary files /dev/null and b/training/runs/v2/eval/ts_1s_success.png differ diff --git a/training/runs/v2/eval/ts_2s_success.png b/training/runs/v2/eval/ts_2s_success.png new file mode 100644 index 0000000..cd7970f Binary files /dev/null and b/training/runs/v2/eval/ts_2s_success.png differ diff --git a/training/runs/v2/eval/ts_3s_success.png b/training/runs/v2/eval/ts_3s_success.png new file mode 100644 index 0000000..f4d29b7 Binary files /dev/null and b/training/runs/v2/eval/ts_3s_success.png differ diff --git a/training/runs/v2/eval/ts_4s_success.png b/training/runs/v2/eval/ts_4s_success.png new file mode 100644 index 0000000..f982b19 Binary files /dev/null and b/training/runs/v2/eval/ts_4s_success.png differ diff --git a/training/runs/v2/eval/ts_5s_success.png b/training/runs/v2/eval/ts_5s_success.png new file mode 100644 index 0000000..9166e19 Binary files /dev/null and b/training/runs/v2/eval/ts_5s_success.png differ diff --git a/training/runs/v2/eval/ts_6s_success.png b/training/runs/v2/eval/ts_6s_success.png new file mode 100644 index 0000000..05e743e Binary files /dev/null and b/training/runs/v2/eval/ts_6s_success.png differ diff --git a/training/runs/v2/eval/ts_7s_success.png b/training/runs/v2/eval/ts_7s_success.png new file mode 100644 index 0000000..5802086 Binary files /dev/null and b/training/runs/v2/eval/ts_7s_success.png differ diff --git a/training/runs/v2/eval/ts_8s_success.png b/training/runs/v2/eval/ts_8s_success.png new file mode 100644 index 0000000..e3a3be2 Binary files /dev/null and b/training/runs/v2/eval/ts_8s_success.png differ diff --git a/training/runs/v2/eval/ts_9s_success.png b/training/runs/v2/eval/ts_9s_success.png new file mode 100644 index 0000000..a8cc747 Binary files /dev/null and b/training/runs/v2/eval/ts_9s_success.png differ diff --git a/training/runs/v2/final_model.zip b/training/runs/v2/final_model.zip new file mode 100644 index 0000000..c3f69ce Binary files /dev/null and b/training/runs/v2/final_model.zip differ diff --git a/training/runs/v2/stage_results.json b/training/runs/v2/stage_results.json new file mode 100644 index 0000000..97dcbc3 --- /dev/null +++ b/training/runs/v2/stage_results.json @@ -0,0 +1,197 @@ +[ + { + "sr": 1.0, + "mean_len": 234.0, + "mean_min_pen": 3.6668872674306234, + "mean_act": 0.4068990752695293, + "failure_modes": { + "SUCCESS": 30 + }, + "reward_per_step": { + "progress": 0.11183513424165568, + "alignment": 0.0002786317654047819, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.042735042735042736, + "step_cost": -0.019999999999999716, + "complete": 0.42735042735042733 + }, + "n_sheep": 1 + }, + { + "sr": 0.8666666666666667, + "mean_len": 1063.6666666666667, + "mean_min_pen": 4.120940693219503, + "mean_act": 0.5870139278816712, + "failure_modes": { + "SUCCESS": 26, + "COMPACT_CANT_DRIVE": 4 + }, + "reward_per_step": { + "progress": 0.05651345582855781, + "alignment": 0.007121706701510673, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.01629583202757756, + "step_cost": -0.0199999999999909, + "complete": 0.08147916013788781 + }, + "n_sheep": 2 + }, + { + "sr": 1.0, + "mean_len": 768.6, + "mean_min_pen": 3.4802104949951174, + "mean_act": 0.7173416881465967, + "failure_modes": { + "SUCCESS": 30 + }, + "reward_per_step": { + "progress": 0.11210350058336283, + "alignment": 0.007752684222105381, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.039032006245121, + "step_cost": -0.019999999999994387, + "complete": 0.13010668748373666 + }, + "n_sheep": 3 + }, + { + "sr": 1.0, + "mean_len": 749.8666666666667, + "mean_min_pen": 3.491257842381795, + "mean_act": 1.2302732761302806, + "failure_modes": { + "SUCCESS": 30 + }, + "reward_per_step": { + "progress": 0.15859288932254823, + "alignment": 0.011327628562653137, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.05334281650071124, + "step_cost": -0.0199999999999947, + "complete": 0.13335704125177808 + }, + "n_sheep": 4 + }, + { + "sr": 0.9666666666666667, + "mean_len": 920.5666666666667, + "mean_min_pen": 3.2368871172269187, + "mean_act": 1.329068384219205, + "failure_modes": { + "SUCCESS": 29, + "PARTIAL_3of5": 1 + }, + "reward_per_step": { + "progress": 0.15654392868672135, + "alignment": 0.013497823599666012, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.05359017996161784, + "step_cost": -0.019999999999992312, + "complete": 0.10500778505992686 + }, + "n_sheep": 5 + }, + { + "sr": 0.9666666666666667, + "mean_len": 1193.2333333333333, + "mean_min_pen": 3.4217512369155885, + "mean_act": 1.3575613093489967, + "failure_modes": { + "COMPACT_CANT_DRIVE": 1, + "SUCCESS": 29 + }, + "reward_per_step": { + "progress": 0.15969395095863717, + "alignment": 0.017340700156353795, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.049166131240048046, + "step_cost": -0.01999999999998991, + "complete": 0.08101237533871554 + }, + "n_sheep": 6 + }, + { + "sr": 1.0, + "mean_len": 1209.4666666666667, + "mean_min_pen": 3.2339003403981526, + "mean_act": 1.3714931576761524, + "failure_modes": { + "SUCCESS": 30 + }, + "reward_per_step": { + "progress": 0.17738547200352864, + "alignment": 0.017914342656107935, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.057876750082681075, + "step_cost": -0.019999999999989804, + "complete": 0.08268107154668725 + }, + "n_sheep": 7 + }, + { + "sr": 1.0, + "mean_len": 1491.7666666666667, + "mean_min_pen": 3.216744065284729, + "mean_act": 1.3783802580111435, + "failure_modes": { + "SUCCESS": 30 + }, + "reward_per_step": { + "progress": 0.19162546125035912, + "alignment": 0.018971863842493202, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.05362768976381472, + "step_cost": -0.01999999999998829, + "complete": 0.06703461220476839 + }, + "n_sheep": 8 + }, + { + "sr": 0.9, + "mean_len": 1627.5666666666666, + "mean_min_pen": 3.23857311407725, + "mean_act": 1.3832202011732966, + "failure_modes": { + "SUCCESS": 27, + "COMPACT_CANT_DRIVE": 3 + }, + "reward_per_step": { + "progress": 0.18015228593205654, + "alignment": 0.020407598899987247, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.05140598439388044, + "step_cost": -0.01999999999998775, + "complete": 0.055297274049194094 + }, + "n_sheep": 9 + }, + { + "sr": 0.9333333333333333, + "mean_len": 1869.9666666666667, + "mean_min_pen": 3.1344878753026326, + "mean_act": 1.3841143385300063, + "failure_modes": { + "SUCCESS": 28, + "COMPACT_CANT_DRIVE": 2 + }, + "reward_per_step": { + "progress": 0.17267533684098152, + "alignment": 0.021850885374692264, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.05222909499278062, + "step_cost": -0.019999999999986983, + "complete": 0.04991176313303267 + }, + "n_sheep": 10 + } +] \ No newline at end of file diff --git a/training/runs/v2/success_rate.png b/training/runs/v2/success_rate.png new file mode 100644 index 0000000..030c861 Binary files /dev/null and b/training/runs/v2/success_rate.png differ diff --git a/training/runs/v2/vecnorm.pkl b/training/runs/v2/vecnorm.pkl new file mode 100644 index 0000000..776703e Binary files /dev/null and b/training/runs/v2/vecnorm.pkl differ