diff --git a/training/config.json b/training/config.json index 1f25dd3..b1e5149 100644 --- a/training/config.json +++ b/training/config.json @@ -9,6 +9,6 @@ "WALL_TOUCH_BUFFER": 0.4, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, - "ENTRY_AWARE": false, + "ENTRY_AWARE": true, "ent_coef": 0.02 } diff --git a/training/runs/v1.log b/training/runs/v1.log new file mode 100644 index 0000000..1a4260e --- /dev/null +++ b/training/runs/v1.log @@ -0,0 +1,242 @@ +Config loaded from config.json +Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.01, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02} +Run dir: runs/v1 +Curriculum: 1 → 10 sheep, 1,500,000 steps/stage + + +[Stage n_sheep=1] training 1,500,000 steps + ... [1 sheep | 100,000 steps | ret(last 40)=-19.52 win_sr=2% cum_sr=2%] + ... [1 sheep | 200,000 steps | ret(last 50)=-21.11 win_sr=4% cum_sr=4%] + ... [1 sheep | 300,000 steps | ret(last 50)=-7.06 win_sr=12% cum_sr=7%] + ... [1 sheep | 400,000 steps | ret(last 50)=+18.18 win_sr=90% cum_sr=40%] + ... [1 sheep | 500,000 steps | ret(last 50)=+16.17 win_sr=100% cum_sr=69%] + ... [1 sheep | 600,000 steps | ret(last 50)=+14.68 win_sr=100% cum_sr=82%] + ... [1 sheep | 700,000 steps | ret(last 50)=+14.33 win_sr=100% cum_sr=88%] + ... [1 sheep | 800,000 steps | ret(last 50)=+14.20 win_sr=100% cum_sr=91%] + ... [1 sheep | 900,000 steps | ret(last 50)=+13.82 win_sr=100% cum_sr=93%] + ... [1 sheep | 1,000,000 steps | ret(last 50)=+13.76 win_sr=100% cum_sr=94%] + ... [1 sheep | 1,100,000 steps | ret(last 50)=+13.72 win_sr=100% cum_sr=95%] + ... [1 sheep | 1,200,000 steps | ret(last 50)=+13.41 win_sr=100% cum_sr=95%] + ... [1 sheep | 1,300,000 steps | ret(last 50)=+13.42 win_sr=100% cum_sr=96%] + ... [1 sheep | 1,400,000 steps | ret(last 50)=+13.40 win_sr=100% cum_sr=96%] + ... [1 sheep | 1,500,000 steps | ret(last 50)=+13.24 win_sr=100% cum_sr=97%] +[Stage n_sheep=1] evaluating 30 eps +[Stage n_sheep=1] sr=100% mean_len=243 mean_min_pen=3.7m mean_act=0.39 + failure modes: SUCCESS=30 + reward/step: progress=+0.1141 alignment=+0.0003 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0412 step_cost=-0.0200 complete=+0.4115 + +[Stage n_sheep=2] training 1,500,000 steps + ... [2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [2 sheep | 1,607,336 steps | ret(last 42)=-4.07 win_sr=7% cum_sr=7%] + ... [2 sheep | 1,707,336 steps | ret(last 50)=-6.10 win_sr=0% cum_sr=4%] + ... [2 sheep | 1,807,336 steps | ret(last 50)=-5.57 win_sr=2% cum_sr=3%] + ... [2 sheep | 1,907,336 steps | ret(last 50)=-5.04 win_sr=4% cum_sr=4%] + ... [2 sheep | 2,007,336 steps | ret(last 50)=-4.27 win_sr=10% cum_sr=5%] + ... [2 sheep | 2,107,336 steps | ret(last 50)=-4.42 win_sr=6% cum_sr=5%] + ... [2 sheep | 2,207,336 steps | ret(last 50)=+4.57 win_sr=50% cum_sr=13%] + ... [2 sheep | 2,307,336 steps | ret(last 50)=+11.35 win_sr=70% cum_sr=24%] + ... [2 sheep | 2,407,336 steps | ret(last 50)=+15.75 win_sr=86% cum_sr=32%] + ... [2 sheep | 2,507,336 steps | ret(last 50)=+19.97 win_sr=100% cum_sr=44%] + ... [2 sheep | 2,607,336 steps | ret(last 50)=+20.73 win_sr=100% cum_sr=54%] + ... [2 sheep | 2,707,336 steps | ret(last 50)=+19.81 win_sr=100% cum_sr=62%] + ... [2 sheep | 2,807,336 steps | ret(last 50)=+20.83 win_sr=100% cum_sr=67%] + ... [2 sheep | 2,907,336 steps | ret(last 50)=+20.43 win_sr=100% cum_sr=72%] + ... [2 sheep | 3,007,336 steps | ret(last 50)=+19.65 win_sr=100% cum_sr=75%] +[Stage n_sheep=2] evaluating 30 eps +[Stage n_sheep=2] sr=63% mean_len=1325 mean_min_pen=3.1m mean_act=0.42 + failure modes: SUCCESS=19 PARTIAL_1of2=10 COMPACT_CANT_DRIVE=1 + reward/step: progress=+0.0453 alignment=+0.0065 compact=+0.0000 wall_touch=-0.0052 pen_bonus=+0.0123 step_cost=-0.0200 complete=+0.0478 + +[Stage n_sheep=3] training 1,500,000 steps + ... [3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [3 sheep | 3,114,664 steps | ret(last 50)=+28.02 win_sr=100% cum_sr=100%] + ... [3 sheep | 3,214,664 steps | ret(last 50)=+24.04 win_sr=96% cum_sr=99%] + ... [3 sheep | 3,314,664 steps | ret(last 50)=+27.02 win_sr=100% cum_sr=99%] + ... [3 sheep | 3,414,664 steps | ret(last 50)=+25.53 win_sr=100% cum_sr=99%] + ... [3 sheep | 3,514,664 steps | ret(last 50)=+25.13 win_sr=96% cum_sr=99%] + ... [3 sheep | 3,614,664 steps | ret(last 50)=+26.45 win_sr=100% cum_sr=99%] + ... [3 sheep | 3,714,664 steps | ret(last 50)=+25.83 win_sr=100% cum_sr=99%] + ... [3 sheep | 3,814,664 steps | ret(last 50)=+26.07 win_sr=100% cum_sr=99%] + ... [3 sheep | 3,914,664 steps | ret(last 50)=+25.03 win_sr=96% cum_sr=99%] + ... [3 sheep | 4,014,664 steps | ret(last 50)=+24.53 win_sr=98% cum_sr=99%] + ... [3 sheep | 4,114,664 steps | ret(last 50)=+24.98 win_sr=100% cum_sr=99%] + ... [3 sheep | 4,214,664 steps | ret(last 50)=+26.81 win_sr=100% cum_sr=99%] + ... [3 sheep | 4,314,664 steps | ret(last 50)=+24.78 win_sr=98% cum_sr=99%] + ... [3 sheep | 4,414,664 steps | ret(last 50)=+26.79 win_sr=100% cum_sr=99%] + ... [3 sheep | 4,514,664 steps | ret(last 50)=+26.26 win_sr=100% cum_sr=99%] +[Stage n_sheep=3] evaluating 30 eps +[Stage n_sheep=3] sr=97% mean_len=828 mean_min_pen=2.7m mean_act=1.15 + failure modes: SUCCESS=29 PARTIAL_1of3=1 + reward/step: progress=+0.1017 alignment=+0.0139 compact=+0.0000 wall_touch=-0.0023 pen_bonus=+0.0354 step_cost=-0.0200 complete=+0.1168 + +[Stage n_sheep=4] training 1,500,000 steps + ... [4 sheep | 4,521,992 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [4 sheep | 4,621,992 steps | ret(last 50)=+28.27 win_sr=90% cum_sr=93%] + ... [4 sheep | 4,721,992 steps | ret(last 50)=+31.16 win_sr=98% cum_sr=95%] + ... [4 sheep | 4,821,992 steps | ret(last 50)=+30.45 win_sr=100% cum_sr=96%] + ... [4 sheep | 4,921,992 steps | ret(last 50)=+31.12 win_sr=100% cum_sr=96%] + ... [4 sheep | 5,021,992 steps | ret(last 50)=+30.78 win_sr=100% cum_sr=97%] + ... [4 sheep | 5,121,992 steps | ret(last 50)=+30.42 win_sr=100% cum_sr=97%] + ... [4 sheep | 5,221,992 steps | ret(last 50)=+31.14 win_sr=100% cum_sr=98%] + ... [4 sheep | 5,321,992 steps | ret(last 50)=+31.20 win_sr=100% cum_sr=98%] + ... [4 sheep | 5,421,992 steps | ret(last 50)=+30.47 win_sr=98% cum_sr=98%] + ... [4 sheep | 5,521,992 steps | ret(last 50)=+30.13 win_sr=100% cum_sr=98%] + ... [4 sheep | 5,621,992 steps | ret(last 50)=+28.52 win_sr=98% cum_sr=98%] + ... [4 sheep | 5,721,992 steps | ret(last 50)=+31.40 win_sr=100% cum_sr=98%] + ... [4 sheep | 5,821,992 steps | ret(last 50)=+30.30 win_sr=100% cum_sr=99%] + ... [4 sheep | 5,921,992 steps | ret(last 50)=+29.45 win_sr=100% cum_sr=99%] + ... [4 sheep | 6,021,992 steps | ret(last 50)=+30.26 win_sr=100% cum_sr=99%] +[Stage n_sheep=4] evaluating 30 eps +[Stage n_sheep=4] sr=57% mean_len=1686 mean_min_pen=1.8m mean_act=1.01 + failure modes: SUCCESS=17 PARTIAL_1of4=10 PARTIAL_2of4=3 + reward/step: progress=+0.0721 alignment=+0.0085 compact=+0.0000 wall_touch=-0.0110 pen_bonus=+0.0166 step_cost=-0.0200 complete=+0.0336 + +[Stage n_sheep=5] training 1,500,000 steps + ... [5 sheep | 6,029,320 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [5 sheep | 6,129,320 steps | ret(last 50)=+34.59 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,229,320 steps | ret(last 50)=+35.53 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,329,320 steps | ret(last 50)=+34.77 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,429,320 steps | ret(last 50)=+34.30 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,529,320 steps | ret(last 50)=+35.12 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,629,320 steps | ret(last 50)=+33.76 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,729,320 steps | ret(last 50)=+34.81 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,829,320 steps | ret(last 50)=+31.82 win_sr=100% cum_sr=100%] + ... [5 sheep | 6,929,320 steps | ret(last 50)=+33.69 win_sr=98% cum_sr=100%] + ... [5 sheep | 7,029,320 steps | ret(last 50)=+31.65 win_sr=100% cum_sr=100%] + ... [5 sheep | 7,129,320 steps | ret(last 50)=+31.83 win_sr=96% cum_sr=99%] + ... [5 sheep | 7,229,320 steps | ret(last 50)=+33.96 win_sr=100% cum_sr=99%] + ... [5 sheep | 7,329,320 steps | ret(last 50)=+33.65 win_sr=98% cum_sr=99%] + ... [5 sheep | 7,429,320 steps | ret(last 50)=+34.20 win_sr=100% cum_sr=99%] + ... [5 sheep | 7,529,320 steps | ret(last 50)=+35.27 win_sr=98% cum_sr=99%] +[Stage n_sheep=5] evaluating 30 eps +[Stage n_sheep=5] sr=63% mean_len=1654 mean_min_pen=1.6m mean_act=1.36 + failure modes: SUCCESS=19 PARTIAL_2of5=9 PARTIAL_3of5=2 + reward/step: progress=+0.1043 alignment=+0.0108 compact=+0.0000 wall_touch=-0.0100 pen_bonus=+0.0240 step_cost=-0.0200 complete=+0.0383 + +[Stage n_sheep=6] training 1,500,000 steps + ... [6 sheep | 7,536,648 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [6 sheep | 7,636,648 steps | ret(last 50)=+37.79 win_sr=98% cum_sr=95%] + ... [6 sheep | 7,736,648 steps | ret(last 50)=+38.87 win_sr=94% cum_sr=95%] + ... [6 sheep | 7,836,648 steps | ret(last 50)=+37.50 win_sr=98% cum_sr=96%] + ... [6 sheep | 7,936,648 steps | ret(last 50)=+39.42 win_sr=94% cum_sr=96%] + ... [6 sheep | 8,036,648 steps | ret(last 50)=+38.28 win_sr=98% cum_sr=96%] + ... [6 sheep | 8,136,648 steps | ret(last 50)=+36.39 win_sr=100% cum_sr=97%] + ... [6 sheep | 8,236,648 steps | ret(last 50)=+39.29 win_sr=100% cum_sr=97%] + ... [6 sheep | 8,336,648 steps | ret(last 50)=+37.92 win_sr=98% cum_sr=97%] + ... [6 sheep | 8,436,648 steps | ret(last 50)=+38.64 win_sr=98% cum_sr=97%] + ... [6 sheep | 8,536,648 steps | ret(last 50)=+38.46 win_sr=98% cum_sr=97%] + ... [6 sheep | 8,636,648 steps | ret(last 50)=+38.08 win_sr=98% cum_sr=97%] + ... [6 sheep | 8,736,648 steps | ret(last 50)=+36.78 win_sr=100% cum_sr=97%] + ... [6 sheep | 8,836,648 steps | ret(last 50)=+36.81 win_sr=98% cum_sr=98%] + ... [6 sheep | 8,936,648 steps | ret(last 50)=+37.89 win_sr=98% cum_sr=98%] + ... [6 sheep | 9,036,648 steps | ret(last 50)=+36.17 win_sr=98% cum_sr=98%] +[Stage n_sheep=6] evaluating 30 eps +[Stage n_sheep=6] sr=33% mean_len=2161 mean_min_pen=1.8m mean_act=1.37 + failure modes: PARTIAL_5of6=14 SUCCESS=10 PARTIAL_4of6=4 COMPACT_CANT_DRIVE=2 + reward/step: progress=+0.0915 alignment=+0.0102 compact=+0.0000 wall_touch=-0.0068 pen_bonus=+0.0225 step_cost=-0.0200 complete=+0.0154 + +[Stage n_sheep=7] training 1,500,000 steps + ... [7 sheep | 9,043,976 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [7 sheep | 9,143,976 steps | ret(last 50)=+39.61 win_sr=98% cum_sr=97%] + ... [7 sheep | 9,243,976 steps | ret(last 50)=+42.39 win_sr=100% cum_sr=99%] + ... [7 sheep | 9,343,976 steps | ret(last 50)=+39.89 win_sr=96% cum_sr=98%] + ... [7 sheep | 9,443,976 steps | ret(last 50)=+42.48 win_sr=98% cum_sr=98%] + ... [7 sheep | 9,543,976 steps | ret(last 50)=+39.16 win_sr=98% cum_sr=98%] + ... [7 sheep | 9,643,976 steps | ret(last 50)=+38.80 win_sr=96% cum_sr=98%] + ... [7 sheep | 9,743,976 steps | ret(last 50)=+43.06 win_sr=96% cum_sr=98%] + ... [7 sheep | 9,843,976 steps | ret(last 50)=+40.04 win_sr=94% cum_sr=98%] + ... [7 sheep | 9,943,976 steps | ret(last 50)=+40.45 win_sr=98% cum_sr=97%] + ... [7 sheep | 10,043,976 steps | ret(last 50)=+39.21 win_sr=96% cum_sr=97%] + ... [7 sheep | 10,143,976 steps | ret(last 50)=+40.23 win_sr=100% cum_sr=97%] + ... [7 sheep | 10,243,976 steps | ret(last 50)=+41.51 win_sr=96% cum_sr=97%] + ... [7 sheep | 10,343,976 steps | ret(last 50)=+40.05 win_sr=98% cum_sr=97%] + ... [7 sheep | 10,443,976 steps | ret(last 50)=+39.17 win_sr=96% cum_sr=97%] + ... [7 sheep | 10,543,976 steps | ret(last 50)=+41.80 win_sr=98% cum_sr=97%] +[Stage n_sheep=7] evaluating 30 eps +[Stage n_sheep=7] sr=20% mean_len=2211 mean_min_pen=1.8m mean_act=1.36 + failure modes: PARTIAL_4of7=11 PARTIAL_3of7=8 SUCCESS=6 PARTIAL_6of7=3 PARTIAL_5of7=2 + reward/step: progress=+0.0879 alignment=+0.0086 compact=+0.0000 wall_touch=-0.0142 pen_bonus=+0.0208 step_cost=-0.0200 complete=+0.0090 + +[Stage n_sheep=8] training 1,500,000 steps + ... [8 sheep | 10,551,304 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [8 sheep | 10,651,304 steps | ret(last 50)=+48.67 win_sr=96% cum_sr=97%] + ... [8 sheep | 10,751,304 steps | ret(last 50)=+46.60 win_sr=96% cum_sr=97%] + ... [8 sheep | 10,851,304 steps | ret(last 50)=+41.39 win_sr=98% cum_sr=96%] + ... [8 sheep | 10,951,304 steps | ret(last 50)=+41.47 win_sr=96% cum_sr=96%] + ... [8 sheep | 11,051,304 steps | ret(last 50)=+40.29 win_sr=96% cum_sr=95%] + ... [8 sheep | 11,151,304 steps | ret(last 50)=+42.96 win_sr=100% cum_sr=96%] + ... [8 sheep | 11,251,304 steps | ret(last 50)=+42.87 win_sr=94% cum_sr=96%] + ... [8 sheep | 11,351,304 steps | ret(last 50)=+44.71 win_sr=100% cum_sr=96%] + ... [8 sheep | 11,451,304 steps | ret(last 50)=+45.20 win_sr=96% cum_sr=96%] + ... [8 sheep | 11,551,304 steps | ret(last 50)=+46.82 win_sr=96% cum_sr=96%] + ... [8 sheep | 11,651,304 steps | ret(last 50)=+43.23 win_sr=96% cum_sr=96%] + ... [8 sheep | 11,751,304 steps | ret(last 50)=+43.77 win_sr=94% cum_sr=96%] + ... [8 sheep | 11,851,304 steps | ret(last 50)=+48.78 win_sr=98% cum_sr=96%] + ... [8 sheep | 11,951,304 steps | ret(last 50)=+43.19 win_sr=94% cum_sr=96%] + ... [8 sheep | 12,051,304 steps | ret(last 50)=+42.83 win_sr=96% cum_sr=96%] +[Stage n_sheep=8] evaluating 30 eps +[Stage n_sheep=8] sr=63% mean_len=1745 mean_min_pen=1.7m mean_act=1.37 + failure modes: SUCCESS=19 PARTIAL_4of8=9 PARTIAL_1of8=1 PARTIAL_6of8=1 + reward/step: progress=+0.1198 alignment=+0.0134 compact=+0.0000 wall_touch=-0.0107 pen_bonus=+0.0373 step_cost=-0.0200 complete=+0.0363 + +[Stage n_sheep=9] training 1,500,000 steps + ... [9 sheep | 12,058,632 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [9 sheep | 12,158,632 steps | ret(last 50)=+49.04 win_sr=98% cum_sr=97%] + ... [9 sheep | 12,258,632 steps | ret(last 50)=+47.01 win_sr=96% cum_sr=97%] + ... [9 sheep | 12,358,632 steps | ret(last 50)=+48.47 win_sr=90% cum_sr=95%] + ... [9 sheep | 12,458,632 steps | ret(last 50)=+46.43 win_sr=88% cum_sr=94%] + ... [9 sheep | 12,558,632 steps | ret(last 50)=+44.78 win_sr=94% cum_sr=94%] + ... [9 sheep | 12,658,632 steps | ret(last 50)=+49.15 win_sr=100% cum_sr=95%] + ... [9 sheep | 12,758,632 steps | ret(last 50)=+47.87 win_sr=94% cum_sr=95%] + ... [9 sheep | 12,858,632 steps | ret(last 50)=+50.32 win_sr=96% cum_sr=95%] + ... [9 sheep | 12,958,632 steps | ret(last 50)=+47.07 win_sr=94% cum_sr=95%] + ... [9 sheep | 13,058,632 steps | ret(last 50)=+48.71 win_sr=100% cum_sr=96%] + ... [9 sheep | 13,158,632 steps | ret(last 50)=+47.69 win_sr=96% cum_sr=96%] + ... [9 sheep | 13,258,632 steps | ret(last 50)=+46.83 win_sr=98% cum_sr=96%] + ... [9 sheep | 13,358,632 steps | ret(last 50)=+48.27 win_sr=94% cum_sr=96%] + ... [9 sheep | 13,458,632 steps | ret(last 50)=+47.61 win_sr=88% cum_sr=95%] + ... [9 sheep | 13,558,632 steps | ret(last 50)=+47.29 win_sr=96% cum_sr=95%] +[Stage n_sheep=9] evaluating 30 eps +[Stage n_sheep=9] sr=83% mean_len=1723 mean_min_pen=1.8m mean_act=1.38 + failure modes: SUCCESS=25 PARTIAL_5of9=3 NEVER_COMPACT=1 PARTIAL_6of9=1 + reward/step: progress=+0.1562 alignment=+0.0155 compact=+0.0000 wall_touch=-0.0073 pen_bonus=+0.0480 step_cost=-0.0200 complete=+0.0484 + +[Stage n_sheep=10] training 1,500,000 steps + ... [10 sheep | 13,565,960 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [10 sheep | 13,665,960 steps | ret(last 50)=+50.02 win_sr=94% cum_sr=95%] + ... [10 sheep | 13,765,960 steps | ret(last 50)=+48.46 win_sr=84% cum_sr=90%] + ... [10 sheep | 13,865,960 steps | ret(last 50)=+48.24 win_sr=84% cum_sr=87%] + ... [10 sheep | 13,965,960 steps | ret(last 50)=+50.64 win_sr=82% cum_sr=86%] + ... [10 sheep | 14,065,960 steps | ret(last 50)=+53.92 win_sr=86% cum_sr=86%] + ... [10 sheep | 14,165,960 steps | ret(last 50)=+51.33 win_sr=80% cum_sr=85%] + ... [10 sheep | 14,265,960 steps | ret(last 50)=+49.55 win_sr=76% cum_sr=84%] + ... [10 sheep | 14,365,960 steps | ret(last 50)=+51.05 win_sr=92% cum_sr=85%] + ... [10 sheep | 14,465,960 steps | ret(last 50)=+47.58 win_sr=86% cum_sr=85%] + ... [10 sheep | 14,565,960 steps | ret(last 50)=+47.98 win_sr=74% cum_sr=84%] + ... [10 sheep | 14,665,960 steps | ret(last 50)=+50.60 win_sr=82% cum_sr=84%] + ... [10 sheep | 14,765,960 steps | ret(last 50)=+51.25 win_sr=88% cum_sr=84%] + ... [10 sheep | 14,865,960 steps | ret(last 50)=+50.54 win_sr=92% cum_sr=85%] + ... [10 sheep | 14,965,960 steps | ret(last 50)=+50.94 win_sr=92% cum_sr=86%] + ... [10 sheep | 15,065,960 steps | ret(last 50)=+50.54 win_sr=90% cum_sr=86%] +[Stage n_sheep=10] evaluating 30 eps +[Stage n_sheep=10] sr=27% mean_len=2267 mean_min_pen=2.2m mean_act=1.38 + failure modes: PARTIAL_6of10=16 SUCCESS=8 COMPACT_CANT_DRIVE=2 PARTIAL_7of10=1 PARTIAL_9of10=1 PARTIAL_5of10=1 PARTIAL_8of10=1 + reward/step: progress=+0.1360 alignment=+0.0134 compact=+0.0000 wall_touch=-0.0122 pen_bonus=+0.0301 step_cost=-0.0200 complete=+0.0118 + +====================================================================== + TRAINING SUMMARY +====================================================================== + n_sheep=1 sr=100% len= 243 min_pen= 3.7m act=0.39 + n_sheep=2 sr= 63% len= 1325 min_pen= 3.1m act=0.42 + n_sheep=3 sr= 97% len= 828 min_pen= 2.7m act=1.15 + n_sheep=4 sr= 57% len= 1686 min_pen= 1.8m act=1.01 + n_sheep=5 sr= 63% len= 1654 min_pen= 1.6m act=1.36 + n_sheep=6 sr= 33% len= 2161 min_pen= 1.8m act=1.37 + n_sheep=7 sr= 20% len= 2211 min_pen= 1.8m act=1.36 + n_sheep=8 sr= 63% len= 1745 min_pen= 1.7m act=1.37 + n_sheep=9 sr= 83% len= 1723 min_pen= 1.8m act=1.38 + n_sheep=10 sr= 27% len= 2267 min_pen= 2.2m act=1.38 + + Total time: 97.6 min + Artefacts: runs/v1/ + Plots: runs/v1/success_rate.png, runs/v1/eval/ diff --git a/training/runs/v1/config.json b/training/runs/v1/config.json new file mode 100644 index 0000000..2ac8343 --- /dev/null +++ b/training/runs/v1/config.json @@ -0,0 +1,14 @@ +{ + "W_PER_SHEEP": 2.0, + "W_ALIGN": 0.05, + "W_PEN_BONUS": 10.0, + "W_COMPLETE": 100.0, + "W_STEP_COST": 0.02, + "W_COMPACT": 0.0, + "W_WALL_TOUCH": 0.01, + "WALL_TOUCH_BUFFER": 0.4, + "ALIGN_SHAPE": "standoff", + "ALIGN_GATED": true, + "ENTRY_AWARE": true, + "ent_coef": 0.02 +} \ No newline at end of file diff --git a/training/runs/v1/eval/traj_10s_fail.png b/training/runs/v1/eval/traj_10s_fail.png new file mode 100644 index 0000000..427a6f4 Binary files /dev/null and b/training/runs/v1/eval/traj_10s_fail.png differ diff --git a/training/runs/v1/eval/traj_1s_success.png b/training/runs/v1/eval/traj_1s_success.png new file mode 100644 index 0000000..2658807 Binary files /dev/null and b/training/runs/v1/eval/traj_1s_success.png differ diff --git a/training/runs/v1/eval/traj_2s_success.png b/training/runs/v1/eval/traj_2s_success.png new file mode 100644 index 0000000..bac6823 Binary files /dev/null and b/training/runs/v1/eval/traj_2s_success.png differ diff --git a/training/runs/v1/eval/traj_3s_success.png b/training/runs/v1/eval/traj_3s_success.png new file mode 100644 index 0000000..22e76c8 Binary files /dev/null and b/training/runs/v1/eval/traj_3s_success.png differ diff --git a/training/runs/v1/eval/traj_4s_success.png b/training/runs/v1/eval/traj_4s_success.png new file mode 100644 index 0000000..6d9574d Binary files /dev/null and b/training/runs/v1/eval/traj_4s_success.png differ diff --git a/training/runs/v1/eval/traj_5s_fail.png b/training/runs/v1/eval/traj_5s_fail.png new file mode 100644 index 0000000..e7b7026 Binary files /dev/null and b/training/runs/v1/eval/traj_5s_fail.png differ diff --git a/training/runs/v1/eval/traj_6s_fail.png b/training/runs/v1/eval/traj_6s_fail.png new file mode 100644 index 0000000..1e07f4f Binary files /dev/null and b/training/runs/v1/eval/traj_6s_fail.png differ diff --git a/training/runs/v1/eval/traj_7s_fail.png b/training/runs/v1/eval/traj_7s_fail.png new file mode 100644 index 0000000..5ed880c Binary files /dev/null and b/training/runs/v1/eval/traj_7s_fail.png differ diff --git a/training/runs/v1/eval/traj_8s_fail.png b/training/runs/v1/eval/traj_8s_fail.png new file mode 100644 index 0000000..0001a69 Binary files /dev/null and b/training/runs/v1/eval/traj_8s_fail.png differ diff --git a/training/runs/v1/eval/traj_9s_success.png b/training/runs/v1/eval/traj_9s_success.png new file mode 100644 index 0000000..9b2cd89 Binary files /dev/null and b/training/runs/v1/eval/traj_9s_success.png differ diff --git a/training/runs/v1/eval/ts_10s_fail.png b/training/runs/v1/eval/ts_10s_fail.png new file mode 100644 index 0000000..b598241 Binary files /dev/null and b/training/runs/v1/eval/ts_10s_fail.png differ diff --git a/training/runs/v1/eval/ts_1s_success.png b/training/runs/v1/eval/ts_1s_success.png new file mode 100644 index 0000000..df6dc3a Binary files /dev/null and b/training/runs/v1/eval/ts_1s_success.png differ diff --git a/training/runs/v1/eval/ts_2s_success.png b/training/runs/v1/eval/ts_2s_success.png new file mode 100644 index 0000000..deb1d46 Binary files /dev/null and b/training/runs/v1/eval/ts_2s_success.png differ diff --git a/training/runs/v1/eval/ts_3s_success.png b/training/runs/v1/eval/ts_3s_success.png new file mode 100644 index 0000000..ac27553 Binary files /dev/null and b/training/runs/v1/eval/ts_3s_success.png differ diff --git a/training/runs/v1/eval/ts_4s_success.png b/training/runs/v1/eval/ts_4s_success.png new file mode 100644 index 0000000..fc75c1f Binary files /dev/null and b/training/runs/v1/eval/ts_4s_success.png differ diff --git a/training/runs/v1/eval/ts_5s_fail.png b/training/runs/v1/eval/ts_5s_fail.png new file mode 100644 index 0000000..0125796 Binary files /dev/null and b/training/runs/v1/eval/ts_5s_fail.png differ diff --git a/training/runs/v1/eval/ts_6s_fail.png b/training/runs/v1/eval/ts_6s_fail.png new file mode 100644 index 0000000..d8e4ca1 Binary files /dev/null and b/training/runs/v1/eval/ts_6s_fail.png differ diff --git a/training/runs/v1/eval/ts_7s_fail.png b/training/runs/v1/eval/ts_7s_fail.png new file mode 100644 index 0000000..e17d272 Binary files /dev/null and b/training/runs/v1/eval/ts_7s_fail.png differ diff --git a/training/runs/v1/eval/ts_8s_fail.png b/training/runs/v1/eval/ts_8s_fail.png new file mode 100644 index 0000000..4e16ce9 Binary files /dev/null and b/training/runs/v1/eval/ts_8s_fail.png differ diff --git a/training/runs/v1/eval/ts_9s_success.png b/training/runs/v1/eval/ts_9s_success.png new file mode 100644 index 0000000..8f72338 Binary files /dev/null and b/training/runs/v1/eval/ts_9s_success.png differ diff --git a/training/runs/v1/final_model.zip b/training/runs/v1/final_model.zip new file mode 100644 index 0000000..af81e54 Binary files /dev/null and b/training/runs/v1/final_model.zip differ diff --git a/training/runs/v1/stage_results.json b/training/runs/v1/stage_results.json new file mode 100644 index 0000000..700d5db --- /dev/null +++ b/training/runs/v1/stage_results.json @@ -0,0 +1,218 @@ +[ + { + "sr": 1.0, + "mean_len": 243.0, + "mean_min_pen": 3.7120999256769815, + "mean_act": 0.3930775734995823, + "failure_modes": { + "SUCCESS": 30 + }, + "reward_per_step": { + "progress": 0.11411363949746262, + "alignment": 0.00034729298515464674, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.0411522633744856, + "step_cost": -0.020000000000000108, + "complete": 0.411522633744856 + }, + "n_sheep": 1 + }, + { + "sr": 0.6333333333333333, + "mean_len": 1324.9333333333334, + "mean_min_pen": 3.108120004336039, + "mean_act": 0.41626948835668365, + "failure_modes": { + "SUCCESS": 19, + "PARTIAL_1of2": 10, + "COMPACT_CANT_DRIVE": 1 + }, + "reward_per_step": { + "progress": 0.045259184195888084, + "alignment": 0.006548802090560675, + "compact": 0.0, + "wall_touch": -0.005243643148915256, + "pen_bonus": 0.012327664284995472, + "step_cost": -0.019999999999989106, + "complete": 0.04780114722753346 + }, + "n_sheep": 2 + }, + { + "sr": 0.9666666666666667, + "mean_len": 827.7, + "mean_min_pen": 2.727696478366852, + "mean_act": 1.1521936838813016, + "failure_modes": { + "PARTIAL_1of3": 1, + "SUCCESS": 29 + }, + "reward_per_step": { + "progress": 0.10166334638295625, + "alignment": 0.013859153429505626, + "compact": 0.0, + "wall_touch": -0.0022604217500245883, + "pen_bonus": 0.03543957150336273, + "step_cost": -0.019999999999993488, + "complete": 0.11678949699971809 + }, + "n_sheep": 3 + }, + { + "sr": 0.5666666666666667, + "mean_len": 1686.0333333333333, + "mean_min_pen": 1.7675368865331014, + "mean_act": 1.0093803780622697, + "failure_modes": { + "PARTIAL_1of4": 10, + "SUCCESS": 17, + "PARTIAL_2of4": 3 + }, + "reward_per_step": { + "progress": 0.07213990871824405, + "alignment": 0.008500170591885925, + "compact": 0.0, + "wall_touch": -0.01096873654520888, + "pen_bonus": 0.016607026353769202, + "step_cost": -0.019999999999987545, + "complete": 0.03360945809691386 + }, + "n_sheep": 4 + }, + { + "sr": 0.6333333333333333, + "mean_len": 1653.8333333333333, + "mean_min_pen": 1.6310479640960693, + "mean_act": 1.3572492104366454, + "failure_modes": { + "PARTIAL_2of5": 9, + "SUCCESS": 19, + "PARTIAL_3of5": 2 + }, + "reward_per_step": { + "progress": 0.10426509678994506, + "alignment": 0.010847962450905363, + "compact": 0.0, + "wall_touch": -0.010001784418012447, + "pen_bonus": 0.02398468205179885, + "step_cost": -0.019999999999987656, + "complete": 0.038294870502872114 + }, + "n_sheep": 5 + }, + { + "sr": 0.3333333333333333, + "mean_len": 2161.0333333333333, + "mean_min_pen": 1.7910769859949747, + "mean_act": 1.3728399181766682, + "failure_modes": { + "SUCCESS": 10, + "PARTIAL_5of6": 14, + "PARTIAL_4of6": 4, + "COMPACT_CANT_DRIVE": 2 + }, + "reward_per_step": { + "progress": 0.09152597398477412, + "alignment": 0.010169068168091603, + "compact": 0.0, + "wall_touch": -0.006849364742307595, + "pen_bonus": 0.022520090697351575, + "step_cost": -0.019999999999986286, + "complete": 0.015424719655720258 + }, + "n_sheep": 6 + }, + { + "sr": 0.2, + "mean_len": 2211.2, + "mean_min_pen": 1.8339664101600648, + "mean_act": 1.3635542380694952, + "failure_modes": { + "PARTIAL_5of7": 2, + "SUCCESS": 6, + "PARTIAL_6of7": 3, + "PARTIAL_3of7": 8, + "PARTIAL_4of7": 11 + }, + "reward_per_step": { + "progress": 0.08794138462674025, + "alignment": 0.008588877237149285, + "compact": 0.0, + "wall_touch": -0.014176997336213705, + "pen_bonus": 0.020803183791606367, + "step_cost": -0.019999999999986185, + "complete": 0.009044862518089725 + }, + "n_sheep": 7 + }, + { + "sr": 0.6333333333333333, + "mean_len": 1744.5666666666666, + "mean_min_pen": 1.7331914146741232, + "mean_act": 1.366222499606064, + "failure_modes": { + "SUCCESS": 19, + "PARTIAL_4of8": 9, + "PARTIAL_1of8": 1, + "PARTIAL_6of8": 1 + }, + "reward_per_step": { + "progress": 0.11981066786559799, + "alignment": 0.013385751275637974, + "compact": 0.0, + "wall_touch": -0.010737474453107049, + "pen_bonus": 0.037258536026138295, + "step_cost": -0.019999999999987354, + "complete": 0.03630318894854501 + }, + "n_sheep": 8 + }, + { + "sr": 0.8333333333333334, + "mean_len": 1723.1333333333334, + "mean_min_pen": 1.7584208091100058, + "mean_act": 1.3848404770822742, + "failure_modes": { + "NEVER_COMPACT": 1, + "SUCCESS": 25, + "PARTIAL_5of9": 3, + "PARTIAL_6of9": 1 + }, + "reward_per_step": { + "progress": 0.1561655017464111, + "alignment": 0.01548957874142236, + "compact": 0.0, + "wall_touch": -0.00728069638883058, + "pen_bonus": 0.04797461987851588, + "step_cost": -0.019999999999987424, + "complete": 0.048361511974310364 + }, + "n_sheep": 9 + }, + { + "sr": 0.26666666666666666, + "mean_len": 2266.5333333333333, + "mean_min_pen": 2.1789512236913047, + "mean_act": 1.3836169439830954, + "failure_modes": { + "PARTIAL_6of10": 16, + "SUCCESS": 8, + "PARTIAL_7of10": 1, + "PARTIAL_9of10": 1, + "PARTIAL_5of10": 1, + "COMPACT_CANT_DRIVE": 2, + "PARTIAL_8of10": 1 + }, + "reward_per_step": { + "progress": 0.13603502511276877, + "alignment": 0.013359252519942029, + "compact": 0.0, + "wall_touch": -0.012232639033891629, + "pen_bonus": 0.030148832284252015, + "step_cost": -0.019999999999986078, + "complete": 0.011765397964586153 + }, + "n_sheep": 10 + } +] \ No newline at end of file diff --git a/training/runs/v1/success_rate.png b/training/runs/v1/success_rate.png new file mode 100644 index 0000000..ec3f546 Binary files /dev/null and b/training/runs/v1/success_rate.png differ diff --git a/training/runs/v1/vecnorm.pkl b/training/runs/v1/vecnorm.pkl new file mode 100644 index 0000000..1f6e51e Binary files /dev/null and b/training/runs/v1/vecnorm.pkl differ diff --git a/training/train.py b/training/train.py index dc19371..6f07838 100644 --- a/training/train.py +++ b/training/train.py @@ -384,7 +384,7 @@ DEFAULT_CONFIG = { "WALL_TOUCH_BUFFER": 0.3, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": True, - "ENTRY_AWARE": False, + "ENTRY_AWARE": True, "ent_coef": 0.02, }