diff --git a/training/runs/v2.log b/training/runs/v2.log
new file mode 100644
index 0000000..4fe7c39
--- /dev/null
+++ b/training/runs/v2.log
@@ -0,0 +1,242 @@
+Config loaded from config.json
+Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.0, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02}
+Run dir: runs/v2
+Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
+
+
+[Stage n_sheep=1] training 1,500,000 steps
+           ... [1 sheep | 100,000 steps | ret(last 40)=-23.39  win_sr=8%  cum_sr=8%]
+           ... [1 sheep | 200,000 steps | ret(last 50)=-22.10  win_sr=10%  cum_sr=9%]
+           ... [1 sheep | 300,000 steps | ret(last 50)=-23.02  win_sr=10%  cum_sr=10%]
+           ... [1 sheep | 400,000 steps | ret(last 50)=-18.97  win_sr=18%  cum_sr=12%]
+           ... [1 sheep | 500,000 steps | ret(last 50)=-20.01  win_sr=8%  cum_sr=11%]
+           ... [1 sheep | 600,000 steps | ret(last 50)=-18.57  win_sr=14%  cum_sr=12%]
+           ... [1 sheep | 700,000 steps | ret(last 50)=-17.55  win_sr=22%  cum_sr=14%]
+           ... [1 sheep | 800,000 steps | ret(last 50)=+7.41  win_sr=66%  cum_sr=23%]
+           ... [1 sheep | 900,000 steps | ret(last 50)=+17.61  win_sr=100%  cum_sr=47%]
+           ... [1 sheep | 1,000,000 steps | ret(last 50)=+16.11  win_sr=100%  cum_sr=65%]
+           ... [1 sheep | 1,100,000 steps | ret(last 50)=+15.82  win_sr=100%  cum_sr=74%]
+           ... [1 sheep | 1,200,000 steps | ret(last 50)=+14.33  win_sr=100%  cum_sr=80%]
+           ... [1 sheep | 1,300,000 steps | ret(last 50)=+14.19  win_sr=100%  cum_sr=84%]
+           ... [1 sheep | 1,400,000 steps | ret(last 50)=+14.00  win_sr=100%  cum_sr=87%]
+           ... [1 sheep | 1,500,000 steps | ret(last 50)=+13.96  win_sr=100%  cum_sr=89%]
+[Stage n_sheep=1] evaluating 30 eps
+[Stage n_sheep=1] sr=100%  mean_len=234  mean_min_pen=3.7m  mean_act=0.41
+  failure modes: SUCCESS=30
+  reward/step: progress=+0.1118  alignment=+0.0003  compact=+0.0000  wall_touch=+0.0000  pen_bonus=+0.0427  step_cost=-0.0200  complete=+0.4274
+
+[Stage n_sheep=2] training 1,500,000 steps
+           ... [2 sheep | 1,507,336 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [2 sheep | 1,607,336 steps | ret(last 40)=-4.45  win_sr=8%  cum_sr=8%]
+           ... [2 sheep | 1,707,336 steps | ret(last 50)=-4.56  win_sr=8%  cum_sr=9%]
+           ... [2 sheep | 1,807,336 steps | ret(last 50)=-2.33  win_sr=12%  cum_sr=10%]
+           ... [2 sheep | 1,907,336 steps | ret(last 50)=+1.93  win_sr=24%  cum_sr=14%]
+           ... [2 sheep | 2,007,336 steps | ret(last 50)=+7.32  win_sr=52%  cum_sr=24%]
+           ... [2 sheep | 2,107,336 steps | ret(last 50)=+10.52  win_sr=58%  cum_sr=30%]
+           ... [2 sheep | 2,207,336 steps | ret(last 50)=+15.67  win_sr=76%  cum_sr=39%]
+           ... [2 sheep | 2,307,336 steps | ret(last 50)=+16.91  win_sr=78%  cum_sr=46%]
+           ... [2 sheep | 2,407,336 steps | ret(last 50)=+21.91  win_sr=96%  cum_sr=53%]
+           ... [2 sheep | 2,507,336 steps | ret(last 50)=+21.08  win_sr=94%  cum_sr=60%]
+           ... [2 sheep | 2,607,336 steps | ret(last 50)=+20.24  win_sr=92%  cum_sr=65%]
+           ... [2 sheep | 2,707,336 steps | ret(last 50)=+21.40  win_sr=96%  cum_sr=70%]
+           ... [2 sheep | 2,807,336 steps | ret(last 50)=+21.95  win_sr=100%  cum_sr=73%]
+           ... [2 sheep | 2,907,336 steps | ret(last 50)=+20.73  win_sr=100%  cum_sr=76%]
+           ... [2 sheep | 3,007,336 steps | ret(last 50)=+21.25  win_sr=100%  cum_sr=79%]
+[Stage n_sheep=2] evaluating 30 eps
+[Stage n_sheep=2] sr=87%  mean_len=1064  mean_min_pen=4.1m  mean_act=0.59
+  failure modes: SUCCESS=26  COMPACT_CANT_DRIVE=4
+  reward/step: progress=+0.0565  alignment=+0.0071  compact=+0.0000  wall_touch=+0.0000  pen_bonus=+0.0163  step_cost=-0.0200  complete=+0.0815
+
+[Stage n_sheep=3] training 1,500,000 steps
+           ... [3 sheep | 3,014,664 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [3 sheep | 3,114,664 steps | ret(last 50)=+17.60  win_sr=72%  cum_sr=73%]
+           ... [3 sheep | 3,214,664 steps | ret(last 50)=+25.44  win_sr=98%  cum_sr=87%]
+           ... [3 sheep | 3,314,664 steps | ret(last 50)=+25.73  win_sr=92%  cum_sr=90%]
+           ... [3 sheep | 3,414,664 steps | ret(last 50)=+28.01  win_sr=98%  cum_sr=92%]
+           ... [3 sheep | 3,514,664 steps | ret(last 50)=+25.71  win_sr=94%  cum_sr=93%]
+           ... [3 sheep | 3,614,664 steps | ret(last 50)=+24.73  win_sr=94%  cum_sr=93%]
+           ... [3 sheep | 3,714,664 steps | ret(last 50)=+23.51  win_sr=88%  cum_sr=92%]
+           ... [3 sheep | 3,814,664 steps | ret(last 50)=+25.11  win_sr=96%  cum_sr=93%]
+           ... [3 sheep | 3,914,664 steps | ret(last 50)=+27.02  win_sr=100%  cum_sr=93%]
+           ... [3 sheep | 4,014,664 steps | ret(last 50)=+24.67  win_sr=94%  cum_sr=94%]
+           ... [3 sheep | 4,114,664 steps | ret(last 50)=+26.08  win_sr=98%  cum_sr=94%]
+           ... [3 sheep | 4,214,664 steps | ret(last 50)=+26.69  win_sr=98%  cum_sr=94%]
+           ... [3 sheep | 4,314,664 steps | ret(last 50)=+24.01  win_sr=92%  cum_sr=94%]
+           ... [3 sheep | 4,414,664 steps | ret(last 50)=+25.74  win_sr=98%  cum_sr=94%]
+           ... [3 sheep | 4,514,664 steps | ret(last 50)=+27.43  win_sr=100%  cum_sr=95%]
+[Stage n_sheep=3] evaluating 30 eps
+[Stage n_sheep=3] sr=100%  mean_len=769  mean_min_pen=3.5m  mean_act=0.72
+  failure modes: SUCCESS=30
+  reward/step: progress=+0.1121  alignment=+0.0078  compact=+0.0000  wall_touch=+0.0000  pen_bonus=+0.0390  step_cost=-0.0200  complete=+0.1301
+
+[Stage n_sheep=4] training 1,500,000 steps
+           ... [4 sheep | 4,521,992 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [4 sheep | 4,621,992 steps | ret(last 50)=+32.50  win_sr=100%  cum_sr=96%]
+           ... [4 sheep | 4,721,992 steps | ret(last 50)=+31.21  win_sr=100%  cum_sr=98%]
+           ... [4 sheep | 4,821,992 steps | ret(last 50)=+34.05  win_sr=100%  cum_sr=99%]
+           ... [4 sheep | 4,921,992 steps | ret(last 50)=+32.04  win_sr=100%  cum_sr=99%]
+           ... [4 sheep | 5,021,992 steps | ret(last 50)=+29.20  win_sr=100%  cum_sr=99%]
+           ... [4 sheep | 5,121,992 steps | ret(last 50)=+31.56  win_sr=100%  cum_sr=99%]
+           ... [4 sheep | 5,221,992 steps | ret(last 50)=+31.25  win_sr=100%  cum_sr=100%]
+           ... [4 sheep | 5,321,992 steps | ret(last 50)=+30.62  win_sr=100%  cum_sr=100%]
+           ... [4 sheep | 5,421,992 steps | ret(last 50)=+30.44  win_sr=100%  cum_sr=100%]
+           ... [4 sheep | 5,521,992 steps | ret(last 50)=+32.84  win_sr=100%  cum_sr=100%]
+           ... [4 sheep | 5,621,992 steps | ret(last 50)=+30.98  win_sr=100%  cum_sr=100%]
+           ... [4 sheep | 5,721,992 steps | ret(last 50)=+28.77  win_sr=98%  cum_sr=100%]
+           ... [4 sheep | 5,821,992 steps | ret(last 50)=+29.24  win_sr=100%  cum_sr=100%]
+           ... [4 sheep | 5,921,992 steps | ret(last 50)=+30.83  win_sr=100%  cum_sr=100%]
+           ... [4 sheep | 6,021,992 steps | ret(last 50)=+30.06  win_sr=100%  cum_sr=100%]
+[Stage n_sheep=4] evaluating 30 eps
+[Stage n_sheep=4] sr=100%  mean_len=750  mean_min_pen=3.5m  mean_act=1.23
+  failure modes: SUCCESS=30
+  reward/step: progress=+0.1586  alignment=+0.0113  compact=+0.0000  wall_touch=+0.0000  pen_bonus=+0.0533  step_cost=-0.0200  complete=+0.1334
+
+[Stage n_sheep=5] training 1,500,000 steps
+           ... [5 sheep | 6,029,320 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [5 sheep | 6,129,320 steps | ret(last 50)=+31.97  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,229,320 steps | ret(last 50)=+32.32  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,329,320 steps | ret(last 50)=+34.26  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,429,320 steps | ret(last 50)=+33.75  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,529,320 steps | ret(last 50)=+34.77  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,629,320 steps | ret(last 50)=+34.06  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,729,320 steps | ret(last 50)=+32.39  win_sr=96%  cum_sr=100%]
+           ... [5 sheep | 6,829,320 steps | ret(last 50)=+32.33  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,929,320 steps | ret(last 50)=+33.29  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 7,029,320 steps | ret(last 50)=+32.12  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 7,129,320 steps | ret(last 50)=+32.58  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 7,229,320 steps | ret(last 50)=+33.27  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 7,329,320 steps | ret(last 50)=+33.64  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 7,429,320 steps | ret(last 50)=+32.67  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 7,529,320 steps | ret(last 50)=+32.79  win_sr=100%  cum_sr=100%]
+[Stage n_sheep=5] evaluating 30 eps
+[Stage n_sheep=5] sr=97%  mean_len=921  mean_min_pen=3.2m  mean_act=1.33
+  failure modes: SUCCESS=29  PARTIAL_3of5=1
+  reward/step: progress=+0.1565  alignment=+0.0135  compact=+0.0000  wall_touch=+0.0000  pen_bonus=+0.0536  step_cost=-0.0200  complete=+0.1050
+
+[Stage n_sheep=6] training 1,500,000 steps
+           ... [6 sheep | 7,536,648 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [6 sheep | 7,636,648 steps | ret(last 50)=+35.93  win_sr=100%  cum_sr=96%]
+           ... [6 sheep | 7,736,648 steps | ret(last 50)=+37.56  win_sr=100%  cum_sr=97%]
+           ... [6 sheep | 7,836,648 steps | ret(last 50)=+34.93  win_sr=100%  cum_sr=98%]
+           ... [6 sheep | 7,936,648 steps | ret(last 50)=+32.71  win_sr=98%  cum_sr=98%]
+           ... [6 sheep | 8,036,648 steps | ret(last 50)=+36.84  win_sr=100%  cum_sr=99%]
+           ... [6 sheep | 8,136,648 steps | ret(last 50)=+35.11  win_sr=100%  cum_sr=99%]
+           ... [6 sheep | 8,236,648 steps | ret(last 50)=+36.54  win_sr=100%  cum_sr=99%]
+           ... [6 sheep | 8,336,648 steps | ret(last 50)=+34.67  win_sr=100%  cum_sr=99%]
+           ... [6 sheep | 8,436,648 steps | ret(last 50)=+36.14  win_sr=100%  cum_sr=99%]
+           ... [6 sheep | 8,536,648 steps | ret(last 50)=+36.95  win_sr=100%  cum_sr=99%]
+           ... [6 sheep | 8,636,648 steps | ret(last 50)=+35.42  win_sr=100%  cum_sr=99%]
+           ... [6 sheep | 8,736,648 steps | ret(last 50)=+33.44  win_sr=100%  cum_sr=100%]
+           ... [6 sheep | 8,836,648 steps | ret(last 50)=+36.70  win_sr=100%  cum_sr=100%]
+           ... [6 sheep | 8,936,648 steps | ret(last 50)=+34.03  win_sr=100%  cum_sr=100%]
+           ... [6 sheep | 9,036,648 steps | ret(last 50)=+34.53  win_sr=100%  cum_sr=100%]
+[Stage n_sheep=6] evaluating 30 eps
+[Stage n_sheep=6] sr=97%  mean_len=1193  mean_min_pen=3.4m  mean_act=1.36
+  failure modes: SUCCESS=29  COMPACT_CANT_DRIVE=1
+  reward/step: progress=+0.1597  alignment=+0.0173  compact=+0.0000  wall_touch=+0.0000  pen_bonus=+0.0492  step_cost=-0.0200  complete=+0.0810
+
+[Stage n_sheep=7] training 1,500,000 steps
+           ... [7 sheep | 9,043,976 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [7 sheep | 9,143,976 steps | ret(last 50)=+40.54  win_sr=100%  cum_sr=100%]
+           ... [7 sheep | 9,243,976 steps | ret(last 50)=+38.70  win_sr=98%  cum_sr=99%]
+           ... [7 sheep | 9,343,976 steps | ret(last 50)=+38.13  win_sr=100%  cum_sr=100%]
+           ... [7 sheep | 9,443,976 steps | ret(last 50)=+40.37  win_sr=100%  cum_sr=100%]
+           ... [7 sheep | 9,543,976 steps | ret(last 50)=+39.40  win_sr=100%  cum_sr=99%]
+           ... [7 sheep | 9,643,976 steps | ret(last 50)=+40.44  win_sr=98%  cum_sr=99%]
+           ... [7 sheep | 9,743,976 steps | ret(last 50)=+37.74  win_sr=100%  cum_sr=99%]
+           ... [7 sheep | 9,843,976 steps | ret(last 50)=+39.91  win_sr=98%  cum_sr=99%]
+           ... [7 sheep | 9,943,976 steps | ret(last 50)=+40.67  win_sr=100%  cum_sr=99%]
+           ... [7 sheep | 10,043,976 steps | ret(last 50)=+35.38  win_sr=100%  cum_sr=99%]
+           ... [7 sheep | 10,143,976 steps | ret(last 50)=+38.31  win_sr=100%  cum_sr=99%]
+           ... [7 sheep | 10,243,976 steps | ret(last 50)=+40.86  win_sr=100%  cum_sr=99%]
+           ... [7 sheep | 10,343,976 steps | ret(last 50)=+40.95  win_sr=100%  cum_sr=99%]
+           ... [7 sheep | 10,443,976 steps | ret(last 50)=+37.90  win_sr=100%  cum_sr=99%]
+           ... [7 sheep | 10,543,976 steps | ret(last 50)=+39.07  win_sr=100%  cum_sr=99%]
+[Stage n_sheep=7] evaluating 30 eps
+[Stage n_sheep=7] sr=100%  mean_len=1209  mean_min_pen=3.2m  mean_act=1.37
+  failure modes: SUCCESS=30
+  reward/step: progress=+0.1774  alignment=+0.0179  compact=+0.0000  wall_touch=+0.0000  pen_bonus=+0.0579  step_cost=-0.0200  complete=+0.0827
+
+[Stage n_sheep=8] training 1,500,000 steps
+           ... [8 sheep | 10,551,304 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [8 sheep | 10,651,304 steps | ret(last 50)=+42.81  win_sr=100%  cum_sr=100%]
+           ... [8 sheep | 10,751,304 steps | ret(last 50)=+44.59  win_sr=100%  cum_sr=100%]
+           ... [8 sheep | 10,851,304 steps | ret(last 50)=+45.59  win_sr=98%  cum_sr=99%]
+           ... [8 sheep | 10,951,304 steps | ret(last 50)=+42.27  win_sr=98%  cum_sr=99%]
+           ... [8 sheep | 11,051,304 steps | ret(last 50)=+45.05  win_sr=98%  cum_sr=99%]
+           ... [8 sheep | 11,151,304 steps | ret(last 50)=+45.50  win_sr=100%  cum_sr=99%]
+           ... [8 sheep | 11,251,304 steps | ret(last 50)=+43.60  win_sr=100%  cum_sr=99%]
+           ... [8 sheep | 11,351,304 steps | ret(last 50)=+40.26  win_sr=100%  cum_sr=99%]
+           ... [8 sheep | 11,451,304 steps | ret(last 50)=+43.00  win_sr=100%  cum_sr=99%]
+           ... [8 sheep | 11,551,304 steps | ret(last 50)=+43.16  win_sr=100%  cum_sr=100%]
+           ... [8 sheep | 11,651,304 steps | ret(last 50)=+42.78  win_sr=100%  cum_sr=100%]
+           ... [8 sheep | 11,751,304 steps | ret(last 50)=+42.32  win_sr=98%  cum_sr=99%]
+           ... [8 sheep | 11,851,304 steps | ret(last 50)=+41.62  win_sr=100%  cum_sr=99%]
+           ... [8 sheep | 11,951,304 steps | ret(last 50)=+42.56  win_sr=98%  cum_sr=99%]
+           ... [8 sheep | 12,051,304 steps | ret(last 50)=+41.83  win_sr=100%  cum_sr=99%]
+[Stage n_sheep=8] evaluating 30 eps
+[Stage n_sheep=8] sr=100%  mean_len=1492  mean_min_pen=3.2m  mean_act=1.38
+  failure modes: SUCCESS=30
+  reward/step: progress=+0.1916  alignment=+0.0190  compact=+0.0000  wall_touch=+0.0000  pen_bonus=+0.0536  step_cost=-0.0200  complete=+0.0670
+
+[Stage n_sheep=9] training 1,500,000 steps
+           ... [9 sheep | 12,058,632 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [9 sheep | 12,158,632 steps | ret(last 50)=+46.03  win_sr=100%  cum_sr=100%]
+           ... [9 sheep | 12,258,632 steps | ret(last 50)=+46.87  win_sr=96%  cum_sr=97%]
+           ... [9 sheep | 12,358,632 steps | ret(last 50)=+45.48  win_sr=96%  cum_sr=97%]
+           ... [9 sheep | 12,458,632 steps | ret(last 50)=+47.02  win_sr=96%  cum_sr=97%]
+           ... [9 sheep | 12,558,632 steps | ret(last 50)=+44.66  win_sr=96%  cum_sr=97%]
+           ... [9 sheep | 12,658,632 steps | ret(last 50)=+46.60  win_sr=96%  cum_sr=97%]
+           ... [9 sheep | 12,758,632 steps | ret(last 50)=+41.85  win_sr=96%  cum_sr=97%]
+           ... [9 sheep | 12,858,632 steps | ret(last 50)=+47.81  win_sr=96%  cum_sr=97%]
+           ... [9 sheep | 12,958,632 steps | ret(last 50)=+44.92  win_sr=90%  cum_sr=96%]
+           ... [9 sheep | 13,058,632 steps | ret(last 50)=+47.40  win_sr=90%  cum_sr=96%]
+           ... [9 sheep | 13,158,632 steps | ret(last 50)=+47.16  win_sr=92%  cum_sr=95%]
+           ... [9 sheep | 13,258,632 steps | ret(last 50)=+45.55  win_sr=98%  cum_sr=96%]
+           ... [9 sheep | 13,358,632 steps | ret(last 50)=+46.87  win_sr=96%  cum_sr=96%]
+           ... [9 sheep | 13,458,632 steps | ret(last 50)=+47.69  win_sr=98%  cum_sr=96%]
+           ... [9 sheep | 13,558,632 steps | ret(last 50)=+45.17  win_sr=94%  cum_sr=96%]
+[Stage n_sheep=9] evaluating 30 eps
+[Stage n_sheep=9] sr=90%  mean_len=1628  mean_min_pen=3.2m  mean_act=1.38
+  failure modes: SUCCESS=27  COMPACT_CANT_DRIVE=3
+  reward/step: progress=+0.1802  alignment=+0.0204  compact=+0.0000  wall_touch=+0.0000  pen_bonus=+0.0514  step_cost=-0.0200  complete=+0.0553
+
+[Stage n_sheep=10] training 1,500,000 steps
+           ... [10 sheep | 13,565,960 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [10 sheep | 13,665,960 steps | ret(last 50)=+49.00  win_sr=82%  cum_sr=82%]
+           ... [10 sheep | 13,765,960 steps | ret(last 50)=+48.55  win_sr=86%  cum_sr=84%]
+           ... [10 sheep | 13,865,960 steps | ret(last 50)=+46.53  win_sr=80%  cum_sr=83%]
+           ... [10 sheep | 13,965,960 steps | ret(last 50)=+44.70  win_sr=82%  cum_sr=83%]
+           ... [10 sheep | 14,065,960 steps | ret(last 50)=+52.57  win_sr=92%  cum_sr=85%]
+           ... [10 sheep | 14,165,960 steps | ret(last 50)=+50.20  win_sr=82%  cum_sr=85%]
+           ... [10 sheep | 14,265,960 steps | ret(last 50)=+50.34  win_sr=90%  cum_sr=85%]
+           ... [10 sheep | 14,365,960 steps | ret(last 50)=+50.24  win_sr=90%  cum_sr=86%]
+           ... [10 sheep | 14,465,960 steps | ret(last 50)=+48.40  win_sr=86%  cum_sr=86%]
+           ... [10 sheep | 14,565,960 steps | ret(last 50)=+48.74  win_sr=88%  cum_sr=87%]
+           ... [10 sheep | 14,665,960 steps | ret(last 50)=+48.46  win_sr=80%  cum_sr=86%]
+           ... [10 sheep | 14,765,960 steps | ret(last 50)=+51.46  win_sr=70%  cum_sr=85%]
+           ... [10 sheep | 14,865,960 steps | ret(last 50)=+49.28  win_sr=92%  cum_sr=85%]
+           ... [10 sheep | 14,965,960 steps | ret(last 50)=+51.12  win_sr=88%  cum_sr=86%]
+           ... [10 sheep | 15,065,960 steps | ret(last 50)=+52.03  win_sr=84%  cum_sr=85%]
+[Stage n_sheep=10] evaluating 30 eps
+[Stage n_sheep=10] sr=93%  mean_len=1870  mean_min_pen=3.1m  mean_act=1.38
+  failure modes: SUCCESS=28  COMPACT_CANT_DRIVE=2
+  reward/step: progress=+0.1727  alignment=+0.0219  compact=+0.0000  wall_touch=+0.0000  pen_bonus=+0.0522  step_cost=-0.0200  complete=+0.0499
+
+======================================================================
+  TRAINING SUMMARY
+======================================================================
+  n_sheep=1  sr=100%  len=  234  min_pen=  3.7m  act=0.41
+  n_sheep=2  sr= 87%  len= 1064  min_pen=  4.1m  act=0.59
+  n_sheep=3  sr=100%  len=  769  min_pen=  3.5m  act=0.72
+  n_sheep=4  sr=100%  len=  750  min_pen=  3.5m  act=1.23
+  n_sheep=5  sr= 97%  len=  921  min_pen=  3.2m  act=1.33
+  n_sheep=6  sr= 97%  len= 1193  min_pen=  3.4m  act=1.36
+  n_sheep=7  sr=100%  len= 1209  min_pen=  3.2m  act=1.37
+  n_sheep=8  sr=100%  len= 1492  min_pen=  3.2m  act=1.38
+  n_sheep=9  sr= 90%  len= 1628  min_pen=  3.2m  act=1.38
+  n_sheep=10  sr= 93%  len= 1870  min_pen=  3.1m  act=1.38
+
+  Total time: 92.0 min
+  Artefacts:  runs/v2/
+  Plots:      runs/v2/success_rate.png, runs/v2/eval/
diff --git a/training/runs/v2/config.json b/training/runs/v2/config.json
new file mode 100644
index 0000000..89f2062
--- /dev/null
+++ b/training/runs/v2/config.json
@@ -0,0 +1,14 @@
+{
+  "W_PER_SHEEP": 2.0,
+  "W_ALIGN": 0.05,
+  "W_PEN_BONUS": 10.0,
+  "W_COMPLETE": 100.0,
+  "W_STEP_COST": 0.02,
+  "W_COMPACT": 0.0,
+  "W_WALL_TOUCH": 0.0,
+  "WALL_TOUCH_BUFFER": 0.4,
+  "ALIGN_SHAPE": "standoff",
+  "ALIGN_GATED": true,
+  "ENTRY_AWARE": true,
+  "ent_coef": 0.02
+}
\ No newline at end of file
diff --git a/training/runs/v2/eval/ep_10s_success.gif b/training/runs/v2/eval/ep_10s_success.gif
new file mode 100644
index 0000000..c6ffdd0
Binary files /dev/null and b/training/runs/v2/eval/ep_10s_success.gif differ
diff --git a/training/runs/v2/eval/ep_1s_success.gif b/training/runs/v2/eval/ep_1s_success.gif
new file mode 100644
index 0000000..12967b0
Binary files /dev/null and b/training/runs/v2/eval/ep_1s_success.gif differ
diff --git a/training/runs/v2/eval/ep_2s_success.gif b/training/runs/v2/eval/ep_2s_success.gif
new file mode 100644
index 0000000..4b35be6
Binary files /dev/null and b/training/runs/v2/eval/ep_2s_success.gif differ
diff --git a/training/runs/v2/eval/ep_3s_success.gif b/training/runs/v2/eval/ep_3s_success.gif
new file mode 100644
index 0000000..8695631
Binary files /dev/null and b/training/runs/v2/eval/ep_3s_success.gif differ
diff --git a/training/runs/v2/eval/ep_4s_success.gif b/training/runs/v2/eval/ep_4s_success.gif
new file mode 100644
index 0000000..8af261e
Binary files /dev/null and b/training/runs/v2/eval/ep_4s_success.gif differ
diff --git a/training/runs/v2/eval/ep_5s_success.gif b/training/runs/v2/eval/ep_5s_success.gif
new file mode 100644
index 0000000..d4c1523
Binary files /dev/null and b/training/runs/v2/eval/ep_5s_success.gif differ
diff --git a/training/runs/v2/eval/ep_6s_success.gif b/training/runs/v2/eval/ep_6s_success.gif
new file mode 100644
index 0000000..375beda
Binary files /dev/null and b/training/runs/v2/eval/ep_6s_success.gif differ
diff --git a/training/runs/v2/eval/ep_7s_success.gif b/training/runs/v2/eval/ep_7s_success.gif
new file mode 100644
index 0000000..f78c97b
Binary files /dev/null and b/training/runs/v2/eval/ep_7s_success.gif differ
diff --git a/training/runs/v2/eval/ep_8s_success.gif b/training/runs/v2/eval/ep_8s_success.gif
new file mode 100644
index 0000000..c4d3e49
Binary files /dev/null and b/training/runs/v2/eval/ep_8s_success.gif differ
diff --git a/training/runs/v2/eval/ep_9s_success.gif b/training/runs/v2/eval/ep_9s_success.gif
new file mode 100644
index 0000000..ae6dc44
Binary files /dev/null and b/training/runs/v2/eval/ep_9s_success.gif differ
diff --git a/training/runs/v2/eval/traj_10s_success.png b/training/runs/v2/eval/traj_10s_success.png
new file mode 100644
index 0000000..0b6dc1a
Binary files /dev/null and b/training/runs/v2/eval/traj_10s_success.png differ
diff --git a/training/runs/v2/eval/traj_1s_success.png b/training/runs/v2/eval/traj_1s_success.png
new file mode 100644
index 0000000..ad315fb
Binary files /dev/null and b/training/runs/v2/eval/traj_1s_success.png differ
diff --git a/training/runs/v2/eval/traj_2s_success.png b/training/runs/v2/eval/traj_2s_success.png
new file mode 100644
index 0000000..6af9635
Binary files /dev/null and b/training/runs/v2/eval/traj_2s_success.png differ
diff --git a/training/runs/v2/eval/traj_3s_success.png b/training/runs/v2/eval/traj_3s_success.png
new file mode 100644
index 0000000..d8d0f25
Binary files /dev/null and b/training/runs/v2/eval/traj_3s_success.png differ
diff --git a/training/runs/v2/eval/traj_4s_success.png b/training/runs/v2/eval/traj_4s_success.png
new file mode 100644
index 0000000..41070d1
Binary files /dev/null and b/training/runs/v2/eval/traj_4s_success.png differ
diff --git a/training/runs/v2/eval/traj_5s_success.png b/training/runs/v2/eval/traj_5s_success.png
new file mode 100644
index 0000000..c052565
Binary files /dev/null and b/training/runs/v2/eval/traj_5s_success.png differ
diff --git a/training/runs/v2/eval/traj_6s_success.png b/training/runs/v2/eval/traj_6s_success.png
new file mode 100644
index 0000000..aa7f69b
Binary files /dev/null and b/training/runs/v2/eval/traj_6s_success.png differ
diff --git a/training/runs/v2/eval/traj_7s_success.png b/training/runs/v2/eval/traj_7s_success.png
new file mode 100644
index 0000000..0bd27ba
Binary files /dev/null and b/training/runs/v2/eval/traj_7s_success.png differ
diff --git a/training/runs/v2/eval/traj_8s_success.png b/training/runs/v2/eval/traj_8s_success.png
new file mode 100644
index 0000000..de89f95
Binary files /dev/null and b/training/runs/v2/eval/traj_8s_success.png differ
diff --git a/training/runs/v2/eval/traj_9s_success.png b/training/runs/v2/eval/traj_9s_success.png
new file mode 100644
index 0000000..d9075cc
Binary files /dev/null and b/training/runs/v2/eval/traj_9s_success.png differ
diff --git a/training/runs/v2/eval/ts_10s_success.png b/training/runs/v2/eval/ts_10s_success.png
new file mode 100644
index 0000000..ab13e1d
Binary files /dev/null and b/training/runs/v2/eval/ts_10s_success.png differ
diff --git a/training/runs/v2/eval/ts_1s_success.png b/training/runs/v2/eval/ts_1s_success.png
new file mode 100644
index 0000000..63eb7d9
Binary files /dev/null and b/training/runs/v2/eval/ts_1s_success.png differ
diff --git a/training/runs/v2/eval/ts_2s_success.png b/training/runs/v2/eval/ts_2s_success.png
new file mode 100644
index 0000000..cd7970f
Binary files /dev/null and b/training/runs/v2/eval/ts_2s_success.png differ
diff --git a/training/runs/v2/eval/ts_3s_success.png b/training/runs/v2/eval/ts_3s_success.png
new file mode 100644
index 0000000..f4d29b7
Binary files /dev/null and b/training/runs/v2/eval/ts_3s_success.png differ
diff --git a/training/runs/v2/eval/ts_4s_success.png b/training/runs/v2/eval/ts_4s_success.png
new file mode 100644
index 0000000..f982b19
Binary files /dev/null and b/training/runs/v2/eval/ts_4s_success.png differ
diff --git a/training/runs/v2/eval/ts_5s_success.png b/training/runs/v2/eval/ts_5s_success.png
new file mode 100644
index 0000000..9166e19
Binary files /dev/null and b/training/runs/v2/eval/ts_5s_success.png differ
diff --git a/training/runs/v2/eval/ts_6s_success.png b/training/runs/v2/eval/ts_6s_success.png
new file mode 100644
index 0000000..05e743e
Binary files /dev/null and b/training/runs/v2/eval/ts_6s_success.png differ
diff --git a/training/runs/v2/eval/ts_7s_success.png b/training/runs/v2/eval/ts_7s_success.png
new file mode 100644
index 0000000..5802086
Binary files /dev/null and b/training/runs/v2/eval/ts_7s_success.png differ
diff --git a/training/runs/v2/eval/ts_8s_success.png b/training/runs/v2/eval/ts_8s_success.png
new file mode 100644
index 0000000..e3a3be2
Binary files /dev/null and b/training/runs/v2/eval/ts_8s_success.png differ
diff --git a/training/runs/v2/eval/ts_9s_success.png b/training/runs/v2/eval/ts_9s_success.png
new file mode 100644
index 0000000..a8cc747
Binary files /dev/null and b/training/runs/v2/eval/ts_9s_success.png differ
diff --git a/training/runs/v2/final_model.zip b/training/runs/v2/final_model.zip
new file mode 100644
index 0000000..c3f69ce
Binary files /dev/null and b/training/runs/v2/final_model.zip differ
diff --git a/training/runs/v2/stage_results.json b/training/runs/v2/stage_results.json
new file mode 100644
index 0000000..97dcbc3
--- /dev/null
+++ b/training/runs/v2/stage_results.json
@@ -0,0 +1,197 @@
+[
+  {
+    "sr": 1.0,
+    "mean_len": 234.0,
+    "mean_min_pen": 3.6668872674306234,
+    "mean_act": 0.4068990752695293,
+    "failure_modes": {
+      "SUCCESS": 30
+    },
+    "reward_per_step": {
+      "progress": 0.11183513424165568,
+      "alignment": 0.0002786317654047819,
+      "compact": 0.0,
+      "wall_touch": 0.0,
+      "pen_bonus": 0.042735042735042736,
+      "step_cost": -0.019999999999999716,
+      "complete": 0.42735042735042733
+    },
+    "n_sheep": 1
+  },
+  {
+    "sr": 0.8666666666666667,
+    "mean_len": 1063.6666666666667,
+    "mean_min_pen": 4.120940693219503,
+    "mean_act": 0.5870139278816712,
+    "failure_modes": {
+      "SUCCESS": 26,
+      "COMPACT_CANT_DRIVE": 4
+    },
+    "reward_per_step": {
+      "progress": 0.05651345582855781,
+      "alignment": 0.007121706701510673,
+      "compact": 0.0,
+      "wall_touch": 0.0,
+      "pen_bonus": 0.01629583202757756,
+      "step_cost": -0.0199999999999909,
+      "complete": 0.08147916013788781
+    },
+    "n_sheep": 2
+  },
+  {
+    "sr": 1.0,
+    "mean_len": 768.6,
+    "mean_min_pen": 3.4802104949951174,
+    "mean_act": 0.7173416881465967,
+    "failure_modes": {
+      "SUCCESS": 30
+    },
+    "reward_per_step": {
+      "progress": 0.11210350058336283,
+      "alignment": 0.007752684222105381,
+      "compact": 0.0,
+      "wall_touch": 0.0,
+      "pen_bonus": 0.039032006245121,
+      "step_cost": -0.019999999999994387,
+      "complete": 0.13010668748373666
+    },
+    "n_sheep": 3
+  },
+  {
+    "sr": 1.0,
+    "mean_len": 749.8666666666667,
+    "mean_min_pen": 3.491257842381795,
+    "mean_act": 1.2302732761302806,
+    "failure_modes": {
+      "SUCCESS": 30
+    },
+    "reward_per_step": {
+      "progress": 0.15859288932254823,
+      "alignment": 0.011327628562653137,
+      "compact": 0.0,
+      "wall_touch": 0.0,
+      "pen_bonus": 0.05334281650071124,
+      "step_cost": -0.0199999999999947,
+      "complete": 0.13335704125177808
+    },
+    "n_sheep": 4
+  },
+  {
+    "sr": 0.9666666666666667,
+    "mean_len": 920.5666666666667,
+    "mean_min_pen": 3.2368871172269187,
+    "mean_act": 1.329068384219205,
+    "failure_modes": {
+      "SUCCESS": 29,
+      "PARTIAL_3of5": 1
+    },
+    "reward_per_step": {
+      "progress": 0.15654392868672135,
+      "alignment": 0.013497823599666012,
+      "compact": 0.0,
+      "wall_touch": 0.0,
+      "pen_bonus": 0.05359017996161784,
+      "step_cost": -0.019999999999992312,
+      "complete": 0.10500778505992686
+    },
+    "n_sheep": 5
+  },
+  {
+    "sr": 0.9666666666666667,
+    "mean_len": 1193.2333333333333,
+    "mean_min_pen": 3.4217512369155885,
+    "mean_act": 1.3575613093489967,
+    "failure_modes": {
+      "COMPACT_CANT_DRIVE": 1,
+      "SUCCESS": 29
+    },
+    "reward_per_step": {
+      "progress": 0.15969395095863717,
+      "alignment": 0.017340700156353795,
+      "compact": 0.0,
+      "wall_touch": 0.0,
+      "pen_bonus": 0.049166131240048046,
+      "step_cost": -0.01999999999998991,
+      "complete": 0.08101237533871554
+    },
+    "n_sheep": 6
+  },
+  {
+    "sr": 1.0,
+    "mean_len": 1209.4666666666667,
+    "mean_min_pen": 3.2339003403981526,
+    "mean_act": 1.3714931576761524,
+    "failure_modes": {
+      "SUCCESS": 30
+    },
+    "reward_per_step": {
+      "progress": 0.17738547200352864,
+      "alignment": 0.017914342656107935,
+      "compact": 0.0,
+      "wall_touch": 0.0,
+      "pen_bonus": 0.057876750082681075,
+      "step_cost": -0.019999999999989804,
+      "complete": 0.08268107154668725
+    },
+    "n_sheep": 7
+  },
+  {
+    "sr": 1.0,
+    "mean_len": 1491.7666666666667,
+    "mean_min_pen": 3.216744065284729,
+    "mean_act": 1.3783802580111435,
+    "failure_modes": {
+      "SUCCESS": 30
+    },
+    "reward_per_step": {
+      "progress": 0.19162546125035912,
+      "alignment": 0.018971863842493202,
+      "compact": 0.0,
+      "wall_touch": 0.0,
+      "pen_bonus": 0.05362768976381472,
+      "step_cost": -0.01999999999998829,
+      "complete": 0.06703461220476839
+    },
+    "n_sheep": 8
+  },
+  {
+    "sr": 0.9,
+    "mean_len": 1627.5666666666666,
+    "mean_min_pen": 3.23857311407725,
+    "mean_act": 1.3832202011732966,
+    "failure_modes": {
+      "SUCCESS": 27,
+      "COMPACT_CANT_DRIVE": 3
+    },
+    "reward_per_step": {
+      "progress": 0.18015228593205654,
+      "alignment": 0.020407598899987247,
+      "compact": 0.0,
+      "wall_touch": 0.0,
+      "pen_bonus": 0.05140598439388044,
+      "step_cost": -0.01999999999998775,
+      "complete": 0.055297274049194094
+    },
+    "n_sheep": 9
+  },
+  {
+    "sr": 0.9333333333333333,
+    "mean_len": 1869.9666666666667,
+    "mean_min_pen": 3.1344878753026326,
+    "mean_act": 1.3841143385300063,
+    "failure_modes": {
+      "SUCCESS": 28,
+      "COMPACT_CANT_DRIVE": 2
+    },
+    "reward_per_step": {
+      "progress": 0.17267533684098152,
+      "alignment": 0.021850885374692264,
+      "compact": 0.0,
+      "wall_touch": 0.0,
+      "pen_bonus": 0.05222909499278062,
+      "step_cost": -0.019999999999986983,
+      "complete": 0.04991176313303267
+    },
+    "n_sheep": 10
+  }
+]
\ No newline at end of file
diff --git a/training/runs/v2/success_rate.png b/training/runs/v2/success_rate.png
new file mode 100644
index 0000000..030c861
Binary files /dev/null and b/training/runs/v2/success_rate.png differ
diff --git a/training/runs/v2/vecnorm.pkl b/training/runs/v2/vecnorm.pkl
new file mode 100644
index 0000000..776703e
Binary files /dev/null and b/training/runs/v2/vecnorm.pkl differ