diff --git a/training/config.json b/training/config.json
index 1f25dd3..b1e5149 100644
--- a/training/config.json
+++ b/training/config.json
@@ -9,6 +9,6 @@
     "WALL_TOUCH_BUFFER": 0.4,
     "ALIGN_SHAPE": "standoff",
     "ALIGN_GATED": true,
-    "ENTRY_AWARE": false,
+    "ENTRY_AWARE": true,
     "ent_coef": 0.02
 }
diff --git a/training/runs/v1.log b/training/runs/v1.log
new file mode 100644
index 0000000..1a4260e
--- /dev/null
+++ b/training/runs/v1.log
@@ -0,0 +1,242 @@
+Config loaded from config.json
+Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.01, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02}
+Run dir: runs/v1
+Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
+
+
+[Stage n_sheep=1] training 1,500,000 steps
+           ... [1 sheep | 100,000 steps | ret(last 40)=-19.52  win_sr=2%  cum_sr=2%]
+           ... [1 sheep | 200,000 steps | ret(last 50)=-21.11  win_sr=4%  cum_sr=4%]
+           ... [1 sheep | 300,000 steps | ret(last 50)=-7.06  win_sr=12%  cum_sr=7%]
+           ... [1 sheep | 400,000 steps | ret(last 50)=+18.18  win_sr=90%  cum_sr=40%]
+           ... [1 sheep | 500,000 steps | ret(last 50)=+16.17  win_sr=100%  cum_sr=69%]
+           ... [1 sheep | 600,000 steps | ret(last 50)=+14.68  win_sr=100%  cum_sr=82%]
+           ... [1 sheep | 700,000 steps | ret(last 50)=+14.33  win_sr=100%  cum_sr=88%]
+           ... [1 sheep | 800,000 steps | ret(last 50)=+14.20  win_sr=100%  cum_sr=91%]
+           ... [1 sheep | 900,000 steps | ret(last 50)=+13.82  win_sr=100%  cum_sr=93%]
+           ... [1 sheep | 1,000,000 steps | ret(last 50)=+13.76  win_sr=100%  cum_sr=94%]
+           ... [1 sheep | 1,100,000 steps | ret(last 50)=+13.72  win_sr=100%  cum_sr=95%]
+           ... [1 sheep | 1,200,000 steps | ret(last 50)=+13.41  win_sr=100%  cum_sr=95%]
+           ... [1 sheep | 1,300,000 steps | ret(last 50)=+13.42  win_sr=100%  cum_sr=96%]
+           ... [1 sheep | 1,400,000 steps | ret(last 50)=+13.40  win_sr=100%  cum_sr=96%]
+           ... [1 sheep | 1,500,000 steps | ret(last 50)=+13.24  win_sr=100%  cum_sr=97%]
+[Stage n_sheep=1] evaluating 30 eps
+[Stage n_sheep=1] sr=100%  mean_len=243  mean_min_pen=3.7m  mean_act=0.39
+  failure modes: SUCCESS=30
+  reward/step: progress=+0.1141  alignment=+0.0003  compact=+0.0000  wall_touch=+0.0000  pen_bonus=+0.0412  step_cost=-0.0200  complete=+0.4115
+
+[Stage n_sheep=2] training 1,500,000 steps
+           ... [2 sheep | 1,507,336 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [2 sheep | 1,607,336 steps | ret(last 42)=-4.07  win_sr=7%  cum_sr=7%]
+           ... [2 sheep | 1,707,336 steps | ret(last 50)=-6.10  win_sr=0%  cum_sr=4%]
+           ... [2 sheep | 1,807,336 steps | ret(last 50)=-5.57  win_sr=2%  cum_sr=3%]
+           ... [2 sheep | 1,907,336 steps | ret(last 50)=-5.04  win_sr=4%  cum_sr=4%]
+           ... [2 sheep | 2,007,336 steps | ret(last 50)=-4.27  win_sr=10%  cum_sr=5%]
+           ... [2 sheep | 2,107,336 steps | ret(last 50)=-4.42  win_sr=6%  cum_sr=5%]
+           ... [2 sheep | 2,207,336 steps | ret(last 50)=+4.57  win_sr=50%  cum_sr=13%]
+           ... [2 sheep | 2,307,336 steps | ret(last 50)=+11.35  win_sr=70%  cum_sr=24%]
+           ... [2 sheep | 2,407,336 steps | ret(last 50)=+15.75  win_sr=86%  cum_sr=32%]
+           ... [2 sheep | 2,507,336 steps | ret(last 50)=+19.97  win_sr=100%  cum_sr=44%]
+           ... [2 sheep | 2,607,336 steps | ret(last 50)=+20.73  win_sr=100%  cum_sr=54%]
+           ... [2 sheep | 2,707,336 steps | ret(last 50)=+19.81  win_sr=100%  cum_sr=62%]
+           ... [2 sheep | 2,807,336 steps | ret(last 50)=+20.83  win_sr=100%  cum_sr=67%]
+           ... [2 sheep | 2,907,336 steps | ret(last 50)=+20.43  win_sr=100%  cum_sr=72%]
+           ... [2 sheep | 3,007,336 steps | ret(last 50)=+19.65  win_sr=100%  cum_sr=75%]
+[Stage n_sheep=2] evaluating 30 eps
+[Stage n_sheep=2] sr=63%  mean_len=1325  mean_min_pen=3.1m  mean_act=0.42
+  failure modes: SUCCESS=19  PARTIAL_1of2=10  COMPACT_CANT_DRIVE=1
+  reward/step: progress=+0.0453  alignment=+0.0065  compact=+0.0000  wall_touch=-0.0052  pen_bonus=+0.0123  step_cost=-0.0200  complete=+0.0478
+
+[Stage n_sheep=3] training 1,500,000 steps
+           ... [3 sheep | 3,014,664 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [3 sheep | 3,114,664 steps | ret(last 50)=+28.02  win_sr=100%  cum_sr=100%]
+           ... [3 sheep | 3,214,664 steps | ret(last 50)=+24.04  win_sr=96%  cum_sr=99%]
+           ... [3 sheep | 3,314,664 steps | ret(last 50)=+27.02  win_sr=100%  cum_sr=99%]
+           ... [3 sheep | 3,414,664 steps | ret(last 50)=+25.53  win_sr=100%  cum_sr=99%]
+           ... [3 sheep | 3,514,664 steps | ret(last 50)=+25.13  win_sr=96%  cum_sr=99%]
+           ... [3 sheep | 3,614,664 steps | ret(last 50)=+26.45  win_sr=100%  cum_sr=99%]
+           ... [3 sheep | 3,714,664 steps | ret(last 50)=+25.83  win_sr=100%  cum_sr=99%]
+           ... [3 sheep | 3,814,664 steps | ret(last 50)=+26.07  win_sr=100%  cum_sr=99%]
+           ... [3 sheep | 3,914,664 steps | ret(last 50)=+25.03  win_sr=96%  cum_sr=99%]
+           ... [3 sheep | 4,014,664 steps | ret(last 50)=+24.53  win_sr=98%  cum_sr=99%]
+           ... [3 sheep | 4,114,664 steps | ret(last 50)=+24.98  win_sr=100%  cum_sr=99%]
+           ... [3 sheep | 4,214,664 steps | ret(last 50)=+26.81  win_sr=100%  cum_sr=99%]
+           ... [3 sheep | 4,314,664 steps | ret(last 50)=+24.78  win_sr=98%  cum_sr=99%]
+           ... [3 sheep | 4,414,664 steps | ret(last 50)=+26.79  win_sr=100%  cum_sr=99%]
+           ... [3 sheep | 4,514,664 steps | ret(last 50)=+26.26  win_sr=100%  cum_sr=99%]
+[Stage n_sheep=3] evaluating 30 eps
+[Stage n_sheep=3] sr=97%  mean_len=828  mean_min_pen=2.7m  mean_act=1.15
+  failure modes: SUCCESS=29  PARTIAL_1of3=1
+  reward/step: progress=+0.1017  alignment=+0.0139  compact=+0.0000  wall_touch=-0.0023  pen_bonus=+0.0354  step_cost=-0.0200  complete=+0.1168
+
+[Stage n_sheep=4] training 1,500,000 steps
+           ... [4 sheep | 4,521,992 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [4 sheep | 4,621,992 steps | ret(last 50)=+28.27  win_sr=90%  cum_sr=93%]
+           ... [4 sheep | 4,721,992 steps | ret(last 50)=+31.16  win_sr=98%  cum_sr=95%]
+           ... [4 sheep | 4,821,992 steps | ret(last 50)=+30.45  win_sr=100%  cum_sr=96%]
+           ... [4 sheep | 4,921,992 steps | ret(last 50)=+31.12  win_sr=100%  cum_sr=96%]
+           ... [4 sheep | 5,021,992 steps | ret(last 50)=+30.78  win_sr=100%  cum_sr=97%]
+           ... [4 sheep | 5,121,992 steps | ret(last 50)=+30.42  win_sr=100%  cum_sr=97%]
+           ... [4 sheep | 5,221,992 steps | ret(last 50)=+31.14  win_sr=100%  cum_sr=98%]
+           ... [4 sheep | 5,321,992 steps | ret(last 50)=+31.20  win_sr=100%  cum_sr=98%]
+           ... [4 sheep | 5,421,992 steps | ret(last 50)=+30.47  win_sr=98%  cum_sr=98%]
+           ... [4 sheep | 5,521,992 steps | ret(last 50)=+30.13  win_sr=100%  cum_sr=98%]
+           ... [4 sheep | 5,621,992 steps | ret(last 50)=+28.52  win_sr=98%  cum_sr=98%]
+           ... [4 sheep | 5,721,992 steps | ret(last 50)=+31.40  win_sr=100%  cum_sr=98%]
+           ... [4 sheep | 5,821,992 steps | ret(last 50)=+30.30  win_sr=100%  cum_sr=99%]
+           ... [4 sheep | 5,921,992 steps | ret(last 50)=+29.45  win_sr=100%  cum_sr=99%]
+           ... [4 sheep | 6,021,992 steps | ret(last 50)=+30.26  win_sr=100%  cum_sr=99%]
+[Stage n_sheep=4] evaluating 30 eps
+[Stage n_sheep=4] sr=57%  mean_len=1686  mean_min_pen=1.8m  mean_act=1.01
+  failure modes: SUCCESS=17  PARTIAL_1of4=10  PARTIAL_2of4=3
+  reward/step: progress=+0.0721  alignment=+0.0085  compact=+0.0000  wall_touch=-0.0110  pen_bonus=+0.0166  step_cost=-0.0200  complete=+0.0336
+
+[Stage n_sheep=5] training 1,500,000 steps
+           ... [5 sheep | 6,029,320 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [5 sheep | 6,129,320 steps | ret(last 50)=+34.59  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,229,320 steps | ret(last 50)=+35.53  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,329,320 steps | ret(last 50)=+34.77  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,429,320 steps | ret(last 50)=+34.30  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,529,320 steps | ret(last 50)=+35.12  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,629,320 steps | ret(last 50)=+33.76  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,729,320 steps | ret(last 50)=+34.81  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,829,320 steps | ret(last 50)=+31.82  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 6,929,320 steps | ret(last 50)=+33.69  win_sr=98%  cum_sr=100%]
+           ... [5 sheep | 7,029,320 steps | ret(last 50)=+31.65  win_sr=100%  cum_sr=100%]
+           ... [5 sheep | 7,129,320 steps | ret(last 50)=+31.83  win_sr=96%  cum_sr=99%]
+           ... [5 sheep | 7,229,320 steps | ret(last 50)=+33.96  win_sr=100%  cum_sr=99%]
+           ... [5 sheep | 7,329,320 steps | ret(last 50)=+33.65  win_sr=98%  cum_sr=99%]
+           ... [5 sheep | 7,429,320 steps | ret(last 50)=+34.20  win_sr=100%  cum_sr=99%]
+           ... [5 sheep | 7,529,320 steps | ret(last 50)=+35.27  win_sr=98%  cum_sr=99%]
+[Stage n_sheep=5] evaluating 30 eps
+[Stage n_sheep=5] sr=63%  mean_len=1654  mean_min_pen=1.6m  mean_act=1.36
+  failure modes: SUCCESS=19  PARTIAL_2of5=9  PARTIAL_3of5=2
+  reward/step: progress=+0.1043  alignment=+0.0108  compact=+0.0000  wall_touch=-0.0100  pen_bonus=+0.0240  step_cost=-0.0200  complete=+0.0383
+
+[Stage n_sheep=6] training 1,500,000 steps
+           ... [6 sheep | 7,536,648 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [6 sheep | 7,636,648 steps | ret(last 50)=+37.79  win_sr=98%  cum_sr=95%]
+           ... [6 sheep | 7,736,648 steps | ret(last 50)=+38.87  win_sr=94%  cum_sr=95%]
+           ... [6 sheep | 7,836,648 steps | ret(last 50)=+37.50  win_sr=98%  cum_sr=96%]
+           ... [6 sheep | 7,936,648 steps | ret(last 50)=+39.42  win_sr=94%  cum_sr=96%]
+           ... [6 sheep | 8,036,648 steps | ret(last 50)=+38.28  win_sr=98%  cum_sr=96%]
+           ... [6 sheep | 8,136,648 steps | ret(last 50)=+36.39  win_sr=100%  cum_sr=97%]
+           ... [6 sheep | 8,236,648 steps | ret(last 50)=+39.29  win_sr=100%  cum_sr=97%]
+           ... [6 sheep | 8,336,648 steps | ret(last 50)=+37.92  win_sr=98%  cum_sr=97%]
+           ... [6 sheep | 8,436,648 steps | ret(last 50)=+38.64  win_sr=98%  cum_sr=97%]
+           ... [6 sheep | 8,536,648 steps | ret(last 50)=+38.46  win_sr=98%  cum_sr=97%]
+           ... [6 sheep | 8,636,648 steps | ret(last 50)=+38.08  win_sr=98%  cum_sr=97%]
+           ... [6 sheep | 8,736,648 steps | ret(last 50)=+36.78  win_sr=100%  cum_sr=97%]
+           ... [6 sheep | 8,836,648 steps | ret(last 50)=+36.81  win_sr=98%  cum_sr=98%]
+           ... [6 sheep | 8,936,648 steps | ret(last 50)=+37.89  win_sr=98%  cum_sr=98%]
+           ... [6 sheep | 9,036,648 steps | ret(last 50)=+36.17  win_sr=98%  cum_sr=98%]
+[Stage n_sheep=6] evaluating 30 eps
+[Stage n_sheep=6] sr=33%  mean_len=2161  mean_min_pen=1.8m  mean_act=1.37
+  failure modes: PARTIAL_5of6=14  SUCCESS=10  PARTIAL_4of6=4  COMPACT_CANT_DRIVE=2
+  reward/step: progress=+0.0915  alignment=+0.0102  compact=+0.0000  wall_touch=-0.0068  pen_bonus=+0.0225  step_cost=-0.0200  complete=+0.0154
+
+[Stage n_sheep=7] training 1,500,000 steps
+           ... [7 sheep | 9,043,976 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [7 sheep | 9,143,976 steps | ret(last 50)=+39.61  win_sr=98%  cum_sr=97%]
+           ... [7 sheep | 9,243,976 steps | ret(last 50)=+42.39  win_sr=100%  cum_sr=99%]
+           ... [7 sheep | 9,343,976 steps | ret(last 50)=+39.89  win_sr=96%  cum_sr=98%]
+           ... [7 sheep | 9,443,976 steps | ret(last 50)=+42.48  win_sr=98%  cum_sr=98%]
+           ... [7 sheep | 9,543,976 steps | ret(last 50)=+39.16  win_sr=98%  cum_sr=98%]
+           ... [7 sheep | 9,643,976 steps | ret(last 50)=+38.80  win_sr=96%  cum_sr=98%]
+           ... [7 sheep | 9,743,976 steps | ret(last 50)=+43.06  win_sr=96%  cum_sr=98%]
+           ... [7 sheep | 9,843,976 steps | ret(last 50)=+40.04  win_sr=94%  cum_sr=98%]
+           ... [7 sheep | 9,943,976 steps | ret(last 50)=+40.45  win_sr=98%  cum_sr=97%]
+           ... [7 sheep | 10,043,976 steps | ret(last 50)=+39.21  win_sr=96%  cum_sr=97%]
+           ... [7 sheep | 10,143,976 steps | ret(last 50)=+40.23  win_sr=100%  cum_sr=97%]
+           ... [7 sheep | 10,243,976 steps | ret(last 50)=+41.51  win_sr=96%  cum_sr=97%]
+           ... [7 sheep | 10,343,976 steps | ret(last 50)=+40.05  win_sr=98%  cum_sr=97%]
+           ... [7 sheep | 10,443,976 steps | ret(last 50)=+39.17  win_sr=96%  cum_sr=97%]
+           ... [7 sheep | 10,543,976 steps | ret(last 50)=+41.80  win_sr=98%  cum_sr=97%]
+[Stage n_sheep=7] evaluating 30 eps
+[Stage n_sheep=7] sr=20%  mean_len=2211  mean_min_pen=1.8m  mean_act=1.36
+  failure modes: PARTIAL_4of7=11  PARTIAL_3of7=8  SUCCESS=6  PARTIAL_6of7=3  PARTIAL_5of7=2
+  reward/step: progress=+0.0879  alignment=+0.0086  compact=+0.0000  wall_touch=-0.0142  pen_bonus=+0.0208  step_cost=-0.0200  complete=+0.0090
+
+[Stage n_sheep=8] training 1,500,000 steps
+           ... [8 sheep | 10,551,304 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [8 sheep | 10,651,304 steps | ret(last 50)=+48.67  win_sr=96%  cum_sr=97%]
+           ... [8 sheep | 10,751,304 steps | ret(last 50)=+46.60  win_sr=96%  cum_sr=97%]
+           ... [8 sheep | 10,851,304 steps | ret(last 50)=+41.39  win_sr=98%  cum_sr=96%]
+           ... [8 sheep | 10,951,304 steps | ret(last 50)=+41.47  win_sr=96%  cum_sr=96%]
+           ... [8 sheep | 11,051,304 steps | ret(last 50)=+40.29  win_sr=96%  cum_sr=95%]
+           ... [8 sheep | 11,151,304 steps | ret(last 50)=+42.96  win_sr=100%  cum_sr=96%]
+           ... [8 sheep | 11,251,304 steps | ret(last 50)=+42.87  win_sr=94%  cum_sr=96%]
+           ... [8 sheep | 11,351,304 steps | ret(last 50)=+44.71  win_sr=100%  cum_sr=96%]
+           ... [8 sheep | 11,451,304 steps | ret(last 50)=+45.20  win_sr=96%  cum_sr=96%]
+           ... [8 sheep | 11,551,304 steps | ret(last 50)=+46.82  win_sr=96%  cum_sr=96%]
+           ... [8 sheep | 11,651,304 steps | ret(last 50)=+43.23  win_sr=96%  cum_sr=96%]
+           ... [8 sheep | 11,751,304 steps | ret(last 50)=+43.77  win_sr=94%  cum_sr=96%]
+           ... [8 sheep | 11,851,304 steps | ret(last 50)=+48.78  win_sr=98%  cum_sr=96%]
+           ... [8 sheep | 11,951,304 steps | ret(last 50)=+43.19  win_sr=94%  cum_sr=96%]
+           ... [8 sheep | 12,051,304 steps | ret(last 50)=+42.83  win_sr=96%  cum_sr=96%]
+[Stage n_sheep=8] evaluating 30 eps
+[Stage n_sheep=8] sr=63%  mean_len=1745  mean_min_pen=1.7m  mean_act=1.37
+  failure modes: SUCCESS=19  PARTIAL_4of8=9  PARTIAL_1of8=1  PARTIAL_6of8=1
+  reward/step: progress=+0.1198  alignment=+0.0134  compact=+0.0000  wall_touch=-0.0107  pen_bonus=+0.0373  step_cost=-0.0200  complete=+0.0363
+
+[Stage n_sheep=9] training 1,500,000 steps
+           ... [9 sheep | 12,058,632 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [9 sheep | 12,158,632 steps | ret(last 50)=+49.04  win_sr=98%  cum_sr=97%]
+           ... [9 sheep | 12,258,632 steps | ret(last 50)=+47.01  win_sr=96%  cum_sr=97%]
+           ... [9 sheep | 12,358,632 steps | ret(last 50)=+48.47  win_sr=90%  cum_sr=95%]
+           ... [9 sheep | 12,458,632 steps | ret(last 50)=+46.43  win_sr=88%  cum_sr=94%]
+           ... [9 sheep | 12,558,632 steps | ret(last 50)=+44.78  win_sr=94%  cum_sr=94%]
+           ... [9 sheep | 12,658,632 steps | ret(last 50)=+49.15  win_sr=100%  cum_sr=95%]
+           ... [9 sheep | 12,758,632 steps | ret(last 50)=+47.87  win_sr=94%  cum_sr=95%]
+           ... [9 sheep | 12,858,632 steps | ret(last 50)=+50.32  win_sr=96%  cum_sr=95%]
+           ... [9 sheep | 12,958,632 steps | ret(last 50)=+47.07  win_sr=94%  cum_sr=95%]
+           ... [9 sheep | 13,058,632 steps | ret(last 50)=+48.71  win_sr=100%  cum_sr=96%]
+           ... [9 sheep | 13,158,632 steps | ret(last 50)=+47.69  win_sr=96%  cum_sr=96%]
+           ... [9 sheep | 13,258,632 steps | ret(last 50)=+46.83  win_sr=98%  cum_sr=96%]
+           ... [9 sheep | 13,358,632 steps | ret(last 50)=+48.27  win_sr=94%  cum_sr=96%]
+           ... [9 sheep | 13,458,632 steps | ret(last 50)=+47.61  win_sr=88%  cum_sr=95%]
+           ... [9 sheep | 13,558,632 steps | ret(last 50)=+47.29  win_sr=96%  cum_sr=95%]
+[Stage n_sheep=9] evaluating 30 eps
+[Stage n_sheep=9] sr=83%  mean_len=1723  mean_min_pen=1.8m  mean_act=1.38
+  failure modes: SUCCESS=25  PARTIAL_5of9=3  NEVER_COMPACT=1  PARTIAL_6of9=1
+  reward/step: progress=+0.1562  alignment=+0.0155  compact=+0.0000  wall_touch=-0.0073  pen_bonus=+0.0480  step_cost=-0.0200  complete=+0.0484
+
+[Stage n_sheep=10] training 1,500,000 steps
+           ... [10 sheep | 13,565,960 steps | ret(last 0)=+nan  win_sr=nan%  cum_sr=nan%]
+           ... [10 sheep | 13,665,960 steps | ret(last 50)=+50.02  win_sr=94%  cum_sr=95%]
+           ... [10 sheep | 13,765,960 steps | ret(last 50)=+48.46  win_sr=84%  cum_sr=90%]
+           ... [10 sheep | 13,865,960 steps | ret(last 50)=+48.24  win_sr=84%  cum_sr=87%]
+           ... [10 sheep | 13,965,960 steps | ret(last 50)=+50.64  win_sr=82%  cum_sr=86%]
+           ... [10 sheep | 14,065,960 steps | ret(last 50)=+53.92  win_sr=86%  cum_sr=86%]
+           ... [10 sheep | 14,165,960 steps | ret(last 50)=+51.33  win_sr=80%  cum_sr=85%]
+           ... [10 sheep | 14,265,960 steps | ret(last 50)=+49.55  win_sr=76%  cum_sr=84%]
+           ... [10 sheep | 14,365,960 steps | ret(last 50)=+51.05  win_sr=92%  cum_sr=85%]
+           ... [10 sheep | 14,465,960 steps | ret(last 50)=+47.58  win_sr=86%  cum_sr=85%]
+           ... [10 sheep | 14,565,960 steps | ret(last 50)=+47.98  win_sr=74%  cum_sr=84%]
+           ... [10 sheep | 14,665,960 steps | ret(last 50)=+50.60  win_sr=82%  cum_sr=84%]
+           ... [10 sheep | 14,765,960 steps | ret(last 50)=+51.25  win_sr=88%  cum_sr=84%]
+           ... [10 sheep | 14,865,960 steps | ret(last 50)=+50.54  win_sr=92%  cum_sr=85%]
+           ... [10 sheep | 14,965,960 steps | ret(last 50)=+50.94  win_sr=92%  cum_sr=86%]
+           ... [10 sheep | 15,065,960 steps | ret(last 50)=+50.54  win_sr=90%  cum_sr=86%]
+[Stage n_sheep=10] evaluating 30 eps
+[Stage n_sheep=10] sr=27%  mean_len=2267  mean_min_pen=2.2m  mean_act=1.38
+  failure modes: PARTIAL_6of10=16  SUCCESS=8  COMPACT_CANT_DRIVE=2  PARTIAL_7of10=1  PARTIAL_9of10=1  PARTIAL_5of10=1  PARTIAL_8of10=1
+  reward/step: progress=+0.1360  alignment=+0.0134  compact=+0.0000  wall_touch=-0.0122  pen_bonus=+0.0301  step_cost=-0.0200  complete=+0.0118
+
+======================================================================
+  TRAINING SUMMARY
+======================================================================
+  n_sheep=1  sr=100%  len=  243  min_pen=  3.7m  act=0.39
+  n_sheep=2  sr= 63%  len= 1325  min_pen=  3.1m  act=0.42
+  n_sheep=3  sr= 97%  len=  828  min_pen=  2.7m  act=1.15
+  n_sheep=4  sr= 57%  len= 1686  min_pen=  1.8m  act=1.01
+  n_sheep=5  sr= 63%  len= 1654  min_pen=  1.6m  act=1.36
+  n_sheep=6  sr= 33%  len= 2161  min_pen=  1.8m  act=1.37
+  n_sheep=7  sr= 20%  len= 2211  min_pen=  1.8m  act=1.36
+  n_sheep=8  sr= 63%  len= 1745  min_pen=  1.7m  act=1.37
+  n_sheep=9  sr= 83%  len= 1723  min_pen=  1.8m  act=1.38
+  n_sheep=10  sr= 27%  len= 2267  min_pen=  2.2m  act=1.38
+
+  Total time: 97.6 min
+  Artefacts:  runs/v1/
+  Plots:      runs/v1/success_rate.png, runs/v1/eval/
diff --git a/training/runs/v1/config.json b/training/runs/v1/config.json
new file mode 100644
index 0000000..2ac8343
--- /dev/null
+++ b/training/runs/v1/config.json
@@ -0,0 +1,14 @@
+{
+  "W_PER_SHEEP": 2.0,
+  "W_ALIGN": 0.05,
+  "W_PEN_BONUS": 10.0,
+  "W_COMPLETE": 100.0,
+  "W_STEP_COST": 0.02,
+  "W_COMPACT": 0.0,
+  "W_WALL_TOUCH": 0.01,
+  "WALL_TOUCH_BUFFER": 0.4,
+  "ALIGN_SHAPE": "standoff",
+  "ALIGN_GATED": true,
+  "ENTRY_AWARE": true,
+  "ent_coef": 0.02
+}
\ No newline at end of file
diff --git a/training/runs/v1/eval/traj_10s_fail.png b/training/runs/v1/eval/traj_10s_fail.png
new file mode 100644
index 0000000..427a6f4
Binary files /dev/null and b/training/runs/v1/eval/traj_10s_fail.png differ
diff --git a/training/runs/v1/eval/traj_1s_success.png b/training/runs/v1/eval/traj_1s_success.png
new file mode 100644
index 0000000..2658807
Binary files /dev/null and b/training/runs/v1/eval/traj_1s_success.png differ
diff --git a/training/runs/v1/eval/traj_2s_success.png b/training/runs/v1/eval/traj_2s_success.png
new file mode 100644
index 0000000..bac6823
Binary files /dev/null and b/training/runs/v1/eval/traj_2s_success.png differ
diff --git a/training/runs/v1/eval/traj_3s_success.png b/training/runs/v1/eval/traj_3s_success.png
new file mode 100644
index 0000000..22e76c8
Binary files /dev/null and b/training/runs/v1/eval/traj_3s_success.png differ
diff --git a/training/runs/v1/eval/traj_4s_success.png b/training/runs/v1/eval/traj_4s_success.png
new file mode 100644
index 0000000..6d9574d
Binary files /dev/null and b/training/runs/v1/eval/traj_4s_success.png differ
diff --git a/training/runs/v1/eval/traj_5s_fail.png b/training/runs/v1/eval/traj_5s_fail.png
new file mode 100644
index 0000000..e7b7026
Binary files /dev/null and b/training/runs/v1/eval/traj_5s_fail.png differ
diff --git a/training/runs/v1/eval/traj_6s_fail.png b/training/runs/v1/eval/traj_6s_fail.png
new file mode 100644
index 0000000..1e07f4f
Binary files /dev/null and b/training/runs/v1/eval/traj_6s_fail.png differ
diff --git a/training/runs/v1/eval/traj_7s_fail.png b/training/runs/v1/eval/traj_7s_fail.png
new file mode 100644
index 0000000..5ed880c
Binary files /dev/null and b/training/runs/v1/eval/traj_7s_fail.png differ
diff --git a/training/runs/v1/eval/traj_8s_fail.png b/training/runs/v1/eval/traj_8s_fail.png
new file mode 100644
index 0000000..0001a69
Binary files /dev/null and b/training/runs/v1/eval/traj_8s_fail.png differ
diff --git a/training/runs/v1/eval/traj_9s_success.png b/training/runs/v1/eval/traj_9s_success.png
new file mode 100644
index 0000000..9b2cd89
Binary files /dev/null and b/training/runs/v1/eval/traj_9s_success.png differ
diff --git a/training/runs/v1/eval/ts_10s_fail.png b/training/runs/v1/eval/ts_10s_fail.png
new file mode 100644
index 0000000..b598241
Binary files /dev/null and b/training/runs/v1/eval/ts_10s_fail.png differ
diff --git a/training/runs/v1/eval/ts_1s_success.png b/training/runs/v1/eval/ts_1s_success.png
new file mode 100644
index 0000000..df6dc3a
Binary files /dev/null and b/training/runs/v1/eval/ts_1s_success.png differ
diff --git a/training/runs/v1/eval/ts_2s_success.png b/training/runs/v1/eval/ts_2s_success.png
new file mode 100644
index 0000000..deb1d46
Binary files /dev/null and b/training/runs/v1/eval/ts_2s_success.png differ
diff --git a/training/runs/v1/eval/ts_3s_success.png b/training/runs/v1/eval/ts_3s_success.png
new file mode 100644
index 0000000..ac27553
Binary files /dev/null and b/training/runs/v1/eval/ts_3s_success.png differ
diff --git a/training/runs/v1/eval/ts_4s_success.png b/training/runs/v1/eval/ts_4s_success.png
new file mode 100644
index 0000000..fc75c1f
Binary files /dev/null and b/training/runs/v1/eval/ts_4s_success.png differ
diff --git a/training/runs/v1/eval/ts_5s_fail.png b/training/runs/v1/eval/ts_5s_fail.png
new file mode 100644
index 0000000..0125796
Binary files /dev/null and b/training/runs/v1/eval/ts_5s_fail.png differ
diff --git a/training/runs/v1/eval/ts_6s_fail.png b/training/runs/v1/eval/ts_6s_fail.png
new file mode 100644
index 0000000..d8e4ca1
Binary files /dev/null and b/training/runs/v1/eval/ts_6s_fail.png differ
diff --git a/training/runs/v1/eval/ts_7s_fail.png b/training/runs/v1/eval/ts_7s_fail.png
new file mode 100644
index 0000000..e17d272
Binary files /dev/null and b/training/runs/v1/eval/ts_7s_fail.png differ
diff --git a/training/runs/v1/eval/ts_8s_fail.png b/training/runs/v1/eval/ts_8s_fail.png
new file mode 100644
index 0000000..4e16ce9
Binary files /dev/null and b/training/runs/v1/eval/ts_8s_fail.png differ
diff --git a/training/runs/v1/eval/ts_9s_success.png b/training/runs/v1/eval/ts_9s_success.png
new file mode 100644
index 0000000..8f72338
Binary files /dev/null and b/training/runs/v1/eval/ts_9s_success.png differ
diff --git a/training/runs/v1/final_model.zip b/training/runs/v1/final_model.zip
new file mode 100644
index 0000000..af81e54
Binary files /dev/null and b/training/runs/v1/final_model.zip differ
diff --git a/training/runs/v1/stage_results.json b/training/runs/v1/stage_results.json
new file mode 100644
index 0000000..700d5db
--- /dev/null
+++ b/training/runs/v1/stage_results.json
@@ -0,0 +1,218 @@
+[
+  {
+    "sr": 1.0,
+    "mean_len": 243.0,
+    "mean_min_pen": 3.7120999256769815,
+    "mean_act": 0.3930775734995823,
+    "failure_modes": {
+      "SUCCESS": 30
+    },
+    "reward_per_step": {
+      "progress": 0.11411363949746262,
+      "alignment": 0.00034729298515464674,
+      "compact": 0.0,
+      "wall_touch": 0.0,
+      "pen_bonus": 0.0411522633744856,
+      "step_cost": -0.020000000000000108,
+      "complete": 0.411522633744856
+    },
+    "n_sheep": 1
+  },
+  {
+    "sr": 0.6333333333333333,
+    "mean_len": 1324.9333333333334,
+    "mean_min_pen": 3.108120004336039,
+    "mean_act": 0.41626948835668365,
+    "failure_modes": {
+      "SUCCESS": 19,
+      "PARTIAL_1of2": 10,
+      "COMPACT_CANT_DRIVE": 1
+    },
+    "reward_per_step": {
+      "progress": 0.045259184195888084,
+      "alignment": 0.006548802090560675,
+      "compact": 0.0,
+      "wall_touch": -0.005243643148915256,
+      "pen_bonus": 0.012327664284995472,
+      "step_cost": -0.019999999999989106,
+      "complete": 0.04780114722753346
+    },
+    "n_sheep": 2
+  },
+  {
+    "sr": 0.9666666666666667,
+    "mean_len": 827.7,
+    "mean_min_pen": 2.727696478366852,
+    "mean_act": 1.1521936838813016,
+    "failure_modes": {
+      "PARTIAL_1of3": 1,
+      "SUCCESS": 29
+    },
+    "reward_per_step": {
+      "progress": 0.10166334638295625,
+      "alignment": 0.013859153429505626,
+      "compact": 0.0,
+      "wall_touch": -0.0022604217500245883,
+      "pen_bonus": 0.03543957150336273,
+      "step_cost": -0.019999999999993488,
+      "complete": 0.11678949699971809
+    },
+    "n_sheep": 3
+  },
+  {
+    "sr": 0.5666666666666667,
+    "mean_len": 1686.0333333333333,
+    "mean_min_pen": 1.7675368865331014,
+    "mean_act": 1.0093803780622697,
+    "failure_modes": {
+      "PARTIAL_1of4": 10,
+      "SUCCESS": 17,
+      "PARTIAL_2of4": 3
+    },
+    "reward_per_step": {
+      "progress": 0.07213990871824405,
+      "alignment": 0.008500170591885925,
+      "compact": 0.0,
+      "wall_touch": -0.01096873654520888,
+      "pen_bonus": 0.016607026353769202,
+      "step_cost": -0.019999999999987545,
+      "complete": 0.03360945809691386
+    },
+    "n_sheep": 4
+  },
+  {
+    "sr": 0.6333333333333333,
+    "mean_len": 1653.8333333333333,
+    "mean_min_pen": 1.6310479640960693,
+    "mean_act": 1.3572492104366454,
+    "failure_modes": {
+      "PARTIAL_2of5": 9,
+      "SUCCESS": 19,
+      "PARTIAL_3of5": 2
+    },
+    "reward_per_step": {
+      "progress": 0.10426509678994506,
+      "alignment": 0.010847962450905363,
+      "compact": 0.0,
+      "wall_touch": -0.010001784418012447,
+      "pen_bonus": 0.02398468205179885,
+      "step_cost": -0.019999999999987656,
+      "complete": 0.038294870502872114
+    },
+    "n_sheep": 5
+  },
+  {
+    "sr": 0.3333333333333333,
+    "mean_len": 2161.0333333333333,
+    "mean_min_pen": 1.7910769859949747,
+    "mean_act": 1.3728399181766682,
+    "failure_modes": {
+      "SUCCESS": 10,
+      "PARTIAL_5of6": 14,
+      "PARTIAL_4of6": 4,
+      "COMPACT_CANT_DRIVE": 2
+    },
+    "reward_per_step": {
+      "progress": 0.09152597398477412,
+      "alignment": 0.010169068168091603,
+      "compact": 0.0,
+      "wall_touch": -0.006849364742307595,
+      "pen_bonus": 0.022520090697351575,
+      "step_cost": -0.019999999999986286,
+      "complete": 0.015424719655720258
+    },
+    "n_sheep": 6
+  },
+  {
+    "sr": 0.2,
+    "mean_len": 2211.2,
+    "mean_min_pen": 1.8339664101600648,
+    "mean_act": 1.3635542380694952,
+    "failure_modes": {
+      "PARTIAL_5of7": 2,
+      "SUCCESS": 6,
+      "PARTIAL_6of7": 3,
+      "PARTIAL_3of7": 8,
+      "PARTIAL_4of7": 11
+    },
+    "reward_per_step": {
+      "progress": 0.08794138462674025,
+      "alignment": 0.008588877237149285,
+      "compact": 0.0,
+      "wall_touch": -0.014176997336213705,
+      "pen_bonus": 0.020803183791606367,
+      "step_cost": -0.019999999999986185,
+      "complete": 0.009044862518089725
+    },
+    "n_sheep": 7
+  },
+  {
+    "sr": 0.6333333333333333,
+    "mean_len": 1744.5666666666666,
+    "mean_min_pen": 1.7331914146741232,
+    "mean_act": 1.366222499606064,
+    "failure_modes": {
+      "SUCCESS": 19,
+      "PARTIAL_4of8": 9,
+      "PARTIAL_1of8": 1,
+      "PARTIAL_6of8": 1
+    },
+    "reward_per_step": {
+      "progress": 0.11981066786559799,
+      "alignment": 0.013385751275637974,
+      "compact": 0.0,
+      "wall_touch": -0.010737474453107049,
+      "pen_bonus": 0.037258536026138295,
+      "step_cost": -0.019999999999987354,
+      "complete": 0.03630318894854501
+    },
+    "n_sheep": 8
+  },
+  {
+    "sr": 0.8333333333333334,
+    "mean_len": 1723.1333333333334,
+    "mean_min_pen": 1.7584208091100058,
+    "mean_act": 1.3848404770822742,
+    "failure_modes": {
+      "NEVER_COMPACT": 1,
+      "SUCCESS": 25,
+      "PARTIAL_5of9": 3,
+      "PARTIAL_6of9": 1
+    },
+    "reward_per_step": {
+      "progress": 0.1561655017464111,
+      "alignment": 0.01548957874142236,
+      "compact": 0.0,
+      "wall_touch": -0.00728069638883058,
+      "pen_bonus": 0.04797461987851588,
+      "step_cost": -0.019999999999987424,
+      "complete": 0.048361511974310364
+    },
+    "n_sheep": 9
+  },
+  {
+    "sr": 0.26666666666666666,
+    "mean_len": 2266.5333333333333,
+    "mean_min_pen": 2.1789512236913047,
+    "mean_act": 1.3836169439830954,
+    "failure_modes": {
+      "PARTIAL_6of10": 16,
+      "SUCCESS": 8,
+      "PARTIAL_7of10": 1,
+      "PARTIAL_9of10": 1,
+      "PARTIAL_5of10": 1,
+      "COMPACT_CANT_DRIVE": 2,
+      "PARTIAL_8of10": 1
+    },
+    "reward_per_step": {
+      "progress": 0.13603502511276877,
+      "alignment": 0.013359252519942029,
+      "compact": 0.0,
+      "wall_touch": -0.012232639033891629,
+      "pen_bonus": 0.030148832284252015,
+      "step_cost": -0.019999999999986078,
+      "complete": 0.011765397964586153
+    },
+    "n_sheep": 10
+  }
+]
\ No newline at end of file
diff --git a/training/runs/v1/success_rate.png b/training/runs/v1/success_rate.png
new file mode 100644
index 0000000..ec3f546
Binary files /dev/null and b/training/runs/v1/success_rate.png differ
diff --git a/training/runs/v1/vecnorm.pkl b/training/runs/v1/vecnorm.pkl
new file mode 100644
index 0000000..1f6e51e
Binary files /dev/null and b/training/runs/v1/vecnorm.pkl differ
diff --git a/training/train.py b/training/train.py
index dc19371..6f07838 100644
--- a/training/train.py
+++ b/training/train.py
@@ -384,7 +384,7 @@ DEFAULT_CONFIG = {
     "WALL_TOUCH_BUFFER": 0.3,
     "ALIGN_SHAPE": "standoff",
     "ALIGN_GATED": True,
-    "ENTRY_AWARE": False,
+    "ENTRY_AWARE": True,
     "ent_coef": 0.02,
 }