diff --git a/training/config.json b/training/config.json
index d29068c..1f25dd3 100644
--- a/training/config.json
+++ b/training/config.json
@@ -5,8 +5,8 @@
     "W_COMPLETE": 100.0,
     "W_STEP_COST": 0.02,
     "W_COMPACT": 0.0,
-    "W_WALL_TOUCH": 0.04,
-    "WALL_TOUCH_BUFFER": 0.3,
+    "W_WALL_TOUCH": 0.01,
+    "WALL_TOUCH_BUFFER": 0.4,
     "ALIGN_SHAPE": "standoff",
     "ALIGN_GATED": true,
     "ENTRY_AWARE": false,
diff --git a/training/herding_env.py b/training/herding_env.py
index dd647e7..1e1fb4c 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -61,11 +61,11 @@ class HerdingEnv(gym.Env):
     W_COMPLETE  = 100.0  # all sheep penned
     W_STEP_COST = 0.02   # time penalty — strong enough to punish doing nothing
     W_COMPACT   = 0.0    # reward for flock-radius reduction (off by default)
-    W_WALL_TOUCH = 0.04  # per-sheep max penalty at wall surface. Linear ramp
-                         # within WALL_TOUCH_BUFFER nudges the agent to avoid
-                         # pinning sheep against pen walls. 0.04 ≈ 2× step_cost
-                         # — noticeable but never dominates progress reward.
-    WALL_TOUCH_BUFFER = 0.3   # metres from wall where penalty starts ramping
+    W_WALL_TOUCH = 0.01  # per-sheep max penalty at wall surface. Linear ramp
+                         # within WALL_TOUCH_BUFFER. Covers field outer walls and
+                         # pen W/E/S walls. Kept small (≈ step_cost/2) so it
+                         # nudges away from walls without dominating progress.
+    WALL_TOUCH_BUFFER = 0.4   # metres from wall where penalty starts ramping
     ALIGN_SHAPE = "standoff"   # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
     ALIGN_GATED = True   # gate alignment on action magnitude
     ENTRY_AWARE = False  # When True, targets PEN_ENTRY (entrance face) instead
@@ -406,23 +406,32 @@ class HerdingEnv(gym.Env):
         else:
             alignment = 0.0
 
-        # Wall-touch penalty: distance-based gradient covering all 3 solid pen
-        # walls (west, east, south). Linearly ramps from 0 at buffer edge to
-        # W_WALL_TOUCH at the wall surface — gives the agent a smooth signal
-        # to avoid pinning sheep against walls.
+        # Wall-touch penalty: distance-based gradient covering ALL solid surfaces
+        # the sheep can hit — the four field outer walls (always present) plus
+        # the three solid pen walls (west, east, south). Linearly ramps from 0
+        # at buffer edge to W_WALL_TOUCH at the wall surface. Goal: sheep should
+        # never end up pinned against any wall (transfer concern: Webots fences
+        # have pillars that can physically trap sheep).
         if self.W_WALL_TOUCH and active.any():
             pts = self.sheep_pos[:self.n_sheep][active]
             px0, px1 = self.PEN_X
             py0, py1 = self.PEN_Y
+            F   = self.FIELD
             buf = self.WALL_TOUCH_BUFFER
             far = buf + 1.0
-            d_w = np.where((pts[:, 0] < px0) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
-                           px0 - pts[:, 0], far)
-            d_e = np.where((pts[:, 0] > px1) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
-                           pts[:, 0] - px1, far)
-            d_s = np.where((pts[:, 1] < py0) & (pts[:, 0] > px0) & (pts[:, 0] < px1),
-                           py0 - pts[:, 1], far)
-            d_min = np.minimum(np.minimum(d_w, d_e), d_s)
+            # Field outer walls — assumes sheep stay inside [-F, F]^2 (distances >= 0).
+            d_fw = pts[:, 0] - (-F)        # distance to west field wall
+            d_fe = F - pts[:, 0]           # east field wall
+            d_fs = pts[:, 1] - (-F)        # south field wall
+            d_fn = F - pts[:, 1]           # north field wall
+            # Pen W/E/S walls — only relevant when approached from outside the pen.
+            d_pw = np.where((pts[:, 0] < px0) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
+                            px0 - pts[:, 0], far)
+            d_pe = np.where((pts[:, 0] > px1) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
+                            pts[:, 0] - px1, far)
+            d_ps = np.where((pts[:, 1] < py0) & (pts[:, 0] > px0) & (pts[:, 0] < px1),
+                            py0 - pts[:, 1], far)
+            d_min = np.minimum.reduce([d_fw, d_fe, d_fs, d_fn, d_pw, d_pe, d_ps])
             penalties = np.maximum(0.0, 1.0 - d_min / buf) * self.W_WALL_TOUCH
             r_wall_touch = -float(penalties.sum())
         else: