diff --git a/training/herding_env.py b/training/herding_env.py
index d462cf3..8423032 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -61,6 +61,11 @@ class HerdingEnv(gym.Env):
     W_COMPLETE  = 100.0  # all sheep penned
     W_STEP_COST = 0.02   # time penalty — strong enough to punish doing nothing
     W_COMPACT   = 0.0    # reward for flock-radius reduction (off by default)
+    W_WALL_TOUCH = 0.05  # per-sheep, per-step penalty when an active sheep is
+                         # pinned against the outside of a pen W/E wall. Direct
+                         # signal against the wall-corraling exploit so the
+                         # policy generalises better to Webots pillared walls.
+    WALL_TOUCH_BUFFER = 0.5   # metres outside the wall counted as "touching"
     ALIGN_SHAPE = "standoff"   # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
     ALIGN_GATED = True   # gate alignment on action magnitude
     ENTRY_AWARE = True   # progress reward targets PEN_ENTRY (entrance face), not
@@ -401,6 +406,19 @@ class HerdingEnv(gym.Env):
         else:
             alignment = 0.0
 
+        # Wall-touch penalty: count active sheep < WALL_TOUCH_BUFFER outside a W/E pen wall (pen y-span only).
+        if self.W_WALL_TOUCH and active.any():
+            pts = self.sheep_pos[:self.n_sheep][active]
+            px0, px1 = self.PEN_X
+            py0, py1 = self.PEN_Y
+            in_y     = (pts[:, 1] > py0) & (pts[:, 1] < py1)
+            near_w   = (pts[:, 0] < px0) & (pts[:, 0] > px0 - self.WALL_TOUCH_BUFFER)
+            near_e   = (pts[:, 0] > px1) & (pts[:, 0] < px1 + self.WALL_TOUCH_BUFFER)
+            n_touch  = int(((near_w | near_e) & in_y).sum())
+            r_wall_touch = -n_touch * self.W_WALL_TOUCH
+        else:
+            r_wall_touch = 0.0
+
         # Compactness shaping: reward decreases in flock radius (active sheep only)
         if self.W_COMPACT and active.any():
             cur_radius = float(np.linalg.norm(
@@ -414,15 +432,16 @@ class HerdingEnv(gym.Env):
         r_pen_bonus  = newly_penned * self.W_PEN_BONUS
         r_step_cost  = -self.W_STEP_COST
         r_complete   = self.W_COMPLETE if n_penned == self.n_sheep else 0.0
-        reward = (r_progress + alignment + r_compact + r_pen_bonus
-                  + r_step_cost + r_complete)
+        reward = (r_progress + alignment + r_compact + r_wall_touch
+                  + r_pen_bonus + r_step_cost + r_complete)
         rcomps = {
-            "progress":  float(r_progress),
-            "alignment": float(alignment),
-            "compact":   float(r_compact),
-            "pen_bonus": float(r_pen_bonus),
-            "step_cost": float(r_step_cost),
-            "complete":  float(r_complete),
+            "progress":   float(r_progress),
+            "alignment":  float(alignment),
+            "compact":    float(r_compact),
+            "wall_touch": float(r_wall_touch),
+            "pen_bonus":  float(r_pen_bonus),
+            "step_cost":  float(r_step_cost),
+            "complete":   float(r_complete),
         }
         return reward, rcomps