From e2883212c5600aee5cc128d46f149e7087ca4ea6 Mon Sep 17 00:00:00 2001
From: Johnny Fernandes <up202402612@up.pt>
Date: Sun, 26 Apr 2026 15:26:24 +0100
Subject: [PATCH] Approach v3 w/ south penalty fix

---
 training/herding_env.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/training/herding_env.py b/training/herding_env.py
index 84ea458..4338f9d 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -443,12 +443,15 @@ class HerdingEnv(gym.Env):
             r_wall_touch = 0.0
 
         # South penalty: discourage active sheep from drifting below the pen
-        # entrance (y < PEN_Y[1]). Sheep in this zone must reverse direction
-        # (move north) to enter — very hard for the dog to recover from.
+        # entrance (y < PEN_Y[1]) while OUTSIDE the pen's x-range. Sheep below the
+        # entrance with PEN_X[0] <= x <= PEN_X[1] are entering via the gate (desired).
+        # The dead zone is below the entrance with x outside that range: sheep there
+        # are stuck against pen walls and must reverse (north) to enter; hard to recover.
         if self.W_SOUTH and active.any():
             pts = self.sheep_pos[:self.n_sheep][active]
-            depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1])  # metres below entrance
-            r_south = -float(depth.sum()) * self.W_SOUTH
+            depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1])
+            outside_pen_x = (pts[:, 0] < self.PEN_X[0]) | (pts[:, 0] > self.PEN_X[1])
+            r_south = -float((depth * outside_pen_x).sum()) * self.W_SOUTH
         else:
             r_south = 0.0