Approach v3 w/ south penalty fix

2026-04-26 15:26:24 +01:00
parent 11e13c6980
commit e2883212c5
1 changed files with 7 additions and 4 deletions
@@ -443,12 +443,15 @@ class HerdingEnv(gym.Env):
            r_wall_touch = 0.0

        # South penalty: discourage active sheep from drifting below the pen
-        # entrance (y < PEN_Y[1]). Sheep in this zone must reverse direction
-        # (move north) to enter — very hard for the dog to recover from.
+        # entrance (y < PEN_Y[1]) while OUTSIDE the pen's x-range. Sheep below
+        # PEN_Y[1] with x within PEN_X are entering through the gate (desired).
+        # The dead zone is y < PEN_Y[1] with x outside PEN_X: sheep get stuck
+        # against pen walls and must reverse north to enter — hard to recover.
        if self.W_SOUTH and active.any():
            pts = self.sheep_pos[:self.n_sheep][active]
-            depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1])  # metres below entrance
-            r_south = -float(depth.sum()) * self.W_SOUTH
+            depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1])
+            outside_pen_x = (pts[:, 0] < self.PEN_X[0]) | (pts[:, 0] > self.PEN_X[1])
+            r_south = -float((depth * outside_pen_x).sum()) * self.W_SOUTH
        else:
            r_south = 0.0