From e2883212c5600aee5cc128d46f149e7087ca4ea6 Mon Sep 17 00:00:00 2001
From: Johnny Fernandes
Date: Sun, 26 Apr 2026 15:26:24 +0100
Subject: [PATCH] Approach v3 w/ south penalty fix

---
 training/herding_env.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/training/herding_env.py b/training/herding_env.py
index 84ea458..4338f9d 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -443,12 +443,15 @@ class HerdingEnv(gym.Env):
             r_wall_touch = 0.0
 
         # South penalty: discourage active sheep from drifting below the pen
-        # entrance (y < PEN_Y[1]). Sheep in this zone must reverse direction
-        # (move north) to enter — very hard for the dog to recover from.
+        # entrance (y < PEN_Y[1]) while OUTSIDE the pen's x-range. Sheep at
+        # y<-8 with x∈[PEN_X] are entering through the gate — that's desired.
+        # The dead zone is y<-8 and x outside [PEN_X]: stuck against pen walls,
+        # must reverse direction (north) to reach the entrance — hard to recover.
         if self.W_SOUTH and active.any():
             pts = self.sheep_pos[:self.n_sheep][active]
-            depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1])  # metres below entrance
-            r_south = -float(depth.sum()) * self.W_SOUTH
+            depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1])
+            outside_pen_x = (pts[:, 0] < self.PEN_X[0]) | (pts[:, 0] > self.PEN_X[1])
+            r_south = -float((depth * outside_pen_x).sum()) * self.W_SOUTH
         else:
             r_south = 0.0
 