Approach v3 w/ south penalty fix

This commit is contained in:
Johnny Fernandes
2026-04-26 15:26:24 +01:00
parent 11e13c6980
commit e2883212c5
+7 -4
View File
@@ -443,12 +443,15 @@ class HerdingEnv(gym.Env):
r_wall_touch = 0.0 r_wall_touch = 0.0
# South penalty: discourage active sheep from drifting below the pen # South penalty: discourage active sheep from drifting below the pen
# entrance (y < PEN_Y[1]). Sheep in this zone must reverse direction # entrance (y < PEN_Y[1]) while OUTSIDE the pen's x-range. Sheep at
# (move north) to enter — very hard for the dog to recover from. # y<-8 with x∈[PEN_X] are entering through the gate — that's desired.
# The dead zone is y<-8 and x outside [PEN_X]: stuck against pen walls,
# must reverse direction (north) to reach the entrance — hard to recover.
if self.W_SOUTH and active.any(): if self.W_SOUTH and active.any():
pts = self.sheep_pos[:self.n_sheep][active] pts = self.sheep_pos[:self.n_sheep][active]
depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1]) # metres below entrance depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1])
r_south = -float(depth.sum()) * self.W_SOUTH outside_pen_x = (pts[:, 0] < self.PEN_X[0]) | (pts[:, 0] > self.PEN_X[1])
r_south = -float((depth * outside_pen_x).sum()) * self.W_SOUTH
else: else:
r_south = 0.0 r_south = 0.0