Approach v3 w/ south penalty fix
This commit is contained in:
@@ -443,12 +443,15 @@ class HerdingEnv(gym.Env):
|
||||
r_wall_touch = 0.0
|
||||
|
||||
# South penalty: discourage active sheep from drifting below the pen
|
||||
# entrance (y < PEN_Y[1]). Sheep in this zone must reverse direction
|
||||
# (move north) to enter — very hard for the dog to recover from.
|
||||
# entrance (y < PEN_Y[1]) while OUTSIDE the pen's x-range. Sheep at
|
||||
# y < -8 with x inside PEN_X are entering through the gate — that's desired.
|
||||
# The dead zone is y < -8 with x outside PEN_X: sheep there are stuck against the pen walls and
|
||||
# must reverse direction (north) to reach the entrance — hard to recover.
|
||||
if self.W_SOUTH and active.any():
|
||||
pts = self.sheep_pos[:self.n_sheep][active]
|
||||
depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1]) # metres below entrance
|
||||
r_south = -float(depth.sum()) * self.W_SOUTH
|
||||
depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1])
|
||||
outside_pen_x = (pts[:, 0] < self.PEN_X[0]) | (pts[:, 0] > self.PEN_X[1])
|
||||
r_south = -float((depth * outside_pen_x).sum()) * self.W_SOUTH
|
||||
else:
|
||||
r_south = 0.0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user