Approach v3 w/ south penalty fix
This commit is contained in:
@@ -443,12 +443,15 @@ class HerdingEnv(gym.Env):
|
|||||||
r_wall_touch = 0.0
|
r_wall_touch = 0.0
|
||||||
|
|
||||||
# South penalty: discourage active sheep from drifting below the pen
|
# South penalty: discourage active sheep from drifting below the pen
|
||||||
# entrance (y < PEN_Y[1]). Sheep in this zone must reverse direction
|
# entrance (y < PEN_Y[1]) while OUTSIDE the pen's x-range. Sheep at
|
||||||
# (move north) to enter — very hard for the dog to recover from.
|
# y<-8 with x∈[PEN_X] are entering through the gate — that's desired.
|
||||||
|
# The dead zone is y<-8 and x outside [PEN_X]: stuck against pen walls,
|
||||||
|
# must reverse direction (north) to reach the entrance — hard to recover.
|
||||||
if self.W_SOUTH and active.any():
|
if self.W_SOUTH and active.any():
|
||||||
pts = self.sheep_pos[:self.n_sheep][active]
|
pts = self.sheep_pos[:self.n_sheep][active]
|
||||||
depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1]) # metres below entrance
|
depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1])
|
||||||
r_south = -float(depth.sum()) * self.W_SOUTH
|
outside_pen_x = (pts[:, 0] < self.PEN_X[0]) | (pts[:, 0] > self.PEN_X[1])
|
||||||
|
r_south = -float((depth * outside_pen_x).sum()) * self.W_SOUTH
|
||||||
else:
|
else:
|
||||||
r_south = 0.0
|
r_south = 0.0
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user