Approach v3 w/ south penalty

This commit is contained in:
Johnny Fernandes
2026-04-26 14:55:13 +01:00
parent a561f8a697
commit 11e13c6980
19 changed files with 6549 additions and 3 deletions
+17 -1
View File
@@ -60,6 +60,11 @@ class HerdingEnv(gym.Env):
W_PEN_BONUS = 10.0 # per sheep penned
W_COMPLETE = 100.0 # all sheep penned
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
W_SOUTH = 0.01 # per-sheep per-metre penalty for active sheep below the pen
# entrance (y < PEN_Y[1]=-8). Keeps the dog from letting
# sheep drift into the dead zone below the open face where
# they must reverse direction (north) to enter — hard to
# recover. 0.01 is half of W_STEP_COST (0.02), charged per sheep per metre below the entrance.
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
W_WALL_TOUCH = 0.01 # per-sheep max penalty at wall surface. Linear ramp
# within WALL_TOUCH_BUFFER. Covers field outer walls and
@@ -437,6 +442,16 @@ class HerdingEnv(gym.Env):
else:
r_wall_touch = 0.0
# South penalty: discourage active sheep from drifting below the pen
# entrance (y < PEN_Y[1]). Sheep in this zone must reverse direction
# (move north) to enter — very hard for the dog to recover from.
if self.W_SOUTH and active.any():
pts = self.sheep_pos[:self.n_sheep][active]
depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1]) # metres below entrance
r_south = -float(depth.sum()) * self.W_SOUTH
else:
r_south = 0.0
# Compactness shaping: reward reductions in the flock radius (active sheep only)
if self.W_COMPACT and active.any():
cur_radius = float(np.linalg.norm(
@@ -450,11 +465,12 @@ class HerdingEnv(gym.Env):
r_pen_bonus = newly_penned * self.W_PEN_BONUS
r_step_cost = -self.W_STEP_COST
r_complete = self.W_COMPLETE if n_penned == self.n_sheep else 0.0
reward = (r_progress + alignment + r_compact + r_wall_touch
reward = (r_progress + alignment + r_south + r_compact + r_wall_touch
+ r_pen_bonus + r_step_cost + r_complete)
rcomps = {
"progress": float(r_progress),
"alignment": float(alignment),
"south": float(r_south),
"compact": float(r_compact),
"wall_touch": float(r_wall_touch),
"pen_bonus": float(r_pen_bonus),