Behaviour refinement - fence penalty

This commit is contained in:
Johnny Fernandes
2026-04-25 23:42:02 +01:00
parent 6612dbc1ba
commit 6253850620
+21 -2
View File
@@ -61,6 +61,11 @@ class HerdingEnv(gym.Env):
W_COMPLETE = 100.0 # all sheep penned
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
W_WALL_TOUCH = 0.05 # per-sheep, per-step penalty when an active sheep is
# pinned against the outside of a pen W/E wall. Direct
# signal against the wall-corralling exploit so the
# policy generalises better to Webots pillared walls.
WALL_TOUCH_BUFFER = 0.5 # metres outside the wall counted as "touching"
ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
ALIGN_GATED = True # gate alignment on action magnitude
ENTRY_AWARE = True # progress reward targets PEN_ENTRY (entrance face), not
@@ -401,6 +406,19 @@ class HerdingEnv(gym.Env):
else:
alignment = 0.0
# Wall-touch penalty: count active sheep lying within WALL_TOUCH_BUFFER outside the
# pen's W or E wall while their y-coordinate is inside the pen's y-extent.
if self.W_WALL_TOUCH and active.any():
pts = self.sheep_pos[:self.n_sheep][active]
px0, px1 = self.PEN_X
py0, py1 = self.PEN_Y
in_y = (pts[:, 1] > py0) & (pts[:, 1] < py1)
near_w = (pts[:, 0] < px0) & (pts[:, 0] > px0 - self.WALL_TOUCH_BUFFER)
near_e = (pts[:, 0] > px1) & (pts[:, 0] < px1 + self.WALL_TOUCH_BUFFER)
n_touch = int(((near_w | near_e) & in_y).sum())
r_wall_touch = -n_touch * self.W_WALL_TOUCH
else:
r_wall_touch = 0.0
# Compactness shaping: rewards a reduction in flock radius (active sheep only)
if self.W_COMPACT and active.any():
cur_radius = float(np.linalg.norm(
@@ -414,12 +432,13 @@ class HerdingEnv(gym.Env):
r_pen_bonus = newly_penned * self.W_PEN_BONUS
r_step_cost = -self.W_STEP_COST
r_complete = self.W_COMPLETE if n_penned == self.n_sheep else 0.0
reward = (r_progress + alignment + r_compact + r_pen_bonus
+ r_step_cost + r_complete)
reward = (r_progress + alignment + r_compact + r_wall_touch
+ r_pen_bonus + r_step_cost + r_complete)
rcomps = {
"progress": float(r_progress),
"alignment": float(alignment),
"compact": float(r_compact),
"wall_touch": float(r_wall_touch),
"pen_bonus": float(r_pen_bonus),
"step_cost": float(r_step_cost),
"complete": float(r_complete),