Behaviour refinement - fence penalty
This commit is contained in:
+21
-2
@@ -61,6 +61,11 @@ class HerdingEnv(gym.Env):
|
||||
W_COMPLETE = 100.0 # all sheep penned
|
||||
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
||||
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
|
||||
W_WALL_TOUCH = 0.05 # per-sheep, per-step penalty when an active sheep is
|
||||
# pinned against the outside of a pen W/E wall. Direct
|
||||
# signal against the wall-corralling exploit so the
|
||||
# policy generalises better to Webots pillared walls.
|
||||
WALL_TOUCH_BUFFER = 0.5 # metres outside the wall counted as "touching"
|
||||
ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
|
||||
ALIGN_GATED = True # gate alignment on action magnitude
|
||||
ENTRY_AWARE = True # progress reward targets PEN_ENTRY (entrance face), not
|
||||
@@ -401,6 +406,19 @@ class HerdingEnv(gym.Env):
|
||||
else:
|
||||
alignment = 0.0
|
||||
|
||||
# Wall-touch penalty: count active sheep pinned against outside W/E pen walls.
|
||||
if self.W_WALL_TOUCH and active.any():
|
||||
pts = self.sheep_pos[:self.n_sheep][active]
|
||||
px0, px1 = self.PEN_X
|
||||
py0, py1 = self.PEN_Y
|
||||
in_y = (pts[:, 1] > py0) & (pts[:, 1] < py1)
|
||||
near_w = (pts[:, 0] < px0) & (pts[:, 0] > px0 - self.WALL_TOUCH_BUFFER)
|
||||
near_e = (pts[:, 0] > px1) & (pts[:, 0] < px1 + self.WALL_TOUCH_BUFFER)
|
||||
n_touch = int(((near_w | near_e) & in_y).sum())
|
||||
r_wall_touch = -n_touch * self.W_WALL_TOUCH
|
||||
else:
|
||||
r_wall_touch = 0.0
|
||||
|
||||
# Compactness shaping: rewards a reduction in flock radius (active sheep only)
|
||||
if self.W_COMPACT and active.any():
|
||||
cur_radius = float(np.linalg.norm(
|
||||
@@ -414,12 +432,13 @@ class HerdingEnv(gym.Env):
|
||||
r_pen_bonus = newly_penned * self.W_PEN_BONUS
|
||||
r_step_cost = -self.W_STEP_COST
|
||||
r_complete = self.W_COMPLETE if n_penned == self.n_sheep else 0.0
|
||||
reward = (r_progress + alignment + r_compact + r_pen_bonus
|
||||
+ r_step_cost + r_complete)
|
||||
reward = (r_progress + alignment + r_compact + r_wall_touch
|
||||
+ r_pen_bonus + r_step_cost + r_complete)
|
||||
rcomps = {
|
||||
"progress": float(r_progress),
|
||||
"alignment": float(alignment),
|
||||
"compact": float(r_compact),
|
||||
"wall_touch": float(r_wall_touch),
|
||||
"pen_bonus": float(r_pen_bonus),
|
||||
"step_cost": float(r_step_cost),
|
||||
"complete": float(r_complete),
|
||||
|
||||
Reference in New Issue
Block a user