Behaviour refinement - fence penalty
This commit is contained in:
+27
-8
@@ -61,6 +61,11 @@ class HerdingEnv(gym.Env):
|
|||||||
W_COMPLETE = 100.0 # all sheep penned
|
W_COMPLETE = 100.0 # all sheep penned
|
||||||
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
||||||
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
|
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
|
||||||
|
W_WALL_TOUCH = 0.05 # per-sheep, per-step penalty when an active sheep is
|
||||||
|
# pinned against the outside of a pen W/E wall. Direct
|
||||||
|
# signal against the wall-corraling exploit so the
|
||||||
|
# policy generalises better to Webots pillared walls.
|
||||||
|
WALL_TOUCH_BUFFER = 0.5 # metres outside the wall counted as "touching"
|
||||||
ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
|
ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
|
||||||
ALIGN_GATED = True # gate alignment on action magnitude
|
ALIGN_GATED = True # gate alignment on action magnitude
|
||||||
ENTRY_AWARE = True # progress reward targets PEN_ENTRY (entrance face), not
|
ENTRY_AWARE = True # progress reward targets PEN_ENTRY (entrance face), not
|
||||||
@@ -401,6 +406,19 @@ class HerdingEnv(gym.Env):
|
|||||||
else:
|
else:
|
||||||
alignment = 0.0
|
alignment = 0.0
|
||||||
|
|
||||||
|
# Wall-touch penalty: count active sheep pinned against outside W/E pen walls.
|
||||||
|
if self.W_WALL_TOUCH and active.any():
|
||||||
|
pts = self.sheep_pos[:self.n_sheep][active]
|
||||||
|
px0, px1 = self.PEN_X
|
||||||
|
py0, py1 = self.PEN_Y
|
||||||
|
in_y = (pts[:, 1] > py0) & (pts[:, 1] < py1)
|
||||||
|
near_w = (pts[:, 0] < px0) & (pts[:, 0] > px0 - self.WALL_TOUCH_BUFFER)
|
||||||
|
near_e = (pts[:, 0] > px1) & (pts[:, 0] < px1 + self.WALL_TOUCH_BUFFER)
|
||||||
|
n_touch = int(((near_w | near_e) & in_y).sum())
|
||||||
|
r_wall_touch = -n_touch * self.W_WALL_TOUCH
|
||||||
|
else:
|
||||||
|
r_wall_touch = 0.0
|
||||||
|
|
||||||
# Compactness shaping: reward decreases in flock radius (active sheep only)
|
# Compactness shaping: reward decreases in flock radius (active sheep only)
|
||||||
if self.W_COMPACT and active.any():
|
if self.W_COMPACT and active.any():
|
||||||
cur_radius = float(np.linalg.norm(
|
cur_radius = float(np.linalg.norm(
|
||||||
@@ -414,15 +432,16 @@ class HerdingEnv(gym.Env):
|
|||||||
r_pen_bonus = newly_penned * self.W_PEN_BONUS
|
r_pen_bonus = newly_penned * self.W_PEN_BONUS
|
||||||
r_step_cost = -self.W_STEP_COST
|
r_step_cost = -self.W_STEP_COST
|
||||||
r_complete = self.W_COMPLETE if n_penned == self.n_sheep else 0.0
|
r_complete = self.W_COMPLETE if n_penned == self.n_sheep else 0.0
|
||||||
reward = (r_progress + alignment + r_compact + r_pen_bonus
|
reward = (r_progress + alignment + r_compact + r_wall_touch
|
||||||
+ r_step_cost + r_complete)
|
+ r_pen_bonus + r_step_cost + r_complete)
|
||||||
rcomps = {
|
rcomps = {
|
||||||
"progress": float(r_progress),
|
"progress": float(r_progress),
|
||||||
"alignment": float(alignment),
|
"alignment": float(alignment),
|
||||||
"compact": float(r_compact),
|
"compact": float(r_compact),
|
||||||
"pen_bonus": float(r_pen_bonus),
|
"wall_touch": float(r_wall_touch),
|
||||||
"step_cost": float(r_step_cost),
|
"pen_bonus": float(r_pen_bonus),
|
||||||
"complete": float(r_complete),
|
"step_cost": float(r_step_cost),
|
||||||
|
"complete": float(r_complete),
|
||||||
}
|
}
|
||||||
return reward, rcomps
|
return reward, rcomps
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user