Cleanup and new approach
This commit is contained in:
+26
-16
@@ -61,18 +61,19 @@ class HerdingEnv(gym.Env):
|
||||
W_COMPLETE = 100.0 # all sheep penned
|
||||
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
||||
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
|
||||
W_WALL_TOUCH = 0.01 # per-sheep, per-step penalty when an active sheep is
|
||||
# pinned against the outside of a pen W/E wall. Kept
|
||||
# small (<step_cost) so the dog isn't incentivised to
|
||||
# hover above the entrance to avoid the penalty.
|
||||
WALL_TOUCH_BUFFER = 0.3 # metres outside the wall counted as "touching"
|
||||
W_WALL_TOUCH = 0.15 # per-sheep max penalty at wall surface. Linear ramp
|
||||
# within WALL_TOUCH_BUFFER gives the RL agent a gradient
|
||||
# signal to avoid pinning sheep against pen walls.
|
||||
# 0.15 ≈ 7.5× step_cost — strong enough to shape behavior
|
||||
# without overwhelming progress reward.
|
||||
WALL_TOUCH_BUFFER = 0.8 # metres from wall where penalty starts ramping
|
||||
ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
|
||||
ALIGN_GATED = True # gate alignment on action magnitude
|
||||
ENTRY_AWARE = True # progress reward targets PEN_ENTRY (entrance face), not
|
||||
# PEN_CENTER. Stops the wall-corralling exploit: when a
|
||||
# sheep is shoved south past y=-8 outside the pen x-range,
|
||||
# distance to PEN_ENTRY grows (since target is at y=-8),
|
||||
# so progress reward goes negative instead of positive.
|
||||
ENTRY_AWARE = False # When True, targets PEN_ENTRY (entrance face) instead
|
||||
# of PEN_CENTER for progress/obs. Intended to fix wall-
|
||||
# corralling but collapsed n_sheep≥2 success rate.
|
||||
# The wall-touch gradient penalty handles wall avoidance
|
||||
# without breaking the core herding signal.
|
||||
|
||||
# Initial sheep spawn: first sheep placed anywhere; rest within CLUSTER_RADIUS
|
||||
# of it. Set to None for legacy uniform-scatter behaviour.
|
||||
@@ -406,16 +407,25 @@ class HerdingEnv(gym.Env):
|
||||
else:
|
||||
alignment = 0.0
|
||||
|
||||
# Wall-touch penalty: count active sheep pinned against outside W/E pen walls.
|
||||
# Wall-touch penalty: distance-based gradient covering all 3 solid pen
|
||||
# walls (west, east, south). Linearly ramps from 0 at buffer edge to
|
||||
# W_WALL_TOUCH at the wall surface — gives the agent a smooth signal
|
||||
# to avoid pinning sheep against walls.
|
||||
if self.W_WALL_TOUCH and active.any():
|
||||
pts = self.sheep_pos[:self.n_sheep][active]
|
||||
px0, px1 = self.PEN_X
|
||||
py0, py1 = self.PEN_Y
|
||||
in_y = (pts[:, 1] > py0) & (pts[:, 1] < py1)
|
||||
near_w = (pts[:, 0] < px0) & (pts[:, 0] > px0 - self.WALL_TOUCH_BUFFER)
|
||||
near_e = (pts[:, 0] > px1) & (pts[:, 0] < px1 + self.WALL_TOUCH_BUFFER)
|
||||
n_touch = int(((near_w | near_e) & in_y).sum())
|
||||
r_wall_touch = -n_touch * self.W_WALL_TOUCH
|
||||
buf = self.WALL_TOUCH_BUFFER
|
||||
far = buf + 1.0
|
||||
d_w = np.where((pts[:, 0] < px0) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
|
||||
px0 - pts[:, 0], far)
|
||||
d_e = np.where((pts[:, 0] > px1) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
|
||||
pts[:, 0] - px1, far)
|
||||
d_s = np.where((pts[:, 1] < py0) & (pts[:, 0] > px0) & (pts[:, 0] < px1),
|
||||
py0 - pts[:, 1], far)
|
||||
d_min = np.minimum(np.minimum(d_w, d_e), d_s)
|
||||
penalties = np.maximum(0.0, 1.0 - d_min / buf) * self.W_WALL_TOUCH
|
||||
r_wall_touch = -float(penalties.sum())
|
||||
else:
|
||||
r_wall_touch = 0.0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user