Approach refinement
This commit is contained in:
@@ -5,8 +5,8 @@
|
||||
"W_COMPLETE": 100.0,
|
||||
"W_STEP_COST": 0.02,
|
||||
"W_COMPACT": 0.0,
|
||||
"W_WALL_TOUCH": 0.04,
|
||||
"WALL_TOUCH_BUFFER": 0.3,
|
||||
"W_WALL_TOUCH": 0.01,
|
||||
"WALL_TOUCH_BUFFER": 0.4,
|
||||
"ALIGN_SHAPE": "standoff",
|
||||
"ALIGN_GATED": true,
|
||||
"ENTRY_AWARE": false,
|
||||
|
||||
+22
-13
@@ -61,11 +61,11 @@ class HerdingEnv(gym.Env):
|
||||
W_COMPLETE = 100.0 # all sheep penned
|
||||
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
||||
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
|
||||
W_WALL_TOUCH = 0.04 # per-sheep max penalty at wall surface. Linear ramp
|
||||
# within WALL_TOUCH_BUFFER nudges the agent to avoid
|
||||
# pinning sheep against pen walls. 0.04 ≈ 2× step_cost
|
||||
# — noticeable but never dominates progress reward.
|
||||
WALL_TOUCH_BUFFER = 0.3 # metres from wall where penalty starts ramping
|
||||
W_WALL_TOUCH = 0.01 # per-sheep max penalty at wall surface. Linear ramp
|
||||
# within WALL_TOUCH_BUFFER. Covers field outer walls and
|
||||
# pen W/E/S walls. Kept small (≈ step_cost/2) so it
|
||||
# nudges away from walls without dominating progress.
|
||||
WALL_TOUCH_BUFFER = 0.4 # metres from wall where penalty starts ramping
|
||||
ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
|
||||
ALIGN_GATED = True # gate alignment on action magnitude
|
||||
ENTRY_AWARE = False # When True, targets PEN_ENTRY (entrance face) instead
|
||||
@@ -406,23 +406,32 @@ class HerdingEnv(gym.Env):
|
||||
else:
|
||||
alignment = 0.0
|
||||
|
||||
# Wall-touch penalty: distance-based gradient covering all 3 solid pen
|
||||
# walls (west, east, south). Linearly ramps from 0 at buffer edge to
|
||||
# W_WALL_TOUCH at the wall surface — gives the agent a smooth signal
|
||||
# to avoid pinning sheep against walls.
|
||||
# Wall-touch penalty: distance-based gradient covering ALL solid surfaces
|
||||
# the sheep can hit — the four field outer walls (always present) plus
|
||||
# the three solid pen walls (west, east, south). Linearly ramps from 0
|
||||
# at buffer edge to W_WALL_TOUCH at the wall surface. Goal: sheep should
|
||||
# never end up pinned against any wall (transfer concern: Webots fences
|
||||
# have pillars that can physically trap sheep).
|
||||
if self.W_WALL_TOUCH and active.any():
|
||||
pts = self.sheep_pos[:self.n_sheep][active]
|
||||
px0, px1 = self.PEN_X
|
||||
py0, py1 = self.PEN_Y
|
||||
F = self.FIELD
|
||||
buf = self.WALL_TOUCH_BUFFER
|
||||
far = buf + 1.0
|
||||
d_w = np.where((pts[:, 0] < px0) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
|
||||
# Field outer walls — sheep is always inside [-F, F]^2.
|
||||
d_fw = pts[:, 0] - (-F) # distance to west field wall
|
||||
d_fe = F - pts[:, 0] # east field wall
|
||||
d_fs = pts[:, 1] - (-F) # south field wall
|
||||
d_fn = F - pts[:, 1] # north field wall
|
||||
# Pen W/E/S walls — only relevant approached from outside.
|
||||
d_pw = np.where((pts[:, 0] < px0) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
|
||||
px0 - pts[:, 0], far)
|
||||
d_e = np.where((pts[:, 0] > px1) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
|
||||
d_pe = np.where((pts[:, 0] > px1) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
|
||||
pts[:, 0] - px1, far)
|
||||
d_s = np.where((pts[:, 1] < py0) & (pts[:, 0] > px0) & (pts[:, 0] < px1),
|
||||
d_ps = np.where((pts[:, 1] < py0) & (pts[:, 0] > px0) & (pts[:, 0] < px1),
|
||||
py0 - pts[:, 1], far)
|
||||
d_min = np.minimum(np.minimum(d_w, d_e), d_s)
|
||||
d_min = np.minimum.reduce([d_fw, d_fe, d_fs, d_fn, d_pw, d_pe, d_ps])
|
||||
penalties = np.maximum(0.0, 1.0 - d_min / buf) * self.W_WALL_TOUCH
|
||||
r_wall_touch = -float(penalties.sum())
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user