Cleanup and new approach

This commit is contained in:
Johnny Fernandes
2026-04-26 01:50:01 +01:00
parent b031473758
commit 61f8a7db15
139 changed files with 510 additions and 16170 deletions
+26 -16
View File
@@ -61,18 +61,19 @@ class HerdingEnv(gym.Env):
W_COMPLETE = 100.0 # all sheep penned
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
W_WALL_TOUCH = 0.01 # per-sheep, per-step penalty when an active sheep is
# pinned against the outside of a pen W/E wall. Kept
# small (<step_cost) so the dog isn't incentivised to
# hover above the entrance to avoid the penalty.
WALL_TOUCH_BUFFER = 0.3 # metres outside the wall counted as "touching"
W_WALL_TOUCH = 0.15 # per-sheep max penalty at wall surface. Linear ramp
# within WALL_TOUCH_BUFFER gives the RL agent a gradient
# signal to avoid pinning sheep against pen walls.
# 0.15 ≈ 7.5× step_cost — strong enough to shape behavior
# without overwhelming progress reward.
WALL_TOUCH_BUFFER = 0.8 # metres from wall where penalty starts ramping
ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
ALIGN_GATED = True # gate alignment on action magnitude
ENTRY_AWARE = True # progress reward targets PEN_ENTRY (entrance face), not
# PEN_CENTER. Stops the wall-corralling exploit: when a
# sheep is shoved south past y=-8 outside the pen x-range,
# distance to PEN_ENTRY grows (since target is at y=-8),
# so progress reward goes negative instead of positive.
ENTRY_AWARE = False # When True, targets PEN_ENTRY (entrance face) instead
# of PEN_CENTER for progress/obs. Intended to fix wall-
# corralling but collapsed n_sheep≥2 success rate.
# The wall-touch gradient penalty handles wall avoidance
# without breaking the core herding signal.
# Initial sheep spawn: first sheep placed anywhere; rest within CLUSTER_RADIUS
# of it. Set to None for legacy uniform-scatter behaviour.
@@ -406,16 +407,25 @@ class HerdingEnv(gym.Env):
else:
alignment = 0.0
# Wall-touch penalty: count active sheep pinned against outside W/E pen walls.
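# Wall-touch penalty: distance-based gradient covering all 3 solid pen
# walls (west, east, south). Linearly ramps from 0 at buffer edge to
# W_WALL_TOUCH at the wall surface — gives the agent a smooth signal
# to avoid pinning sheep against walls. Each wall only contributes its
# perpendicular distance while the sheep lies within that wall's span,
# so the corner regions diagonally outside the pen are not penalised.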
if self.W_WALL_TOUCH and active.any():
pts = self.sheep_pos[:self.n_sheep][active]
px0, px1 = self.PEN_X
py0, py1 = self.PEN_Y
in_y = (pts[:, 1] > py0) & (pts[:, 1] < py1)
near_w = (pts[:, 0] < px0) & (pts[:, 0] > px0 - self.WALL_TOUCH_BUFFER)
near_e = (pts[:, 0] > px1) & (pts[:, 0] < px1 + self.WALL_TOUCH_BUFFER)
n_touch = int(((near_w | near_e) & in_y).sum())
r_wall_touch = -n_touch * self.W_WALL_TOUCH
buf = self.WALL_TOUCH_BUFFER
far = buf + 1.0
d_w = np.where((pts[:, 0] < px0) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
px0 - pts[:, 0], far)
d_e = np.where((pts[:, 0] > px1) & (pts[:, 1] > py0) & (pts[:, 1] < py1),
pts[:, 0] - px1, far)
d_s = np.where((pts[:, 1] < py0) & (pts[:, 0] > px0) & (pts[:, 0] < px1),
py0 - pts[:, 1], far)
d_min = np.minimum(np.minimum(d_w, d_e), d_s)
penalties = np.maximum(0.0, 1.0 - d_min / buf) * self.W_WALL_TOUCH
r_wall_touch = -float(penalties.sum())
else:
r_wall_touch = 0.0