diff --git a/training/config.json b/training/config.json index d29068c..1f25dd3 100644 --- a/training/config.json +++ b/training/config.json @@ -5,8 +5,8 @@ "W_COMPLETE": 100.0, "W_STEP_COST": 0.02, "W_COMPACT": 0.0, - "W_WALL_TOUCH": 0.04, - "WALL_TOUCH_BUFFER": 0.3, + "W_WALL_TOUCH": 0.01, + "WALL_TOUCH_BUFFER": 0.4, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ENTRY_AWARE": false, diff --git a/training/herding_env.py b/training/herding_env.py index dd647e7..1e1fb4c 100644 --- a/training/herding_env.py +++ b/training/herding_env.py @@ -61,11 +61,11 @@ class HerdingEnv(gym.Env): W_COMPLETE = 100.0 # all sheep penned W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing W_COMPACT = 0.0 # reward for flock-radius reduction (off by default) - W_WALL_TOUCH = 0.04 # per-sheep max penalty at wall surface. Linear ramp - # within WALL_TOUCH_BUFFER nudges the agent to avoid - # pinning sheep against pen walls. 0.04 ≈ 2× step_cost - # — noticeable but never dominates progress reward. - WALL_TOUCH_BUFFER = 0.3 # metres from wall where penalty starts ramping + W_WALL_TOUCH = 0.01 # per-sheep max penalty at wall surface. Linear ramp + # within WALL_TOUCH_BUFFER. Covers field outer walls and + # pen W/E/S walls. Kept small (≈ step_cost/2) so it + # nudges away from walls without dominating progress. + WALL_TOUCH_BUFFER = 0.4 # metres from wall where penalty starts ramping ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0) ALIGN_GATED = True # gate alignment on action magnitude ENTRY_AWARE = False # When True, targets PEN_ENTRY (entrance face) instead @@ -406,23 +406,32 @@ class HerdingEnv(gym.Env): else: alignment = 0.0 - # Wall-touch penalty: distance-based gradient covering all 3 solid pen - # walls (west, east, south). Linearly ramps from 0 at buffer edge to - # W_WALL_TOUCH at the wall surface — gives the agent a smooth signal - # to avoid pinning sheep against walls. + # Wall-touch penalty: distance-based gradient covering ALL solid surfaces + # the sheep can hit — the four field outer walls (always present) plus + # the three solid pen walls (west, east, south). Linearly ramps from 0 + # at buffer edge to W_WALL_TOUCH at the wall surface. Goal: sheep should + # never end up pinned against any wall (transfer concern: Webots fences + # have pillars that can physically trap sheep). if self.W_WALL_TOUCH and active.any(): pts = self.sheep_pos[:self.n_sheep][active] px0, px1 = self.PEN_X py0, py1 = self.PEN_Y + F = self.FIELD buf = self.WALL_TOUCH_BUFFER far = buf + 1.0 - d_w = np.where((pts[:, 0] < px0) & (pts[:, 1] > py0) & (pts[:, 1] < py1), - px0 - pts[:, 0], far) - d_e = np.where((pts[:, 0] > px1) & (pts[:, 1] > py0) & (pts[:, 1] < py1), - pts[:, 0] - px1, far) - d_s = np.where((pts[:, 1] < py0) & (pts[:, 0] > px0) & (pts[:, 0] < px1), - py0 - pts[:, 1], far) - d_min = np.minimum(np.minimum(d_w, d_e), d_s) + # Field outer walls — sheep is always inside [-F, F]^2. + d_fw = pts[:, 0] - (-F) # distance to west field wall + d_fe = F - pts[:, 0] # east field wall + d_fs = pts[:, 1] - (-F) # south field wall + d_fn = F - pts[:, 1] # north field wall + # Pen W/E/S walls — only relevant approached from outside. + d_pw = np.where((pts[:, 0] < px0) & (pts[:, 1] > py0) & (pts[:, 1] < py1), + px0 - pts[:, 0], far) + d_pe = np.where((pts[:, 0] > px1) & (pts[:, 1] > py0) & (pts[:, 1] < py1), + pts[:, 0] - px1, far) + d_ps = np.where((pts[:, 1] < py0) & (pts[:, 0] > px0) & (pts[:, 0] < px1), + py0 - pts[:, 1], far) + d_min = np.minimum.reduce([d_fw, d_fe, d_fs, d_fn, d_pw, d_pe, d_ps]) penalties = np.maximum(0.0, 1.0 - d_min / buf) * self.W_WALL_TOUCH r_wall_touch = -float(penalties.sum()) else: