Behaviour refinement - fence penalty

2026-04-25 23:42:02 +01:00
parent 6612dbc1ba
commit 6253850620
1 changed file with 27 additions and 8 deletions
@@ -61,6 +61,11 @@ class HerdingEnv(gym.Env):
    W_COMPLETE  = 100.0  # all sheep penned
    W_STEP_COST = 0.02   # time penalty — strong enough to punish doing nothing
    W_COMPACT   = 0.0    # reward for flock-radius reduction (off by default)
+    W_WALL_TOUCH = 0.05  # per-sheep, per-step penalty when an active sheep is
+                         # pinned against the outside of a pen W/E wall. Direct
+                         # signal against the wall-corralling exploit so the
+                         # policy generalises better to Webots pillared walls.
+    WALL_TOUCH_BUFFER = 0.5   # metres outside the wall counted as "touching"
    ALIGN_SHAPE = "standoff"   # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
    ALIGN_GATED = True   # gate alignment on action magnitude
    ENTRY_AWARE = True   # progress reward targets PEN_ENTRY (entrance face), not
@@ -401,6 +406,19 @@ class HerdingEnv(gym.Env):
        else:
            alignment = 0.0

+        # Wall-touch penalty: count active sheep within WALL_TOUCH_BUFFER outside a W/E pen wall (inside the PEN_Y span).
+        if self.W_WALL_TOUCH and active.any():
+            pts = self.sheep_pos[:self.n_sheep][active]
+            px0, px1 = self.PEN_X
+            py0, py1 = self.PEN_Y
+            in_y     = (pts[:, 1] > py0) & (pts[:, 1] < py1)
+            near_w   = (pts[:, 0] < px0) & (pts[:, 0] > px0 - self.WALL_TOUCH_BUFFER)
+            near_e   = (pts[:, 0] > px1) & (pts[:, 0] < px1 + self.WALL_TOUCH_BUFFER)
+            n_touch  = int(((near_w | near_e) & in_y).sum())
+            r_wall_touch = -n_touch * self.W_WALL_TOUCH
+        else:
+            r_wall_touch = 0.0
+
        # Compactness shaping: reward decreases in flock radius (active sheep only)
        if self.W_COMPACT and active.any():
            cur_radius = float(np.linalg.norm(
@@ -414,12 +432,13 @@ class HerdingEnv(gym.Env):
        r_pen_bonus  = newly_penned * self.W_PEN_BONUS
        r_step_cost  = -self.W_STEP_COST
        r_complete   = self.W_COMPLETE if n_penned == self.n_sheep else 0.0
-        reward = (r_progress + alignment + r_compact + r_pen_bonus
-                  + r_step_cost + r_complete)
+        reward = (r_progress + alignment + r_compact + r_wall_touch
+                  + r_pen_bonus + r_step_cost + r_complete)
        rcomps = {
            "progress":   float(r_progress),
            "alignment":  float(alignment),
            "compact":    float(r_compact),
+            "wall_touch": float(r_wall_touch),
            "pen_bonus":  float(r_pen_bonus),
            "step_cost":  float(r_step_cost),
            "complete":   float(r_complete),