From b031473758501d956ad93613f5cc7e86300721cd Mon Sep 17 00:00:00 2001
From: Johnny Fernandes <up202402612@up.pt>
Date: Sun, 26 Apr 2026 01:09:50 +0100
Subject: [PATCH] Behaviour refinement - fence penalty

---
 training/herding_env.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/training/herding_env.py b/training/herding_env.py
index 8423032..b038551 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -61,11 +61,11 @@ class HerdingEnv(gym.Env):
     W_COMPLETE  = 100.0  # all sheep penned
     W_STEP_COST = 0.02   # time penalty — strong enough to punish doing nothing
     W_COMPACT   = 0.0    # reward for flock-radius reduction (off by default)
-    W_WALL_TOUCH = 0.05  # per-sheep, per-step penalty when an active sheep is
-                         # pinned against the outside of a pen W/E wall. Direct
-                         # signal against the wall-corraling exploit so the
-                         # policy generalises better to Webots pillared walls.
-    WALL_TOUCH_BUFFER = 0.5   # metres outside the wall counted as "touching"
+    W_WALL_TOUCH = 0.01  # per-sheep, per-step penalty when an active sheep is
+                         # pinned against the outside of a pen W/E wall. Kept
+                         # small (< W_STEP_COST) so the dog isn't incentivised
+                         # to hover above the entrance to avoid the penalty.
+    WALL_TOUCH_BUFFER = 0.3   # metres outside the wall counted as "touching"
     ALIGN_SHAPE = "standoff"   # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
     ALIGN_GATED = True   # gate alignment on action magnitude
     ENTRY_AWARE = True   # progress reward targets PEN_ENTRY (entrance face), not