From 287743709a6b3064f692d928e45a47efda9f3847 Mon Sep 17 00:00:00 2001
From: Johnny Fernandes <up202402612@up.pt>
Date: Sun, 26 Apr 2026 02:02:25 +0100
Subject: [PATCH] Soften wall-touch penalty and narrow its buffer

---
 training/config.json    |  4 ++--
 training/herding_env.py | 11 +++++------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/training/config.json b/training/config.json
index 3c0774e..d29068c 100644
--- a/training/config.json
+++ b/training/config.json
@@ -5,8 +5,8 @@
     "W_COMPLETE": 100.0,
     "W_STEP_COST": 0.02,
     "W_COMPACT": 0.0,
-    "W_WALL_TOUCH": 0.15,
-    "WALL_TOUCH_BUFFER": 0.8,
+    "W_WALL_TOUCH": 0.04,
+    "WALL_TOUCH_BUFFER": 0.3,
     "ALIGN_SHAPE": "standoff",
     "ALIGN_GATED": true,
     "ENTRY_AWARE": false,
diff --git a/training/herding_env.py b/training/herding_env.py
index 6665906..dd647e7 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -61,12 +61,11 @@ class HerdingEnv(gym.Env):
     W_COMPLETE  = 100.0  # all sheep penned
     W_STEP_COST = 0.02   # time penalty — strong enough to punish doing nothing
     W_COMPACT   = 0.0    # reward for flock-radius reduction (off by default)
-    W_WALL_TOUCH = 0.15  # per-sheep max penalty at wall surface. Linear ramp
-                         # within WALL_TOUCH_BUFFER gives the RL agent a gradient
-                         # signal to avoid pinning sheep against pen walls.
-                         # 0.15 ≈ 7.5× step_cost — strong enough to shape behavior
-                         # without overwhelming progress reward.
-    WALL_TOUCH_BUFFER = 0.8   # metres from wall where penalty starts ramping
+    W_WALL_TOUCH = 0.04  # per-sheep max penalty at wall surface. Linear ramp
+                         # within WALL_TOUCH_BUFFER nudges the agent to avoid
+                         # pinning sheep against pen walls. 0.04 = 2× W_STEP_COST
+                         # — noticeable, but meant not to dominate progress reward.
+    WALL_TOUCH_BUFFER = 0.3   # metres from wall where penalty starts ramping
     ALIGN_SHAPE = "standoff"   # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
     ALIGN_GATED = True   # gate alignment on action magnitude
     ENTRY_AWARE = False  # When True, targets PEN_ENTRY (entrance face) instead