From b3251fcca3e6324bde044ed196be7d47aabe59e6 Mon Sep 17 00:00:00 2001
From: Johnny Fernandes <up202402612@up.pt>
Date: Fri, 24 Apr 2026 22:46:51 +0100
Subject: [PATCH] Sheep training flock _ improver

---
 controllers/sheep/sheep.py | 22 ++++++++++------------
 training/herding_env.py    | 35 ++++++++++++++---------------------
 2 files changed, 24 insertions(+), 33 deletions(-)

diff --git a/controllers/sheep/sheep.py b/controllers/sheep/sheep.py
index 010b28e..7b2f1c8 100644
--- a/controllers/sheep/sheep.py
+++ b/controllers/sheep/sheep.py
@@ -133,18 +133,16 @@ while robot.step(timestep) != -1:
 
     fx, fy = 0.0, 0.0
 
-    # Outside the pen: repel from the exterior of the side and back walls so
-    # sheep don't get pinned against them when fleeing from the dog.
-    # The pen entrance is open on the north (y > PEN_Y_MAX), so only push away
-    # from the west (x≈PEN_X_MIN), east (x≈PEN_X_MAX), and south (y≈PEN_Y_MIN) exteriors.
-    PEN_EXT_MARGIN = 1.2
-    if not penned:
-        if PEN_Y_MIN - PEN_EXT_MARGIN < y < PEN_Y_MAX and x < PEN_X_MIN + PEN_EXT_MARGIN:
-            fx -= ((PEN_X_MIN + PEN_EXT_MARGIN - x) / PEN_EXT_MARGIN) * 8.0
-        if PEN_Y_MIN - PEN_EXT_MARGIN < y < PEN_Y_MAX and x > PEN_X_MAX - PEN_EXT_MARGIN:
-            fx += ((x - (PEN_X_MAX - PEN_EXT_MARGIN)) / PEN_EXT_MARGIN) * 8.0
-        if y < PEN_Y_MIN + PEN_EXT_MARGIN and PEN_X_MIN < x < PEN_X_MAX:
-            fy += ((PEN_Y_MIN + PEN_EXT_MARGIN - y) / PEN_EXT_MARGIN) * 8.0
+    # Repel unpenned sheep from the exterior of the pen's side walls so they
+    # don't get pinned by flee forces. Only fires within PEN_EXT_MARGIN outside
+    # a side wall (x < PEN_X_MIN or x > PEN_X_MAX) at pen height (y in pen y-range).
+    # Entrance is open on the north (y > PEN_Y_MAX) — no force there.
+    PEN_EXT_MARGIN = 0.8
+    if not penned and PEN_Y_MIN < y < PEN_Y_MAX:
+        if PEN_X_MIN - PEN_EXT_MARGIN < x < PEN_X_MIN:
+            fx -= ((x - (PEN_X_MIN - PEN_EXT_MARGIN)) / PEN_EXT_MARGIN) * 6.0
+        if PEN_X_MAX < x < PEN_X_MAX + PEN_EXT_MARGIN:
+            fx += ((PEN_X_MAX + PEN_EXT_MARGIN - x) / PEN_EXT_MARGIN) * 6.0
 
     if penned:
         # Inside pen: wander freely, strong boundary forces prevent exit,
diff --git a/training/herding_env.py b/training/herding_env.py
index 0d56527..84f7519 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -53,12 +53,10 @@ class HerdingEnv(gym.Env):
     # -----------------------------------------------------------------------
     # Reward weights  (simple per-sheep progress — no phases, no gating)
     # -----------------------------------------------------------------------
-    W_PER_SHEEP   = 2.0   # progress: sum of per-sheep distance-to-pen reductions
-    W_SCATTER_PEN = 0.5   # penalty per metre the active flock radius exceeds threshold
-    SCATTER_THRESH = 8.0  # metres — allow natural spread, penalise excessive scatter
-    W_PEN_BONUS   = 10.0  # per sheep penned
-    W_COMPLETE    = 100.0 # all sheep penned
-    W_STEP_COST   = 0.02  # time penalty — strong enough to punish doing nothing
+    W_PER_SHEEP = 2.0    # progress: sum of per-sheep distance-to-pen reductions
+    W_PEN_BONUS = 10.0   # per sheep penned
+    W_COMPLETE  = 100.0  # all sheep penned
+    W_STEP_COST = 0.02   # time penalty — strong enough to punish doing nothing
 
     def __init__(self, n_sheep: int = 1, max_steps: int = 2000,
                  render_mode: str = None, random_n_sheep: bool = False):
@@ -314,12 +312,7 @@ class HerdingEnv(gym.Env):
         else:
             r_progress = 0.0
 
-        # Soft scatter penalty: discourages abandoning the remaining active flock.
-        # Only fires when radius exceeds threshold so normal spread isn't punished.
-        _, radius, _ = self._flock_stats()
-        r_scatter = -max(0.0, radius - self.SCATTER_THRESH) * self.W_SCATTER_PEN
-
-        reward  = r_progress + r_scatter
+        reward  = r_progress
         reward += newly_penned * self.W_PEN_BONUS
         reward -= self.W_STEP_COST
         if n_penned == self.n_sheep:
@@ -369,17 +362,17 @@ class HerdingEnv(gym.Env):
         if pos[1] < -F + m: fy += ((-F + m - pos[1]) / m) * 6.0
         if pos[1] >  F - m: fy -= ((pos[1] - (F - m)) / m) * 6.0
 
-        # Pen exterior wall avoidance — mirrors sheep.py addition.
-        # Prevents sheep getting pinned against the pen side/back walls when fleeing.
-        EM = 1.2
+
+        # Pen exterior wall avoidance: mirrors sheep.py.
+        # Only fires within EM outside a pen side wall, at pen height.
+        EM = 0.8
         px0, px1 = self.PEN_X[0], self.PEN_X[1]
         py0, py1 = self.PEN_Y[0], self.PEN_Y[1]
-        if py0 - EM < pos[1] < py1 and pos[0] < px0 + EM:
-            fx -= ((px0 + EM - pos[0]) / EM) * 8.0
-        if py0 - EM < pos[1] < py1 and pos[0] > px1 - EM:
-            fx += ((pos[0] - (px1 - EM)) / EM) * 8.0
-        if pos[1] < py0 + EM and px0 < pos[0] < px1:
-            fy += ((py0 + EM - pos[1]) / EM) * 8.0
+        if py0 < pos[1] < py1:
+            if px0 - EM < pos[0] < px0:
+                fx -= ((pos[0] - (px0 - EM)) / EM) * 6.0
+            if px1 < pos[0] < px1 + EM:
+                fx += ((px1 + EM - pos[0]) / EM) * 6.0
 
         # Hard-stop clamp: mirrors sheep.py — zero any force driving further
         # into the wall within 0.5 m so the flee force cannot pin the sheep.