From b77f36b713773390513cd2ecc6a0fee91ddc6e01 Mon Sep 17 00:00:00 2001
From: Johnny Fernandes <up202402612@up.pt>
Date: Fri, 24 Apr 2026 23:38:09 +0100
Subject: [PATCH] Sheep training: add flock alignment reward, remove pen exterior wall avoidance

---
 training/herding_env.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/training/herding_env.py b/training/herding_env.py
index 84f7519..9beb0c8 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -54,6 +54,8 @@ class HerdingEnv(gym.Env):
     # Reward weights  (simple per-sheep progress — no phases, no gating)
     # -----------------------------------------------------------------------
     W_PER_SHEEP = 2.0    # progress: sum of per-sheep distance-to-pen reductions
+    W_ALIGN     = 0.05   # alignment: dog on the far (anti-pen) side of the flock COM;
+                         # directional hint only, kept tiny so idling never beats completion
     W_PEN_BONUS = 10.0   # per sheep penned
     W_COMPLETE  = 100.0  # all sheep penned
     W_STEP_COST = 0.02   # time penalty — strong enough to punish doing nothing
@@ -312,7 +314,19 @@ class HerdingEnv(gym.Env):
         else:
             r_progress = 0.0
 
-        reward  = r_progress
+        com, _, _ = self._flock_stats()
+        com_dist  = float(np.linalg.norm(com - self.PEN_CENTER))
+        d_dog_com = float(np.linalg.norm(self.dog_pos - com))
+        if d_dog_com > 0.1 and com_dist > 0.1:
+            pen_dir   = (self.PEN_CENTER - com) / com_dist
+            dog_dir   = (self.dog_pos    - com) / d_dog_com
+            cosine    = -float(np.dot(pen_dir, dog_dir))
+            proximity = max(0.0, 1.0 - d_dog_com / self.FLEE_DIST)
+            alignment = cosine * proximity * self.W_ALIGN
+        else:
+            alignment = 0.0
+
+        reward  = r_progress + alignment
         reward += newly_penned * self.W_PEN_BONUS
         reward -= self.W_STEP_COST
         if n_penned == self.n_sheep:
@@ -363,17 +377,6 @@ class HerdingEnv(gym.Env):
         if pos[1] >  F - m: fy -= ((pos[1] - (F - m)) / m) * 6.0
 
 
-        # Pen exterior wall avoidance: mirrors sheep.py.
-        # Only fires when strictly outside the pen at pen height.
-        EM = 0.8
-        px0, px1 = self.PEN_X[0], self.PEN_X[1]
-        py0, py1 = self.PEN_Y[0], self.PEN_Y[1]
-        if py0 < pos[1] < py1:
-            if px0 - EM < pos[0] < px0:
-                fx -= ((pos[0] - (px0 - EM)) / EM) * 6.0
-            if px1 < pos[0] < px1 + EM:
-                fx += ((px1 + EM - pos[0]) / EM) * 6.0
-
         # Hard-stop clamp: mirrors sheep.py — zero any force driving further
         # into the wall within 0.5 m so the flee force cannot pin the sheep.
         HS = 0.5