From 02b20fbdb48f3064a54db41330e539b4dcf444d1 Mon Sep 17 00:00:00 2001
From: Johnny Fernandes <up202402612@up.pt>
Date: Sat, 25 Apr 2026 12:20:42 +0100
Subject: [PATCH] Sheep training flock _ improver

---
 training/herding_env.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/training/herding_env.py b/training/herding_env.py
index 440d319..5fe5744 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -54,8 +54,9 @@ class HerdingEnv(gym.Env):
     # Reward weights  (simple per-sheep progress — no phases, no gating)
     # -----------------------------------------------------------------------
     W_PER_SHEEP = 2.0    # progress: sum of per-sheep distance-to-pen reductions
-    W_ALIGN     = 0.05   # dog on anti-pen side of COM — directional hint only,
-                         # kept tiny so sit-still is never profitable vs completion
+    W_ALIGN     = 0.0    # disabled: created a sit-still trap for n_sheep ≥ 2.
+                         # Progress reward already encodes "be on anti-pen side"
+                         # implicitly (sheep flee toward pen → positive progress).
     W_PEN_BONUS = 10.0   # per sheep penned
     W_COMPLETE  = 100.0  # all sheep penned
     W_STEP_COST = 0.02   # time penalty — strong enough to punish doing nothing