Sheep training flock _ improver

2026-04-25 12:20:42 +01:00
parent 433652cb94
commit 02b20fbdb4
1 changed file with 3 additions and 2 deletions
@@ -54,8 +54,9 @@ class HerdingEnv(gym.Env):
    # Reward weights  (simple per-sheep progress — no phases, no gating)
    # -----------------------------------------------------------------------
    W_PER_SHEEP = 2.0    # progress: sum of per-sheep distance-to-pen reductions
-    W_ALIGN     = 0.05   # dog on anti-pen side of COM — directional hint only,
+    W_ALIGN     = 0.0    # disabled: created a sit-still trap from n_sheep≥2.
-                         # kept tiny so sit-still is never profitable vs completion
+                         # Progress reward already encodes "be on anti-pen side"
+                         # implicitly (sheep flee toward pen → positive progress).
    W_PEN_BONUS = 10.0   # per sheep penned
    W_COMPLETE  = 100.0  # all sheep penned
    W_STEP_COST = 0.02   # time penalty — strong enough to punish doing nothing