Possible fix for sheep training with a flock of 10

2026-04-24 14:54:20 +01:00
parent 17eb25864e
commit fcfa2c35c8
4 changed files with 342 additions and 33 deletions
@@ -56,7 +56,7 @@ class HerdingEnv(gym.Env):
    W_DRIVE          = 2.0    # progress: COM moved toward pen (only when compact)
    W_COLLECT        = 4.0    # progress: radius shrank (2× stronger when scattered)
    W_ALIGN          = 0.5    # position: dog on anti-pen side of COM
-    W_COMPACT_BONUS  = 0.1    # per-step bonus for staying compact (sustained signal)
+    W_COMPACT_BONUS  = 0.0    # disabled: 0.1/step over 4000 steps = 400 >> W_COMPLETE=100
    W_PEN_BONUS      = 10.0   # per sheep penned
    W_COMPLETE       = 100.0  # all sheep penned
    W_STEP_COST      = 0.002  # time penalty
@@ -72,11 +72,11 @@ class HerdingEnv(gym.Env):
        self.render_mode    = render_mode
        self.random_n_sheep = random_n_sheep   # if True, randomise n_sheep each reset

-        # Fixed 13-dim observation regardless of n_sheep:
-        #   dog_pos(2) + rel_com(2) + rel_far(2) + com_to_pen(2)
-        #   + far_to_pen(2) + radius(1) + second_far_dist(1) + frac_penned(1)
+        # Fixed 17-dim observation regardless of n_sheep (NOTE(review): parts below sum to 16 — confirm shape):
+        #   dog_pos(2) + rel_com(2) + rel_far1(2) + rel_far2(2) + rel_far3(2)
+        #   + com_to_pen(2) + far1_to_pen(2) + radius(1) + frac_penned(1)
        self.observation_space = spaces.Box(
-            low=-np.inf, high=np.inf, shape=(13,), dtype=np.float32
+            low=-np.inf, high=np.inf, shape=(17,), dtype=np.float32
        )

        # Action: desired velocity (vx, vy) ∈ [-1, 1]², scaled by DOG_SPEED
@@ -269,29 +269,25 @@ class HerdingEnv(gym.Env):
            pts   = self.sheep_pos[:self.n_sheep][active_mask]
            dists = np.linalg.norm(pts - com, axis=1)
            sorted_idx = np.argsort(dists)[::-1]   # farthest first
-            far  = pts[sorted_idx[0]]
-            # 2nd farthest — if only 1 active sheep, reuse the same position
-            far2 = pts[sorted_idx[1]] if len(sorted_idx) > 1 else far
-            second_far_dist = float(dists[sorted_idx[1]]) if len(sorted_idx) > 1 else 0.0
+            # Top-3 stragglers; pad with COM when fewer active sheep exist
+            def nth(n):
+                return pts[sorted_idx[n]] if len(sorted_idx) > n else com
+            far1, far2, far3 = nth(0), nth(1), nth(2)
        else:
-            far = far2 = self.PEN_CENTER.copy()
-            second_far_dist = 0.0
+            far1 = far2 = far3 = self.PEN_CENTER.copy()

        S = self.FIELD
        D = 2 * self.FIELD

        return np.array([
            self.dog_pos[0] / S,  self.dog_pos[1] / S,
-            (com[0] - self.dog_pos[0]) / D,
-            (com[1] - self.dog_pos[1]) / D,
-            (far[0] - self.dog_pos[0]) / D,
-            (far[1] - self.dog_pos[1]) / D,
-            (self.PEN_CENTER[0] - com[0]) / D,
-            (self.PEN_CENTER[1] - com[1]) / D,
-            (self.PEN_CENTER[0] - far[0]) / D,
-            (self.PEN_CENTER[1] - far[1]) / D,
-            radius          / D,
-            second_far_dist / D,   # replaced mean_disp: 2nd farthest sheep from COM
+            (com[0]  - self.dog_pos[0]) / D, (com[1]  - self.dog_pos[1]) / D,
+            (far1[0] - self.dog_pos[0]) / D, (far1[1] - self.dog_pos[1]) / D,
+            (far2[0] - self.dog_pos[0]) / D, (far2[1] - self.dog_pos[1]) / D,
+            (far3[0] - self.dog_pos[0]) / D, (far3[1] - self.dog_pos[1]) / D,
+            (self.PEN_CENTER[0] - com[0])  / D, (self.PEN_CENTER[1] - com[1])  / D,
+            (self.PEN_CENTER[0] - far1[0]) / D, (self.PEN_CENTER[1] - far1[1]) / D,
+            radius / D,
            active_mask.sum() / self.n_sheep,
        ], dtype=np.float32)