Tentative fix for sheep training with a flock of 10: add dog-to-flock approach reward

2026-04-23 20:41:48 +01:00
parent 81dc2aca01
commit a13f5d0ff0
1 changed files with 18 additions and 8 deletions
@@ -55,6 +55,7 @@ class HerdingEnv(gym.Env):
    # -----------------------------------------------------------------------
    W_DRIVE     = 2.0    # flock COM moved toward pen (per metre, per step)
    W_COLLECT   = 1.0   # flock radius shrank (per metre, per step)
    W_APPROACH  = 0.3   # penalty weight on dog-to-flock-COM distance (normalised by 2 * FIELD)
    W_PEN_BONUS = 5.0   # per sheep penned
    W_COMPLETE  = 20.0  # all sheep penned
    W_STEP_COST = 0.002 # time penalty
@@ -294,7 +295,16 @@ class HerdingEnv(gym.Env):
        self._prev_com_dist = com_dist
        self._prev_radius   = radius
-        reward  = drive_progress + collect_progress
+        # Approach: distance penalty (dog to flock COM) that gives the dog a
        # gradient toward the flock even when the sheep are not actively fleeing
        active_mask = ~self.penned[:self.n_sheep]
        if active_mask.any():
            dog_to_com = float(np.linalg.norm(self.dog_pos - com))
            approach   = -(dog_to_com / (2 * self.FIELD)) * self.W_APPROACH
        else:
            approach = 0.0
        reward  = drive_progress + collect_progress + approach
        reward += newly_penned * self.W_PEN_BONUS
        reward -= self.W_STEP_COST
        if n_penned == self.n_sheep: