diff --git a/training/herding_env.py b/training/herding_env.py
index 1aa7356..2f6800d 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -55,7 +55,7 @@ class HerdingEnv(gym.Env):
     # -----------------------------------------------------------------------
     W_DRIVE          = 2.0    # progress: COM moved toward pen (only when compact)
     W_COLLECT        = 4.0    # progress: radius shrank (2× stronger when scattered)
-    W_APPROACH_FAR   = 1.0    # progress: dog moved toward farthest straggler (scatter only)
+    W_HERD_POS       = 1.5    # progress: dog moved toward ideal herding position behind far1
     W_ALIGN          = 0.5    # position: dog on anti-pen side of COM (compact only)
     W_PEN_BONUS      = 10.0   # per sheep penned
     W_COMPLETE       = 100.0  # all sheep penned
@@ -89,7 +89,7 @@ class HerdingEnv(gym.Env):
         self._prev_penned    = 0
         self._prev_com_dist  = 0.0
         self._prev_radius    = 0.0
-        self._prev_dog_to_far1 = 0.0
+        self._prev_dog_to_ideal = 0.0
         self.dog_pos       = np.zeros(2, dtype=np.float32)
         self.sheep_pos     = np.zeros((self.MAX_SHEEP, 2), dtype=np.float32)
         self.penned        = np.ones(self.MAX_SHEEP, dtype=bool)
@@ -160,9 +160,11 @@ class HerdingEnv(gym.Env):
         if active_mask.any():
             pts  = self.sheep_pos[:self.n_sheep][active_mask]
             far1 = pts[int(np.argmax(np.linalg.norm(pts - com, axis=1)))]
-            self._prev_dog_to_far1 = float(np.linalg.norm(self.dog_pos - far1))
+            self._prev_dog_to_ideal = float(
+                np.linalg.norm(self.dog_pos - self._ideal_herd_pos(com, far1))
+            )
         else:
-            self._prev_dog_to_far1 = 0.0
+            self._prev_dog_to_ideal = 0.0
 
         return self._obs(), {}
 
@@ -300,6 +302,24 @@ class HerdingEnv(gym.Env):
             active_mask.sum() / self.n_sheep,
         ], dtype=np.float32)
 
+    def _ideal_herd_pos(self, com: np.ndarray, far1: np.ndarray) -> np.ndarray:
+        """
+        Return the dog's target point for pushing far1 toward COM: 0.8 * FLEE_DIST
+        past far1 along the COM -> far1 direction, clipped to [-FIELD, FIELD], as
+        float32. The dog's approach from there is intended to make far1 flee inward.
+        """
+        d = far1 - com
+        d_norm = float(np.linalg.norm(d))
+        if d_norm > 0.5:
+            direction = d / d_norm
+        else:
+            # far1 within 0.5 of COM: use anti-pen direction, or (0, -1) if COM ~ PEN_CENTER
+            to_pen = self.PEN_CENTER - com
+            tp = float(np.linalg.norm(to_pen))
+            direction = -(to_pen / tp) if tp > 0.1 else np.array([0.0, -1.0], dtype=np.float32)
+        target = far1 + direction * self.FLEE_DIST * 0.8  # NOTE(review): 0.8 presumably keeps dog in flee range; confirm
+        return np.clip(target, -self.FIELD, self.FIELD).astype(np.float32)
+
     def _reward(self, n_penned: int, newly_penned: int) -> float:
         com, radius, _ = self._flock_stats()
         com_dist  = float(np.linalg.norm(com - self.PEN_CENTER))
@@ -316,22 +336,26 @@ class HerdingEnv(gym.Env):
         # Drive: only when compact — prevents rewarding COM movement while scattered.
         r_drive = 0.0 if scattered else drive_delta * self.W_DRIVE
 
-        # Approach-to-straggler: reward dog for closing on farthest sheep.
-        # Only in scatter phase so it doesn't override drive positioning.
-        # Gated on there being active sheep.
+        # Herding-position reward: guides dog to the ideal position BEHIND far1
+        # (on the outward radial, 0.8 * FLEE_DIST beyond far1 from COM).
+        # From there, advancing toward COM pushes far1 inward.
+        # Fires in scatter phase only; gives gradient even during the outward
+        # navigation arc when raw approach reward would be zero/negative.
         active_mask = ~self.penned[:self.n_sheep]
         if scattered and active_mask.any():
             pts  = self.sheep_pos[:self.n_sheep][active_mask]
             far1 = pts[int(np.argmax(np.linalg.norm(pts - com, axis=1)))]
-            cur_dog_to_far1 = float(np.linalg.norm(self.dog_pos - far1))
-            r_approach = (self._prev_dog_to_far1 - cur_dog_to_far1) * self.W_APPROACH_FAR
-            self._prev_dog_to_far1 = cur_dog_to_far1
+            ideal = self._ideal_herd_pos(com, far1)
+            cur_dog_to_ideal = float(np.linalg.norm(self.dog_pos - ideal))
+            r_herd_pos = (self._prev_dog_to_ideal - cur_dog_to_ideal) * self.W_HERD_POS
+            self._prev_dog_to_ideal = cur_dog_to_ideal
         else:
-            r_approach = 0.0
+            r_herd_pos = 0.0
             if active_mask.any():
                 pts  = self.sheep_pos[:self.n_sheep][active_mask]
                 far1 = pts[int(np.argmax(np.linalg.norm(pts - com, axis=1)))]
-                self._prev_dog_to_far1 = float(np.linalg.norm(self.dog_pos - far1))
+                ideal = self._ideal_herd_pos(com, far1)
+                self._prev_dog_to_ideal = float(np.linalg.norm(self.dog_pos - ideal))
 
         # Alignment: dog on anti-pen side of COM — only in drive phase.
         # Disabled when scattered: chasing a straggler on the pen side would be
@@ -346,7 +370,7 @@ class HerdingEnv(gym.Env):
         else:
             alignment = 0.0
 
-        reward  = r_drive + r_collect + r_approach + alignment
+        reward  = r_drive + r_collect + r_herd_pos + alignment
         reward += newly_penned * self.W_PEN_BONUS
         reward -= self.W_STEP_COST
         if n_penned == self.n_sheep: