diff --git a/training/herding_env.py b/training/herding_env.py
index ce56cc3..2d62b7a 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -53,11 +53,12 @@ class HerdingEnv(gym.Env):
     # -----------------------------------------------------------------------
     # Reward weights  (progress-based potential shaping + sparse bonuses)
     # -----------------------------------------------------------------------
-    W_DRIVE    = 2.0     # flock COM moved toward pen (per metre, per step)
-    W_COLLECT  = 1.0     # flock radius shrank (per metre, per step)
-    W_PEN_BONUS = 5.0    # per sheep penned
-    W_COMPLETE  = 20.0   # all sheep penned
-    W_STEP_COST = 0.002  # time penalty
+    W_DRIVE     = 2.0    # flock COM moved toward pen (per metre, per step)
+    W_COLLECT   = 1.0   # flock radius shrank (per metre, per step)
+    W_APPROACH  = 0.3   # stable position signal: dog close to flock COM
+    W_PEN_BONUS = 5.0   # per sheep penned
+    W_COMPLETE  = 20.0  # all sheep penned
+    W_STEP_COST = 0.002 # time penalty
 
     def __init__(self, n_sheep: int = 1, max_steps: int = 2000,
                  render_mode: str = None):
@@ -288,13 +289,22 @@ class HerdingEnv(gym.Env):
         com_dist = float(np.linalg.norm(com - self.PEN_CENTER))
 
         # Progress rewards: positive when flock moves toward pen or compacts
-        drive_progress   = (self._prev_com_dist - com_dist)   * self.W_DRIVE
-        collect_progress = (self._prev_radius   - radius)     * self.W_COLLECT
+        drive_progress   = (self._prev_com_dist - com_dist) * self.W_DRIVE
+        collect_progress = (self._prev_radius   - radius)   * self.W_COLLECT
 
         self._prev_com_dist = com_dist
         self._prev_radius   = radius
 
-        reward  = drive_progress + collect_progress
+        # Approach: penalty on dog-to-COM distance (as a fraction of 2*FIELD),
+        # giving a gradient toward the flock even when sheep are not fleeing
+        active_mask = ~self.penned[:self.n_sheep]
+        if active_mask.any():
+            dog_to_com = float(np.linalg.norm(self.dog_pos - com))
+            approach   = -(dog_to_com / (2 * self.FIELD)) * self.W_APPROACH
+        else:
+            approach = 0.0
+
+        reward  = drive_progress + collect_progress + approach
         reward += newly_penned * self.W_PEN_BONUS
         reward -= self.W_STEP_COST
         if n_penned == self.n_sheep: