diff --git a/training/herding_env.py b/training/herding_env.py
index 2d62b7a..f568a51 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -53,9 +53,9 @@ class HerdingEnv(gym.Env):
     # -----------------------------------------------------------------------
     # Reward weights  (progress-based potential shaping + sparse bonuses)
     # -----------------------------------------------------------------------
-    W_DRIVE     = 2.0    # flock COM moved toward pen (per metre, per step)
-    W_COLLECT   = 1.0   # flock radius shrank (per metre, per step)
-    W_APPROACH  = 0.3   # stable position signal: dog close to flock COM
+    W_DRIVE     = 2.0   # progress: flock COM moved toward pen
+    W_COLLECT   = 0.5   # progress: flock radius shrank
+    W_ALIGN     = 0.5   # position: dog on anti-pen side of flock COM
     W_PEN_BONUS = 5.0   # per sheep penned
     W_COMPLETE  = 20.0  # all sheep penned
     W_STEP_COST = 0.002 # time penalty
@@ -288,23 +288,27 @@ class HerdingEnv(gym.Env):
         com, radius, _ = self._flock_stats()
         com_dist = float(np.linalg.norm(com - self.PEN_CENTER))
 
-        # Progress rewards: positive when flock moves toward pen or compacts
+        # Progress rewards: positive when state improves
         drive_progress   = (self._prev_com_dist - com_dist) * self.W_DRIVE
         collect_progress = (self._prev_radius   - radius)   * self.W_COLLECT
 
         self._prev_com_dist = com_dist
         self._prev_radius   = radius
 
-        # Approach: stable position signal so the dog has a gradient toward
-        # the flock even when the sheep are not actively fleeing
-        active_mask = ~self.penned[:self.n_sheep]
-        if active_mask.any():
-            dog_to_com = float(np.linalg.norm(self.dog_pos - com))
-            approach   = -(dog_to_com / (2 * self.FIELD)) * self.W_APPROACH
+        # Alignment: reward the dog for standing on the anti-pen side of the
+        # flock COM, scaled by a proximity term that falls linearly to 0 at FLEE_DIST.
+        # cosine: +1 = dog directly behind flock, -1 = dog on pen side (wrong).
+        d_dog_com = float(np.linalg.norm(self.dog_pos - com))
+        if d_dog_com > 0.1 and com_dist > 0.1:
+            pen_dir = (self.PEN_CENTER - com) / com_dist       # COM → pen
+            dog_dir = (self.dog_pos    - com) / d_dog_com      # COM → dog
+            cosine    = -float(np.dot(pen_dir, dog_dir))       # +1 when opposite
+            proximity = max(0.0, 1.0 - d_dog_com / self.FLEE_DIST)
+            alignment = cosine * proximity * self.W_ALIGN
         else:
-            approach = 0.0
+            alignment = 0.0
 
-        reward  = drive_progress + collect_progress + approach
+        reward  = drive_progress + collect_progress + alignment
         reward += newly_penned * self.W_PEN_BONUS
         reward -= self.W_STEP_COST
         if n_penned == self.n_sheep: