diff --git a/training/herding_env.py b/training/herding_env.py
index 2a3a1c8..a1d334d 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -52,8 +52,9 @@ class HerdingEnv(gym.Env):
     # -----------------------------------------------------------------------
     # Reward weights
     # -----------------------------------------------------------------------
-    W_APPROACH   = 0.3     # dense: dog distance to nearest active sheep
-    W_SHAPING    = 0.5     # dense: mean sheep distance to pen  (was 0.01)
+    W_ALIGN      = 0.4     # dense: dog on anti-pen side of each active sheep
+    W_SHAPING    = 0.5     # dense: mean sheep distance to pen
+    W_APPROACH   = 0.1     # dense: dog distance to nearest active sheep
     W_PEN_BONUS  = 5.0     # sparse: per sheep successfully penned
     W_COMPLETE   = 20.0    # bonus when ALL active sheep are penned
     W_STEP_COST  = 0.002   # penalty per step (encourages efficiency)
@@ -106,11 +107,8 @@ class HerdingEnv(gym.Env):
         self._step_count  = 0
         self._prev_penned = 0
 
-        # Dog: random start in the open field (not near the pen)
-        self.dog_pos = self.np_random.uniform(-8.0, 5.0, size=(2,)).astype(np.float32)
-
         # Active sheep (0 .. n_sheep-1): random non-pen positions
-        self.sheep_pos[:] = self.PEN_CENTER   # default all to pen centre
+        self.sheep_pos[:] = self.PEN_CENTER
         self.penned[:]    = True
 
         placed = 0
@@ -121,6 +119,25 @@ class HerdingEnv(gym.Env):
                 self.penned[placed]    = False
                 placed += 1
 
+        # Dog: 50 % of the time start already on the anti-pen side of the
+        # first sheep (within flee range) so early training gets aligned
+        # starts; the other 50 % is fully random to ensure generalisation.
+        if self.np_random.random() < 0.5:
+            # Place dog behind the first active sheep relative to the pen
+            ref = self.sheep_pos[0]
+            away = ref - self.PEN_CENTER                       # sheep→anti-pen
+            dist = float(np.linalg.norm(away))
+            if dist > 0.1:
+                away = away / dist
+            offset = away * self.np_random.uniform(2.0, self.FLEE_DIST * 0.8)
+            self.dog_pos = np.clip(
+                (ref + offset).astype(np.float32), -self.FIELD, self.FIELD
+            )
+        else:
+            self.dog_pos = self.np_random.uniform(
+                -self.FIELD * 0.8, self.FIELD * 0.8, size=(2,)
+            ).astype(np.float32)
+
         # Inactive slots (n_sheep .. MAX_SHEEP-1): already at pen centre, penned=True
 
         self.wander_ang = self.np_random.uniform(
@@ -237,20 +254,37 @@ class HerdingEnv(gym.Env):
         active_mask = ~self.penned[:self.n_sheep]
         if active_mask.any():
             active_pos = self.sheep_pos[:self.n_sheep][active_mask]
+            dists_pen  = np.linalg.norm(active_pos - self.PEN_CENTER, axis=1)
+            dists_dog  = np.linalg.norm(active_pos - self.dog_pos, axis=1)
 
-            # Sheep-to-pen shaping: encourages moving sheep toward pen
-            dists_pen = np.linalg.norm(active_pos - self.PEN_CENTER, axis=1)
-            shaping   = -(dists_pen.mean() / (2 * self.FIELD))  # ∈ [-1, 0]
+            # Sheep-to-pen shaping
+            shaping = -(dists_pen.mean() / (2 * self.FIELD))
 
-            # Dog-to-nearest-sheep approach: incentivises the dog to stay
-            # within flee range (FLEE_DIST=7m) rather than wandering away
-            dists_dog = np.linalg.norm(active_pos - self.dog_pos, axis=1)
-            approach  = -(dists_dog.min() / (2 * self.FIELD))   # ∈ [-1, 0]
+            # Approach: dog penalised for being far from nearest sheep
+            approach = -(dists_dog.min() / (2 * self.FIELD))
+
+            # Alignment: reward dog for being on the anti-pen side of each sheep.
+            # When the dog is opposite the pen relative to a sheep, that sheep
+            # flees toward the pen.  Score ∈ [-1, 1] per sheep, weighted by
+            # a proximity gate so only nearby dogs count.
+            align_scores = []
+            for s_pos, d_pen, d_dog in zip(active_pos, dists_pen, dists_dog):
+                if d_pen < 0.1 or d_dog < 0.1:
+                    continue
+                pen_dir = (self.PEN_CENTER - s_pos) / d_pen   # sheep → pen
+                dog_dir = (self.dog_pos    - s_pos) / d_dog   # sheep → dog
+                # cos(angle): +1 → dog behind sheep, -1 → dog on pen side
+                cosine    = -float(np.dot(pen_dir, dog_dir))
+                # gate: linear fade from 1 at the sheep to 0 at FLEE_DIST and beyond
+                proximity = max(0.0, 1.0 - d_dog / self.FLEE_DIST)
+                align_scores.append(cosine * proximity)
+            alignment = float(np.mean(align_scores)) if align_scores else 0.0
         else:
-            shaping = approach = 0.0
+            shaping = approach = alignment = 0.0
 
-        reward  = shaping  * self.W_SHAPING
-        reward += approach * self.W_APPROACH
+        reward  = shaping   * self.W_SHAPING
+        reward += approach  * self.W_APPROACH
+        reward += alignment * self.W_ALIGN
         reward += newly_penned * self.W_PEN_BONUS
         reward -= self.W_STEP_COST
         if n_penned == self.n_sheep: