From b77f36b713773390513cd2ecc6a0fee91ddc6e01 Mon Sep 17 00:00:00 2001 From: Johnny Fernandes Date: Fri, 24 Apr 2026 23:38:09 +0100 Subject: [PATCH] Sheep training flock _ improver --- training/herding_env.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/training/herding_env.py b/training/herding_env.py index 84f7519..9beb0c8 100644 --- a/training/herding_env.py +++ b/training/herding_env.py @@ -54,6 +54,8 @@ class HerdingEnv(gym.Env): # Reward weights (simple per-sheep progress — no phases, no gating) # ----------------------------------------------------------------------- W_PER_SHEEP = 2.0 # progress: sum of per-sheep distance-to-pen reductions + W_ALIGN = 0.05 # dog on anti-pen side of COM — directional hint only, + # kept tiny so sit-still is never profitable vs completion W_PEN_BONUS = 10.0 # per sheep penned W_COMPLETE = 100.0 # all sheep penned W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing @@ -312,7 +314,19 @@ class HerdingEnv(gym.Env): else: r_progress = 0.0 - reward = r_progress + com, _, _ = self._flock_stats() + com_dist = float(np.linalg.norm(com - self.PEN_CENTER)) + d_dog_com = float(np.linalg.norm(self.dog_pos - com)) + if d_dog_com > 0.1 and com_dist > 0.1: + pen_dir = (self.PEN_CENTER - com) / com_dist + dog_dir = (self.dog_pos - com) / d_dog_com + cosine = -float(np.dot(pen_dir, dog_dir)) + proximity = max(0.0, 1.0 - d_dog_com / self.FLEE_DIST) + alignment = cosine * proximity * self.W_ALIGN + else: + alignment = 0.0 + + reward = r_progress + alignment reward += newly_penned * self.W_PEN_BONUS reward -= self.W_STEP_COST if n_penned == self.n_sheep: @@ -363,17 +377,6 @@ class HerdingEnv(gym.Env): if pos[1] > F - m: fy -= ((pos[1] - (F - m)) / m) * 6.0 - # Pen exterior wall avoidance: mirrors sheep.py. - # Only fires when strictly outside the pen at pen height. - EM = 0.8 - px0, px1 = self.PEN_X[0], self.PEN_X[1] - py0, py1 = self.PEN_Y[0], self.PEN_Y[1] - if py0 < pos[1] < py1: - if px0 - EM < pos[0] < px0: - fx -= ((pos[0] - (px0 - EM)) / EM) * 6.0 - if px1 < pos[0] < px1 + EM: - fx += ((px1 + EM - pos[0]) / EM) * 6.0 - # Hard-stop clamp: mirrors sheep.py — zero any force driving further # into the wall within 0.5 m so the flee force cannot pin the sheep. HS = 0.5