Fix sheep training for a flock of 10?

This commit is contained in:
Johnny Fernandes
2026-04-24 17:31:11 +01:00
parent c2da9c10e4
commit 4d7f365358
3 changed files with 4 additions and 18 deletions
+2 -16
View File
@@ -54,10 +54,9 @@ class HerdingEnv(gym.Env):
# Reward weights (simple per-sheep progress — no phases, no gating)
# -----------------------------------------------------------------------
W_PER_SHEEP = 2.0 # progress: sum of per-sheep distance-to-pen reductions
-W_ALIGN = 0.3 # position: dog on anti-pen side of COM (small, directional hint)
W_PEN_BONUS = 10.0 # per sheep penned
W_COMPLETE = 100.0 # all sheep penned
-W_STEP_COST = 0.002 # time penalty
+W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
def __init__(self, n_sheep: int = 1, max_steps: int = 2000,
render_mode: str = None, random_n_sheep: bool = False):
@@ -309,20 +308,7 @@ class HerdingEnv(gym.Env):
else:
r_progress = 0.0
-# Small alignment hint: reward dog for being on anti-pen side of COM.
-com, _, _ = self._flock_stats()
-com_dist = float(np.linalg.norm(com - self.PEN_CENTER))
-d_dog_com = float(np.linalg.norm(self.dog_pos - com))
-if d_dog_com > 0.1 and com_dist > 0.1:
-pen_dir = (self.PEN_CENTER - com) / com_dist
-dog_dir = (self.dog_pos - com) / d_dog_com
-cosine = -float(np.dot(pen_dir, dog_dir))
-proximity = max(0.0, 1.0 - d_dog_com / self.FLEE_DIST)
-alignment = cosine * proximity * self.W_ALIGN
-else:
-alignment = 0.0
-reward = r_progress + alignment
+reward = r_progress
reward += newly_penned * self.W_PEN_BONUS
reward -= self.W_STEP_COST
if n_penned == self.n_sheep: