Sheep training flock _ improver

This commit is contained in:
Johnny Fernandes
2026-04-25 11:31:39 +01:00
parent 062de676c9
commit fbe76a0d04
3 changed files with 190 additions and 24 deletions
+16 -9
View File
@@ -179,10 +179,11 @@ class HerdingEnv(gym.Env):
newly_penned = n_penned - self._prev_penned
self._prev_penned = n_penned
reward = self._reward(n_penned, newly_penned)
reward, rcomps = self._reward(n_penned, newly_penned)
terminated = n_penned == self.n_sheep
truncated = self._step_count >= self.max_steps
info = {"n_penned": n_penned, "n_sheep": self.n_sheep}
info = {"n_penned": n_penned, "n_sheep": self.n_sheep,
"rcomps": rcomps}
if self.render_mode == "human":
self.render()
@@ -297,7 +298,7 @@ class HerdingEnv(gym.Env):
active_mask.sum() / self.n_sheep,
], dtype=np.float32)
def _reward(self, n_penned: int, newly_penned: int) -> float:
def _reward(self, n_penned: int, newly_penned: int):
active = ~self.penned[:self.n_sheep]
# Per-sheep progress toward pen: fires whenever any sheep moves closer.
@@ -326,12 +327,18 @@ class HerdingEnv(gym.Env):
else:
alignment = 0.0
reward = r_progress + alignment
reward += newly_penned * self.W_PEN_BONUS
reward -= self.W_STEP_COST
if n_penned == self.n_sheep:
reward += self.W_COMPLETE
return reward
r_pen_bonus = newly_penned * self.W_PEN_BONUS
r_step_cost = -self.W_STEP_COST
r_complete = self.W_COMPLETE if n_penned == self.n_sheep else 0.0
reward = r_progress + alignment + r_pen_bonus + r_step_cost + r_complete
rcomps = {
"progress": float(r_progress),
"alignment": float(alignment),
"pen_bonus": float(r_pen_bonus),
"step_cost": float(r_step_cost),
"complete": float(r_complete),
}
return reward, rcomps
def _step_sheep(self, i: int) -> np.ndarray:
"""Apply one timestep of boid dynamics to sheep i (mirrors sheep.py)."""