Sheep training flock _ improver
This commit is contained in:
+16
-9
@@ -179,10 +179,11 @@ class HerdingEnv(gym.Env):
|
||||
newly_penned = n_penned - self._prev_penned
|
||||
self._prev_penned = n_penned
|
||||
|
||||
reward = self._reward(n_penned, newly_penned)
|
||||
reward, rcomps = self._reward(n_penned, newly_penned)
|
||||
terminated = n_penned == self.n_sheep
|
||||
truncated = self._step_count >= self.max_steps
|
||||
info = {"n_penned": n_penned, "n_sheep": self.n_sheep}
|
||||
info = {"n_penned": n_penned, "n_sheep": self.n_sheep,
|
||||
"rcomps": rcomps}
|
||||
|
||||
if self.render_mode == "human":
|
||||
self.render()
|
||||
@@ -297,7 +298,7 @@ class HerdingEnv(gym.Env):
|
||||
active_mask.sum() / self.n_sheep,
|
||||
], dtype=np.float32)
|
||||
|
||||
def _reward(self, n_penned: int, newly_penned: int) -> float:
|
||||
def _reward(self, n_penned: int, newly_penned: int):
|
||||
active = ~self.penned[:self.n_sheep]
|
||||
|
||||
# Per-sheep progress toward pen: fires whenever any sheep moves closer.
|
||||
@@ -326,12 +327,18 @@ class HerdingEnv(gym.Env):
|
||||
else:
|
||||
alignment = 0.0
|
||||
|
||||
reward = r_progress + alignment
|
||||
reward += newly_penned * self.W_PEN_BONUS
|
||||
reward -= self.W_STEP_COST
|
||||
if n_penned == self.n_sheep:
|
||||
reward += self.W_COMPLETE
|
||||
return reward
|
||||
r_pen_bonus = newly_penned * self.W_PEN_BONUS
|
||||
r_step_cost = -self.W_STEP_COST
|
||||
r_complete = self.W_COMPLETE if n_penned == self.n_sheep else 0.0
|
||||
reward = r_progress + alignment + r_pen_bonus + r_step_cost + r_complete
|
||||
rcomps = {
|
||||
"progress": float(r_progress),
|
||||
"alignment": float(alignment),
|
||||
"pen_bonus": float(r_pen_bonus),
|
||||
"step_cost": float(r_step_cost),
|
||||
"complete": float(r_complete),
|
||||
}
|
||||
return reward, rcomps
|
||||
|
||||
def _step_sheep(self, i: int) -> np.ndarray:
|
||||
"""Apply one timestep of boid dynamics to sheep i (mirrors sheep.py)."""
|
||||
|
||||
Reference in New Issue
Block a user