Fix for sheep training with a flock of 10?
This commit is contained in:
+18
-8
@@ -53,11 +53,12 @@ class HerdingEnv(gym.Env):
|
||||
# -----------------------------------------------------------------------
|
||||
# Reward weights (progress-based shaping + dense dog-to-flock approach term + sparse bonuses)
|
||||
# -----------------------------------------------------------------------
|
||||
W_DRIVE = 2.0 # flock COM moved toward pen (per metre, per step)
|
||||
W_COLLECT = 1.0 # flock radius shrank (per metre, per step)
|
||||
W_PEN_BONUS = 5.0 # per sheep penned
|
||||
W_COMPLETE = 20.0 # all sheep penned
|
||||
W_STEP_COST = 0.002 # time penalty
|
||||
W_DRIVE = 2.0 # flock COM moved toward pen (per metre, per step)
|
||||
W_COLLECT = 1.0 # flock radius shrank (per metre, per step)
|
||||
W_APPROACH = 0.3 # stable position signal: dog close to flock COM
|
||||
W_PEN_BONUS = 5.0 # per sheep penned
|
||||
W_COMPLETE = 20.0 # all sheep penned
|
||||
W_STEP_COST = 0.002 # time penalty
|
||||
|
||||
def __init__(self, n_sheep: int = 1, max_steps: int = 2000,
|
||||
render_mode: str = None):
|
||||
@@ -288,13 +289,22 @@ class HerdingEnv(gym.Env):
|
||||
com_dist = float(np.linalg.norm(com - self.PEN_CENTER))
|
||||
|
||||
# Progress rewards: positive when flock moves toward pen or compacts
|
||||
drive_progress = (self._prev_com_dist - com_dist) * self.W_DRIVE
|
||||
collect_progress = (self._prev_radius - radius) * self.W_COLLECT
|
||||
drive_progress = (self._prev_com_dist - com_dist) * self.W_DRIVE
|
||||
collect_progress = (self._prev_radius - radius) * self.W_COLLECT
|
||||
|
||||
self._prev_com_dist = com_dist
|
||||
self._prev_radius = radius
|
||||
|
||||
reward = drive_progress + collect_progress
|
||||
# Approach: stable position signal so the dog has a gradient toward
|
||||
# the flock even when the sheep are not actively fleeing
|
||||
active_mask = ~self.penned[:self.n_sheep]
|
||||
if active_mask.any():
|
||||
dog_to_com = float(np.linalg.norm(self.dog_pos - com))
|
||||
approach = -(dog_to_com / (2 * self.FIELD)) * self.W_APPROACH
|
||||
else:
|
||||
approach = 0.0
|
||||
|
||||
reward = drive_progress + collect_progress + approach
|
||||
reward += newly_penned * self.W_PEN_BONUS
|
||||
reward -= self.W_STEP_COST
|
||||
if n_penned == self.n_sheep:
|
||||
|
||||
Reference in New Issue
Block a user