Sheep training: fix for a flock of 10?
This commit is contained in:
+16
-12
@@ -53,9 +53,9 @@ class HerdingEnv(gym.Env):
|
|||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
# Reward weights (progress-based potential shaping + sparse bonuses)
|
# Reward weights (progress-based shaping + positional alignment + sparse bonuses + step cost)
|
||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
W_DRIVE = 2.0 # flock COM moved toward pen (per metre, per step)
|
W_DRIVE = 2.0 # progress: flock COM moved toward pen
|
||||||
W_COLLECT = 1.0 # flock radius shrank (per metre, per step)
|
W_COLLECT = 0.5 # progress: flock radius shrank
|
||||||
W_APPROACH = 0.3 # stable position signal: dog close to flock COM
|
W_ALIGN = 0.5 # position: dog on anti-pen side of flock COM
|
||||||
W_PEN_BONUS = 5.0 # per sheep penned
|
W_PEN_BONUS = 5.0 # per sheep penned
|
||||||
W_COMPLETE = 20.0 # all sheep penned
|
W_COMPLETE = 20.0 # all sheep penned
|
||||||
W_STEP_COST = 0.002 # time penalty
|
W_STEP_COST = 0.002 # time penalty
|
||||||
@@ -288,23 +288,27 @@ class HerdingEnv(gym.Env):
|
|||||||
com, radius, _ = self._flock_stats()
|
com, radius, _ = self._flock_stats()
|
||||||
com_dist = float(np.linalg.norm(com - self.PEN_CENTER))
|
com_dist = float(np.linalg.norm(com - self.PEN_CENTER))
|
||||||
|
|
||||||
# Progress rewards: positive when flock moves toward pen or compacts
|
# Progress rewards: positive when state improves
|
||||||
drive_progress = (self._prev_com_dist - com_dist) * self.W_DRIVE
|
drive_progress = (self._prev_com_dist - com_dist) * self.W_DRIVE
|
||||||
collect_progress = (self._prev_radius - radius) * self.W_COLLECT
|
collect_progress = (self._prev_radius - radius) * self.W_COLLECT
|
||||||
|
|
||||||
self._prev_com_dist = com_dist
|
self._prev_com_dist = com_dist
|
||||||
self._prev_radius = radius
|
self._prev_radius = radius
|
||||||
|
|
||||||
# Approach: stable position signal so the dog has a gradient toward
|
# Alignment: reward dog for being on the anti-pen side of the flock
|
||||||
# the flock even when the sheep are not actively fleeing
|
# COM, gated by proximity so only nearby positioning counts.
|
||||||
active_mask = ~self.penned[:self.n_sheep]
|
# Cosine term: +1 = dog directly behind flock (opposite the pen), -1 = dog on pen side (wrong).
|
||||||
if active_mask.any():
|
d_dog_com = float(np.linalg.norm(self.dog_pos - com))
|
||||||
dog_to_com = float(np.linalg.norm(self.dog_pos - com))
|
if d_dog_com > 0.1 and com_dist > 0.1:
|
||||||
approach = -(dog_to_com / (2 * self.FIELD)) * self.W_APPROACH
|
pen_dir = (self.PEN_CENTER - com) / com_dist # COM → pen
|
||||||
|
dog_dir = (self.dog_pos - com) / d_dog_com # COM → dog
|
||||||
|
cosine = -float(np.dot(pen_dir, dog_dir)) # +1 when opposite
|
||||||
|
proximity = max(0.0, 1.0 - d_dog_com / self.FLEE_DIST)
|
||||||
|
alignment = cosine * proximity * self.W_ALIGN
|
||||||
else:
|
else:
|
||||||
approach = 0.0
|
alignment = 0.0
|
||||||
|
|
||||||
reward = drive_progress + collect_progress + approach
|
reward = drive_progress + collect_progress + alignment
|
||||||
reward += newly_penned * self.W_PEN_BONUS
|
reward += newly_penned * self.W_PEN_BONUS
|
||||||
reward -= self.W_STEP_COST
|
reward -= self.W_STEP_COST
|
||||||
if n_penned == self.n_sheep:
|
if n_penned == self.n_sheep:
|
||||||
|
|||||||
Reference in New Issue
Block a user