Sheep training flock of 10 fix?
This commit is contained in:
+37
-13
@@ -55,7 +55,7 @@ class HerdingEnv(gym.Env):
|
|||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
W_DRIVE = 2.0 # progress: COM moved toward pen (only when compact)
|
W_DRIVE = 2.0 # progress: COM moved toward pen (only when compact)
|
||||||
W_COLLECT = 4.0 # progress: radius shrank (2× stronger when scattered)
|
W_COLLECT = 4.0 # progress: radius shrank (2× stronger when scattered)
|
||||||
W_APPROACH_FAR = 1.0 # progress: dog moved toward farthest straggler (scatter only)
|
W_HERD_POS = 1.5 # progress: dog moved toward ideal herding position behind far1
|
||||||
W_ALIGN = 0.5 # position: dog on anti-pen side of COM (compact only)
|
W_ALIGN = 0.5 # position: dog on anti-pen side of COM (compact only)
|
||||||
W_PEN_BONUS = 10.0 # per sheep penned
|
W_PEN_BONUS = 10.0 # per sheep penned
|
||||||
W_COMPLETE = 100.0 # all sheep penned
|
W_COMPLETE = 100.0 # all sheep penned
|
||||||
@@ -89,7 +89,7 @@ class HerdingEnv(gym.Env):
|
|||||||
self._prev_penned = 0
|
self._prev_penned = 0
|
||||||
self._prev_com_dist = 0.0
|
self._prev_com_dist = 0.0
|
||||||
self._prev_radius = 0.0
|
self._prev_radius = 0.0
|
||||||
self._prev_dog_to_far1 = 0.0
|
self._prev_dog_to_ideal = 0.0
|
||||||
self.dog_pos = np.zeros(2, dtype=np.float32)
|
self.dog_pos = np.zeros(2, dtype=np.float32)
|
||||||
self.sheep_pos = np.zeros((self.MAX_SHEEP, 2), dtype=np.float32)
|
self.sheep_pos = np.zeros((self.MAX_SHEEP, 2), dtype=np.float32)
|
||||||
self.penned = np.ones(self.MAX_SHEEP, dtype=bool)
|
self.penned = np.ones(self.MAX_SHEEP, dtype=bool)
|
||||||
@@ -160,9 +160,11 @@ class HerdingEnv(gym.Env):
|
|||||||
if active_mask.any():
|
if active_mask.any():
|
||||||
pts = self.sheep_pos[:self.n_sheep][active_mask]
|
pts = self.sheep_pos[:self.n_sheep][active_mask]
|
||||||
far1 = pts[int(np.argmax(np.linalg.norm(pts - com, axis=1)))]
|
far1 = pts[int(np.argmax(np.linalg.norm(pts - com, axis=1)))]
|
||||||
self._prev_dog_to_far1 = float(np.linalg.norm(self.dog_pos - far1))
|
self._prev_dog_to_ideal = float(
|
||||||
|
np.linalg.norm(self.dog_pos - self._ideal_herd_pos(com, far1))
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
self._prev_dog_to_far1 = 0.0
|
self._prev_dog_to_ideal = 0.0
|
||||||
|
|
||||||
return self._obs(), {}
|
return self._obs(), {}
|
||||||
|
|
||||||
@@ -300,6 +302,24 @@ class HerdingEnv(gym.Env):
|
|||||||
active_mask.sum() / self.n_sheep,
|
active_mask.sum() / self.n_sheep,
|
||||||
], dtype=np.float32)
|
], dtype=np.float32)
|
||||||
|
|
||||||
|
def _ideal_herd_pos(self, com: np.ndarray, far1: np.ndarray) -> np.ndarray:
|
||||||
|
"""
|
||||||
|
Target position for the dog to push far1 toward COM:
|
||||||
|
0.8 × FLEE_DIST beyond far1 on the outward radial line from COM (clipped to the field bounds).
|
||||||
|
From here, the dog's approach causes far1 to flee inward.
|
||||||
|
"""
|
||||||
|
d = far1 - com
|
||||||
|
d_norm = float(np.linalg.norm(d))
|
||||||
|
if d_norm > 0.5:
|
||||||
|
direction = d / d_norm
|
||||||
|
else:
|
||||||
|
# far1 is within 0.5 of COM (flock already tight), so the radial direction is unreliable — use anti-pen direction instead
|
||||||
|
to_pen = self.PEN_CENTER - com
|
||||||
|
tp = float(np.linalg.norm(to_pen))
|
||||||
|
direction = -(to_pen / tp) if tp > 0.1 else np.array([0.0, -1.0], dtype=np.float32)
|
||||||
|
target = far1 + direction * self.FLEE_DIST * 0.8
|
||||||
|
return np.clip(target, -self.FIELD, self.FIELD).astype(np.float32)
|
||||||
|
|
||||||
def _reward(self, n_penned: int, newly_penned: int) -> float:
|
def _reward(self, n_penned: int, newly_penned: int) -> float:
|
||||||
com, radius, _ = self._flock_stats()
|
com, radius, _ = self._flock_stats()
|
||||||
com_dist = float(np.linalg.norm(com - self.PEN_CENTER))
|
com_dist = float(np.linalg.norm(com - self.PEN_CENTER))
|
||||||
@@ -316,22 +336,26 @@ class HerdingEnv(gym.Env):
|
|||||||
# Drive: only when compact — prevents rewarding COM movement while scattered.
|
# Drive: only when compact — prevents rewarding COM movement while scattered.
|
||||||
r_drive = 0.0 if scattered else drive_delta * self.W_DRIVE
|
r_drive = 0.0 if scattered else drive_delta * self.W_DRIVE
|
||||||
|
|
||||||
# Approach-to-straggler: reward dog for closing on farthest sheep.
|
# Herding-position reward: guides dog to the ideal position BEHIND far1
|
||||||
# Only in scatter phase so it doesn't override drive positioning.
|
# (on the outward radial, 0.8 × FLEE_DIST beyond far1 from COM).
|
||||||
# Gated on there being active sheep.
|
# From there, advancing toward COM pushes far1 inward.
|
||||||
|
# Fires in scatter phase only; gives gradient even during the outward
|
||||||
|
# navigation arc when raw approach reward would be zero/negative.
|
||||||
active_mask = ~self.penned[:self.n_sheep]
|
active_mask = ~self.penned[:self.n_sheep]
|
||||||
if scattered and active_mask.any():
|
if scattered and active_mask.any():
|
||||||
pts = self.sheep_pos[:self.n_sheep][active_mask]
|
pts = self.sheep_pos[:self.n_sheep][active_mask]
|
||||||
far1 = pts[int(np.argmax(np.linalg.norm(pts - com, axis=1)))]
|
far1 = pts[int(np.argmax(np.linalg.norm(pts - com, axis=1)))]
|
||||||
cur_dog_to_far1 = float(np.linalg.norm(self.dog_pos - far1))
|
ideal = self._ideal_herd_pos(com, far1)
|
||||||
r_approach = (self._prev_dog_to_far1 - cur_dog_to_far1) * self.W_APPROACH_FAR
|
cur_dog_to_ideal = float(np.linalg.norm(self.dog_pos - ideal))
|
||||||
self._prev_dog_to_far1 = cur_dog_to_far1
|
r_herd_pos = (self._prev_dog_to_ideal - cur_dog_to_ideal) * self.W_HERD_POS
|
||||||
|
self._prev_dog_to_ideal = cur_dog_to_ideal
|
||||||
else:
|
else:
|
||||||
r_approach = 0.0
|
r_herd_pos = 0.0
|
||||||
if active_mask.any():
|
if active_mask.any():
|
||||||
pts = self.sheep_pos[:self.n_sheep][active_mask]
|
pts = self.sheep_pos[:self.n_sheep][active_mask]
|
||||||
far1 = pts[int(np.argmax(np.linalg.norm(pts - com, axis=1)))]
|
far1 = pts[int(np.argmax(np.linalg.norm(pts - com, axis=1)))]
|
||||||
self._prev_dog_to_far1 = float(np.linalg.norm(self.dog_pos - far1))
|
ideal = self._ideal_herd_pos(com, far1)
|
||||||
|
self._prev_dog_to_ideal = float(np.linalg.norm(self.dog_pos - ideal))
|
||||||
|
|
||||||
# Alignment: dog on anti-pen side of COM — only in drive phase.
|
# Alignment: dog on anti-pen side of COM — only in drive phase.
|
||||||
# Disabled when scattered: chasing a straggler on the pen side would be
|
# Disabled when scattered: chasing a straggler on the pen side would be
|
||||||
@@ -346,7 +370,7 @@ class HerdingEnv(gym.Env):
|
|||||||
else:
|
else:
|
||||||
alignment = 0.0
|
alignment = 0.0
|
||||||
|
|
||||||
reward = r_drive + r_collect + r_approach + alignment
|
reward = r_drive + r_collect + r_herd_pos + alignment
|
||||||
reward += newly_penned * self.W_PEN_BONUS
|
reward += newly_penned * self.W_PEN_BONUS
|
||||||
reward -= self.W_STEP_COST
|
reward -= self.W_STEP_COST
|
||||||
if n_penned == self.n_sheep:
|
if n_penned == self.n_sheep:
|
||||||
|
|||||||
Reference in New Issue
Block a user