Sheep training flock of 10 fix?
This commit is contained in:
+49
-33
@@ -51,14 +51,17 @@ class HerdingEnv(gym.Env):
|
||||
WALL_MARGIN = 3.5
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Reward weights (progress-based potential shaping + sparse bonuses)
|
||||
# Reward weights (two-phase: collect first, then drive)
|
||||
# -----------------------------------------------------------------------
|
||||
W_DRIVE = 2.0 # progress: flock COM moved toward pen
|
||||
W_COLLECT = 2.0 # progress: flock radius shrank (was 0.5 — must match W_DRIVE)
|
||||
W_ALIGN = 0.5 # position: dog on anti-pen side of flock COM
|
||||
W_PEN_BONUS = 10.0 # per sheep penned (was 5.0)
|
||||
W_COMPLETE = 100.0 # all sheep penned (was 20.0 — must dominate dense rewards)
|
||||
W_STEP_COST = 0.002 # time penalty
|
||||
W_DRIVE = 2.0 # progress: COM moved toward pen (only when compact)
|
||||
W_COLLECT = 4.0 # progress: radius shrank (2× stronger when scattered)
|
||||
W_ALIGN = 0.5 # position: dog on anti-pen side of COM
|
||||
W_COMPACT_BONUS = 0.1 # per-step bonus for staying compact (sustained signal)
|
||||
W_PEN_BONUS = 10.0 # per sheep penned
|
||||
W_COMPLETE = 100.0 # all sheep penned
|
||||
W_STEP_COST = 0.002 # time penalty
|
||||
|
||||
DRIVE_GATE_RADIUS = 5.0 # flock must compact below this (m) before drive reward fires
|
||||
|
||||
def __init__(self, n_sheep: int = 1, max_steps: int = 2000,
|
||||
render_mode: str = None, random_n_sheep: bool = False):
|
||||
@@ -71,7 +74,7 @@ class HerdingEnv(gym.Env):
|
||||
|
||||
# Fixed 13-dim observation regardless of n_sheep:
|
||||
# dog_pos(2) + rel_com(2) + rel_far(2) + com_to_pen(2)
|
||||
# + far_to_pen(2) + radius(1) + mean_disp(1) + frac_penned(1)
|
||||
# + far_to_pen(2) + radius(1) + second_far_dist(1) + frac_active(1)
|
||||
self.observation_space = spaces.Box(
|
||||
low=-np.inf, high=np.inf, shape=(13,), dtype=np.float32
|
||||
)
|
||||
@@ -259,60 +262,73 @@ class HerdingEnv(gym.Env):
|
||||
return com, float(dists.max()), float(dists.mean())
|
||||
|
||||
def _obs(self) -> np.ndarray:
|
||||
com, radius, mean_disp = self._flock_stats()
|
||||
com, radius, _ = self._flock_stats()
|
||||
active_mask = ~self.penned[:self.n_sheep]
|
||||
|
||||
# Farthest active sheep from COM (outlier the dog needs to chase)
|
||||
if active_mask.any():
|
||||
pts = self.sheep_pos[:self.n_sheep][active_mask]
|
||||
idx = int(np.argmax(np.linalg.norm(pts - com, axis=1)))
|
||||
far = pts[idx]
|
||||
dists = np.linalg.norm(pts - com, axis=1)
|
||||
sorted_idx = np.argsort(dists)[::-1] # farthest first
|
||||
far = pts[sorted_idx[0]]
|
||||
# 2nd farthest — if only 1 active sheep, reuse the same position
|
||||
far2 = pts[sorted_idx[1]] if len(sorted_idx) > 1 else far
|
||||
second_far_dist = float(dists[sorted_idx[1]]) if len(sorted_idx) > 1 else 0.0
|
||||
else:
|
||||
far = self.PEN_CENTER.copy()
|
||||
far = far2 = self.PEN_CENTER.copy()
|
||||
second_far_dist = 0.0
|
||||
|
||||
S = self.FIELD # normalisation scale for positions
|
||||
D = 2 * self.FIELD # for relative vectors that can span the whole field
|
||||
S = self.FIELD
|
||||
D = 2 * self.FIELD
|
||||
|
||||
return np.array([
|
||||
self.dog_pos[0] / S, self.dog_pos[1] / S, # dog abs pos
|
||||
(com[0] - self.dog_pos[0]) / D, # COM relative to dog
|
||||
self.dog_pos[0] / S, self.dog_pos[1] / S,
|
||||
(com[0] - self.dog_pos[0]) / D,
|
||||
(com[1] - self.dog_pos[1]) / D,
|
||||
(far[0] - self.dog_pos[0]) / D, # farthest relative to dog
|
||||
(far[0] - self.dog_pos[0]) / D,
|
||||
(far[1] - self.dog_pos[1]) / D,
|
||||
(self.PEN_CENTER[0] - com[0]) / D, # COM to pen
|
||||
(self.PEN_CENTER[0] - com[0]) / D,
|
||||
(self.PEN_CENTER[1] - com[1]) / D,
|
||||
(self.PEN_CENTER[0] - far[0]) / D, # farthest to pen
|
||||
(self.PEN_CENTER[0] - far[0]) / D,
|
||||
(self.PEN_CENTER[1] - far[1]) / D,
|
||||
radius / D, # flock compactness
|
||||
mean_disp / D, # mean spread
|
||||
active_mask.sum() / self.n_sheep, # fraction still active
|
||||
radius / D,
|
||||
second_far_dist / D, # replaced mean_disp: 2nd farthest sheep from COM
|
||||
active_mask.sum() / self.n_sheep,
|
||||
], dtype=np.float32)
|
||||
|
||||
def _reward(self, n_penned: int, newly_penned: int) -> float:
|
||||
com, radius, _ = self._flock_stats()
|
||||
com_dist = float(np.linalg.norm(com - self.PEN_CENTER))
|
||||
|
||||
# Progress rewards: positive when state improves
|
||||
drive_progress = (self._prev_com_dist - com_dist) * self.W_DRIVE
|
||||
collect_progress = (self._prev_radius - radius) * self.W_COLLECT
|
||||
drive_delta = self._prev_com_dist - com_dist
|
||||
collect_delta = self._prev_radius - radius
|
||||
|
||||
self._prev_com_dist = com_dist
|
||||
self._prev_radius = radius
|
||||
|
||||
# Alignment: reward dog for being on the anti-pen side of the flock
|
||||
# COM, gated by proximity so only nearby positioning counts.
|
||||
# +1 = dog directly behind flock, -1 = dog on pen side (wrong).
|
||||
# Alignment: dog on anti-pen side of COM, gated by proximity.
|
||||
d_dog_com = float(np.linalg.norm(self.dog_pos - com))
|
||||
if d_dog_com > 0.1 and com_dist > 0.1:
|
||||
pen_dir = (self.PEN_CENTER - com) / com_dist # COM → pen
|
||||
dog_dir = (self.dog_pos - com) / d_dog_com # COM → dog
|
||||
cosine = -float(np.dot(pen_dir, dog_dir)) # +1 when opposite
|
||||
pen_dir = (self.PEN_CENTER - com) / com_dist
|
||||
dog_dir = (self.dog_pos - com) / d_dog_com
|
||||
cosine = -float(np.dot(pen_dir, dog_dir))
|
||||
proximity = max(0.0, 1.0 - d_dog_com / self.FLEE_DIST)
|
||||
alignment = cosine * proximity * self.W_ALIGN
|
||||
else:
|
||||
alignment = 0.0
|
||||
|
||||
reward = drive_progress + collect_progress + alignment
|
||||
scattered = radius > self.DRIVE_GATE_RADIUS
|
||||
|
||||
# Collect always on; 2× scale when scattered to force collect-first.
|
||||
r_collect = collect_delta * self.W_COLLECT * (2.0 if scattered else 1.0)
|
||||
|
||||
# Drive only fires when flock is compact — prevents rewarding COM movement
|
||||
# while sheep are spread across the field.
|
||||
r_drive = 0.0 if scattered else drive_delta * self.W_DRIVE
|
||||
|
||||
# Small sustained reward for maintaining a compact flock.
|
||||
r_compact = 0.0 if scattered else self.W_COMPACT_BONUS
|
||||
|
||||
reward = r_drive + r_collect + r_compact + alignment
|
||||
reward += newly_penned * self.W_PEN_BONUS
|
||||
reward -= self.W_STEP_COST
|
||||
if n_penned == self.n_sheep:
|
||||
|
||||
Reference in New Issue
Block a user