Sheep training: fix for a flock of 10?

This commit is contained in:
Johnny Fernandes
2026-04-24 14:54:20 +01:00
parent 17eb25864e
commit fcfa2c35c8
4 changed files with 342 additions and 33 deletions
+17 -21
View File
@@ -56,7 +56,7 @@ class HerdingEnv(gym.Env):
W_DRIVE = 2.0 # progress: COM moved toward pen (only when compact)
W_COLLECT = 4.0 # progress: radius shrank (2× stronger when scattered)
W_ALIGN = 0.5 # position: dog on anti-pen side of COM
W_COMPACT_BONUS = 0.1 # per-step bonus for staying compact (sustained signal)
W_COMPACT_BONUS = 0.0 # disabled: 0.1/step over 4000 steps = 400 >> W_COMPLETE=100
W_PEN_BONUS = 10.0 # per sheep penned
W_COMPLETE = 100.0 # all sheep penned
W_STEP_COST = 0.002 # time penalty
@@ -72,11 +72,11 @@ class HerdingEnv(gym.Env):
self.render_mode = render_mode
self.random_n_sheep = random_n_sheep # if True, randomise n_sheep each reset
# Fixed 13-dim observation regardless of n_sheep:
# dog_pos(2) + rel_com(2) + rel_far(2) + com_to_pen(2)
# + far_to_pen(2) + radius(1) + second_far_dist(1) + frac_penned(1)
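# Fixed 17-dim observation regardless of n_sheep:
# dog_pos(2) + rel_com(2) + rel_far1(2) + rel_far2(2) + rel_far3(2)
# + com_to_pen(2) + far1_to_pen(2) + radius(1) + frac_penned(1)
# NOTE(review): the components listed above total 16, not 17 — confirm that
# the declared shape matches the length of the observation array.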
self.observation_space = spaces.Box(
low=-np.inf, high=np.inf, shape=(13,), dtype=np.float32
low=-np.inf, high=np.inf, shape=(17,), dtype=np.float32
)
# Action: desired velocity (vx, vy) ∈ [-1, 1]², scaled by DOG_SPEED
@@ -269,29 +269,25 @@ class HerdingEnv(gym.Env):
pts = self.sheep_pos[:self.n_sheep][active_mask]
dists = np.linalg.norm(pts - com, axis=1)
sorted_idx = np.argsort(dists)[::-1] # farthest first
far = pts[sorted_idx[0]]
# 2nd farthest — if only 1 active sheep, reuse the same position
far2 = pts[sorted_idx[1]] if len(sorted_idx) > 1 else far
second_far_dist = float(dists[sorted_idx[1]]) if len(sorted_idx) > 1 else 0.0
# Top-3 stragglers; pad with COM when fewer active sheep exist
def nth(n):
    """Return the n-th farthest active sheep position, or ``com`` as padding.

    ``sorted_idx`` orders ``pts`` by distance from the centre of mass,
    farthest first; when fewer than ``n + 1`` points exist the centre of
    mass itself is returned.
    """
    if n < len(sorted_idx):
        return pts[sorted_idx[n]]
    return com
far1, far2, far3 = nth(0), nth(1), nth(2)
else:
far = far2 = self.PEN_CENTER.copy()
second_far_dist = 0.0
far1 = far2 = far3 = self.PEN_CENTER.copy()
S = self.FIELD
D = 2 * self.FIELD
return np.array([
self.dog_pos[0] / S, self.dog_pos[1] / S,
(com[0] - self.dog_pos[0]) / D,
(com[1] - self.dog_pos[1]) / D,
(far[0] - self.dog_pos[0]) / D,
(far[1] - self.dog_pos[1]) / D,
(self.PEN_CENTER[0] - com[0]) / D,
(self.PEN_CENTER[1] - com[1]) / D,
(self.PEN_CENTER[0] - far[0]) / D,
(self.PEN_CENTER[1] - far[1]) / D,
radius / D,
second_far_dist / D, # replaced mean_disp: 2nd farthest sheep from COM
(com[0] - self.dog_pos[0]) / D, (com[1] - self.dog_pos[1]) / D,
(far1[0] - self.dog_pos[0]) / D, (far1[1] - self.dog_pos[1]) / D,
(far2[0] - self.dog_pos[0]) / D, (far2[1] - self.dog_pos[1]) / D,
(far3[0] - self.dog_pos[0]) / D, (far3[1] - self.dog_pos[1]) / D,
(self.PEN_CENTER[0] - com[0]) / D, (self.PEN_CENTER[1] - com[1]) / D,
(self.PEN_CENTER[0] - far1[0]) / D, (self.PEN_CENTER[1] - far1[1]) / D,
radius / D,
active_mask.sum() / self.n_sheep,
], dtype=np.float32)