Sheep training flock of 10 fix?
This commit is contained in:
+17
-21
@@ -56,7 +56,7 @@ class HerdingEnv(gym.Env):
|
||||
W_DRIVE = 2.0 # progress: COM moved toward pen (only when compact)
|
||||
W_COLLECT = 4.0 # progress: radius shrank (2× stronger when scattered)
|
||||
W_ALIGN = 0.5 # position: dog on anti-pen side of COM
|
||||
W_COMPACT_BONUS = 0.1 # per-step bonus for staying compact (sustained signal)
|
||||
W_COMPACT_BONUS = 0.0 # disabled: 0.1/step over 4000 steps = 400 >> W_COMPLETE=100
|
||||
W_PEN_BONUS = 10.0 # per sheep penned
|
||||
W_COMPLETE = 100.0 # all sheep penned
|
||||
W_STEP_COST = 0.002 # time penalty
|
||||
@@ -72,11 +72,11 @@ class HerdingEnv(gym.Env):
|
||||
self.render_mode = render_mode
|
||||
self.random_n_sheep = random_n_sheep # if True, randomise n_sheep each reset
|
||||
|
||||
# Fixed 13-dim observation regardless of n_sheep:
|
||||
# dog_pos(2) + rel_com(2) + rel_far(2) + com_to_pen(2)
|
||||
# + far_to_pen(2) + radius(1) + second_far_dist(1) + frac_penned(1)
|
||||
# Fixed 17-dim observation regardless of n_sheep (NOTE(review): the components listed below sum to 16, not 17 — confirm against the declared shape):
|
||||
# dog_pos(2) + rel_com(2) + rel_far1(2) + rel_far2(2) + rel_far3(2)
|
||||
# + com_to_pen(2) + far1_to_pen(2) + radius(1) + frac_penned(1)  [NOTE(review): computed as active_mask.sum() / n_sheep — confirm penned vs. active fraction]
|
||||
self.observation_space = spaces.Box(
|
||||
low=-np.inf, high=np.inf, shape=(13,), dtype=np.float32
|
||||
low=-np.inf, high=np.inf, shape=(17,), dtype=np.float32
|
||||
)
|
||||
|
||||
# Action: desired velocity (vx, vy) ∈ [-1, 1]², scaled by DOG_SPEED
|
||||
@@ -269,29 +269,25 @@ class HerdingEnv(gym.Env):
|
||||
pts = self.sheep_pos[:self.n_sheep][active_mask]
|
||||
dists = np.linalg.norm(pts - com, axis=1)
|
||||
sorted_idx = np.argsort(dists)[::-1] # farthest first
|
||||
far = pts[sorted_idx[0]]
|
||||
# 2nd farthest — if only 1 active sheep, reuse the same position
|
||||
far2 = pts[sorted_idx[1]] if len(sorted_idx) > 1 else far
|
||||
second_far_dist = float(dists[sorted_idx[1]]) if len(sorted_idx) > 1 else 0.0
|
||||
# Top-3 stragglers; pad with COM when fewer active sheep exist
|
||||
def nth(n):
|
||||
return pts[sorted_idx[n]] if len(sorted_idx) > n else com
|
||||
far1, far2, far3 = nth(0), nth(1), nth(2)
|
||||
else:
|
||||
far = far2 = self.PEN_CENTER.copy()
|
||||
second_far_dist = 0.0
|
||||
far1 = far2 = far3 = self.PEN_CENTER.copy()
|
||||
|
||||
S = self.FIELD
|
||||
D = 2 * self.FIELD
|
||||
|
||||
return np.array([
|
||||
self.dog_pos[0] / S, self.dog_pos[1] / S,
|
||||
(com[0] - self.dog_pos[0]) / D,
|
||||
(com[1] - self.dog_pos[1]) / D,
|
||||
(far[0] - self.dog_pos[0]) / D,
|
||||
(far[1] - self.dog_pos[1]) / D,
|
||||
(self.PEN_CENTER[0] - com[0]) / D,
|
||||
(self.PEN_CENTER[1] - com[1]) / D,
|
||||
(self.PEN_CENTER[0] - far[0]) / D,
|
||||
(self.PEN_CENTER[1] - far[1]) / D,
|
||||
radius / D,
|
||||
second_far_dist / D, # replaced mean_disp: 2nd farthest sheep from COM
|
||||
(com[0] - self.dog_pos[0]) / D, (com[1] - self.dog_pos[1]) / D,
|
||||
(far1[0] - self.dog_pos[0]) / D, (far1[1] - self.dog_pos[1]) / D,
|
||||
(far2[0] - self.dog_pos[0]) / D, (far2[1] - self.dog_pos[1]) / D,
|
||||
(far3[0] - self.dog_pos[0]) / D, (far3[1] - self.dog_pos[1]) / D,
|
||||
(self.PEN_CENTER[0] - com[0]) / D, (self.PEN_CENTER[1] - com[1]) / D,
|
||||
(self.PEN_CENTER[0] - far1[0]) / D, (self.PEN_CENTER[1] - far1[1]) / D,
|
||||
radius / D,
|
||||
active_mask.sum() / self.n_sheep,
|
||||
], dtype=np.float32)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user