diff --git a/training/herding_env.py b/training/herding_env.py
index 52a0e7b..4488fa2 100644
--- a/training/herding_env.py
+++ b/training/herding_env.py
@@ -10,10 +10,10 @@ Coordinate system matches the Webots world file:
   field : x ∈ [-15, 15], y ∈ [-15, 15]
   pen : x ∈ [10, 13], y ∈ [-15, -8] (SE corner, open north)
 
-Observation (13-dim, fixed regardless of n_sheep):
-  dog position (2), flock COM relative to dog (2), farthest active sheep
-  relative to dog (2), pen relative to COM (2), pen relative to farthest
-  sheep (2), flock radius (1), mean dispersion (1), fraction penned (1).
+Observation (16-dim, fixed regardless of n_sheep):
+  dog position (2), flock COM relative to dog (2), top-3 farthest active
+  sheep relative to dog (6), pen relative to COM (2), pen relative to
+  farthest sheep (2), flock radius (1), fraction penned (1).
 
 Permutation-invariant by design: curriculum stages share the same obs dim
 so VecNormalize statistics transfer as n_sheep advances.
@@ -72,11 +72,11 @@ class HerdingEnv(gym.Env):
         self.render_mode = render_mode
         self.random_n_sheep = random_n_sheep  # if True, randomise n_sheep each reset
 
-        # Fixed 17-dim observation regardless of n_sheep:
+        # Fixed 16-dim observation regardless of n_sheep:
         #   dog_pos(2) + rel_com(2) + rel_far1(2) + rel_far2(2) + rel_far3(2)
         #   + com_to_pen(2) + far1_to_pen(2) + radius(1) + frac_penned(1)
         self.observation_space = spaces.Box(
-            low=-np.inf, high=np.inf, shape=(17,), dtype=np.float32
+            low=-np.inf, high=np.inf, shape=(16,), dtype=np.float32
         )
 
         # Action: desired velocity (vx, vy) ∈ [-1, 1]², scaled by DOG_SPEED