diff --git a/training/herding_env.py b/training/herding_env.py index 15d17c1..a9000bd 100644 --- a/training/herding_env.py +++ b/training/herding_env.py @@ -63,6 +63,11 @@ class HerdingEnv(gym.Env): ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0) ALIGN_GATED = True # gate alignment on action magnitude + # Initial sheep spawn: first sheep placed anywhere; rest within ±INIT_CLUSTER_RADIUS + # of it on each axis. Set to None for legacy uniform-scatter behaviour. + # Cluster radius ≤ COHESION_DIST (8m) so boid cohesion keeps the flock together. + INIT_CLUSTER_RADIUS = 5.0 + def __init__(self, n_sheep: int = 1, max_steps: int = 2000, render_mode: str = None, random_n_sheep: bool = False, reward_cfg: dict = None): @@ -129,12 +134,25 @@ class HerdingEnv(gym.Env): self.sheep_pos[:] = self.PEN_CENTER self.penned[:] = True + # Spawn first sheep anywhere; subsequent sheep clustered around it + # so boid cohesion (active within 8m) keeps the flock together. + # Without clustering, sheep can start 25m apart and never coalesce — + # task becomes intractable for n_sheep ≥ 2. placed = 0 + cluster_center = None + radius = self.INIT_CLUSTER_RADIUS while placed < self.n_sheep: - p = self.np_random.uniform(-12.0, 12.0, size=(2,)).astype(np.float32) + if placed == 0 or radius is None: + p = self.np_random.uniform(-12.0, 12.0, size=(2,)).astype(np.float32) + else: + offset = self.np_random.uniform(-radius, radius, size=(2,)) + p = (cluster_center + offset).astype(np.float32) + p = np.clip(p, -12.0, 12.0) if not self._in_pen(p): self.sheep_pos[placed] = p self.penned[placed] = False + if placed == 0: + cluster_center = p.copy() placed += 1 # Dog: 50% of resets start already behind the flock (anti-pen side,