Checkpoint 8

2026-05-12 22:41:03 +01:00
parent a01a5c9cef
commit 5c2ee4bba5
31 changed files with 3189 additions and 380 deletions
@@ -1,11 +1,12 @@
 """Gymnasium environment for the shepherd-dog herding task.

 Single-agent: the dog is the policy; sheep are env-controlled flocking
-agents (``herding.world.flocking_sim``). Differential-drive kinematics
-match the proto specs (``herding.world.diffdrive``) so a policy trained
-here transfers to Webots without re-tuning.
+agents (``herding.world.flocking_sim``). Kinematics match the proto specs
+(``herding.world.diffdrive``) so a policy trained here transfers to Webots
+without re-tuning.

-* **Action**: ``Box(-1, 1, (2,))`` — desired ``(vx, vy)`` intent.
+* **Action** (differential): ``Box(-1, 1, (2,))`` — ``(vx, vy)`` intent.
+* **Action** (mecanum):    ``Box(-1, 1, (3,))`` — ``(vx, vy, omega)`` intent.
 * **Observation**: ``Box(-inf, inf, (32·K,))`` from ``herding.perception.obs.build_obs``
  with optional frame stacking K (concatenated oldest → newest).
 * **Reset**: ``options["n_sheep"]`` overrides flock size; otherwise
@@ -26,17 +27,20 @@ import numpy as np
 from gymnasium import spaces

 from herding.world.diffdrive import (
-    heading_speed_to_wheels, kinematics_step, velocity_to_wheels,
+    heading_speed_to_wheels, kinematics_step,
+    mecanum_kinematics_step, velocity_to_mecanum_wheels, velocity_to_wheels,
 )
 from herding.world.flocking_sim import (
    FLEE_SPEED, MAX_SPEED, WANDER_SPEED, compute_heading_speed,
 )
 from herding.world.geometry import (
-    DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA, DOG_SOUTH_LIMIT, DOG_WHEEL_BASE,
-    DOG_WHEEL_RADIUS, FIELD_X, FIELD_Y, GATE_X, MAX_SHEEP,
+    DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
+    DOG_SOUTH_LIMIT, DOG_WHEEL_BASE, DOG_WHEEL_BASE_X, DOG_WHEEL_BASE_Y,
+    DOG_WHEEL_RADIUS, FIELD_SHAPE, FIELD_ROUND_R, FIELD_X, FIELD_Y,
+    GATE_X, GATE_Y, MAX_SHEEP,
    PEN_ENTRY, PEN_X, PEN_Y,
    SHEEP_MAX_WHEEL_OMEGA, SHEEP_WHEEL_BASE, SHEEP_WHEEL_RADIUS,
-    WEBOTS_DT, is_penned_position,
+    WEBOTS_DT, clip_to_field, is_penned_position,
 )
 from herding.perception.lidar_perception import detections_from_scan
 from herding.perception.lidar_sim import simulate_scan
@@ -82,6 +86,7 @@ class HerdingEnv(gym.Env):
        seed: Optional[int] = None,
        use_lidar: bool = True,
        frame_stack: int = 1,
+        drive_mode: str = "differential",
    ):
        super().__init__()
        # ``use_lidar=True`` (default): obs and imitation-reward teacher
@@ -95,7 +100,14 @@ class HerdingEnv(gym.Env):
        # giving a memoryless MLP temporal context. K=1 → single frame.
        self._frame_stack = max(1, int(frame_stack))
        self._frame_buffer: list[np.ndarray] = []
-        self.action_space = spaces.Box(-1.0, 1.0, shape=(2,), dtype=np.float32)
+
+        # Drive mode: "differential" (2-wheel) or "mecanum" (4-wheel omni).
+        self._drive_mode = drive_mode.lower()
+        if self._drive_mode not in ("differential", "mecanum"):
+            raise ValueError(f"Unknown drive_mode: {drive_mode!r}")
+        action_dim = 3 if self._drive_mode == "mecanum" else 2
+        self.action_space = spaces.Box(-1.0, 1.0, shape=(action_dim,),
+                                       dtype=np.float32)
        self._single_obs_dim = OBS_DIM
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
@@ -110,6 +122,11 @@ class HerdingEnv(gym.Env):
        # 1 = sheep spawn anywhere in the field (deployment distribution).
        self._difficulty = float(difficulty)
        self._initial_seed = seed
+        self._initial_seed_used = False
+
+        # Env-owned RNG for wander jitter, re-seeded from np_random on reset.
+        self._py_rng = random.Random()
+        self._action_dim = action_dim

        # State (initialised in reset)
        self.dog_x = self.dog_y = self.dog_heading = 0.0
@@ -119,17 +136,14 @@ class HerdingEnv(gym.Env):
        self.sheep_penned = np.zeros(0, dtype=bool)
        self.sheep_wander = np.zeros(0, dtype=np.float32)

-        self.prev_action = np.zeros(2, dtype=np.float32)
-        self.smoothed_action = np.zeros(2, dtype=np.float32)
+        self.prev_action = np.zeros(self._action_dim, dtype=np.float32)
+        self.smoothed_action = np.zeros(self._action_dim, dtype=np.float32)
        self.steps = 0
        self.n_sheep = 0
        self.prev_n_penned = 0
        self.prev_d_pen = 0.0
        self.prev_radius = 0.0

-        # Env-owned RNG for wander jitter, re-seeded from np_random on reset.
-        self._py_rng = random.Random()
-
    # --- Public knobs ---
    def set_max_n_sheep(self, value: int) -> None:
        self._max_n_sheep = int(np.clip(value, 1, MAX_SHEEP))
@@ -149,6 +163,10 @@ class HerdingEnv(gym.Env):

    # --- gym API ---
    def reset(self, *, seed=None, options=None):
+        if (seed is None and self._initial_seed is not None
+                and not self._initial_seed_used):
+            seed = self._initial_seed
+        self._initial_seed_used = True
        super().reset(seed=seed)
        self._py_rng.seed(int(self.np_random.integers(0, 2**31 - 1)))
        opts = options or {}
@@ -168,16 +186,32 @@ class HerdingEnv(gym.Env):
        # Sheep spawn region linearly interpolates with difficulty:
        # 0 → small box near the gate, 1 → full field.
        d = self._difficulty
-        sx_lo = 7.0   - d * 20.0
-        sx_hi = 14.0  - d * 1.0
-        sy_lo = -12.0 + d * 0.0
-        sy_hi = -6.0  + d * 19.0
+        if FIELD_SHAPE == "field_round":
+            # Round field: spawn in a sector near the gate (south),
+            # expanding to the full circle at difficulty=1.
+            spawn_r_lo = 3.0
+            spawn_r_hi = d * FIELD_ROUND_R * 0.8 + (1.0 - d) * 6.0
+            # Angle spread around south (±60° at d=0, full circle at d=1).
+            half_angle = math.radians(60) + d * math.radians(120)
+            angle_lo = math.pi / 2.0 - half_angle   # from south = -π/2
+            angle_hi = math.pi / 2.0 + half_angle
+        else:
+            sx_lo = 7.0   - d * 20.0
+            sx_hi = 14.0  - d * 1.0
+            sy_lo = -12.0 + d * 0.0
+            sy_hi = -6.0  + d * 19.0

        sxs, sys_, shs, sws = [], [], [], []
        for _ in range(self.n_sheep):
            for _try in range(100):
-                sx = float(self.np_random.uniform(sx_lo, sx_hi))
-                sy = float(self.np_random.uniform(sy_lo, sy_hi))
+                if FIELD_SHAPE == "field_round":
+                    r_spawn = float(self.np_random.uniform(spawn_r_lo, spawn_r_hi))
+                    a_spawn = float(self.np_random.uniform(angle_lo, angle_hi))
+                    sx = r_spawn * math.cos(a_spawn)
+                    sy = -r_spawn * math.sin(a_spawn)
+                else:
+                    sx = float(self.np_random.uniform(sx_lo, sx_hi))
+                    sy = float(self.np_random.uniform(sy_lo, sy_hi))
                # Reject if too close to the dog or another sheep, or
                # already in the gate column (would start "penned").
                if math.hypot(sx - self.dog_x, sy - self.dog_y) < 3.0:
@@ -198,8 +232,8 @@ class HerdingEnv(gym.Env):
        self.sheep_wander = np.asarray(sws, dtype=np.float32)
        self.sheep_penned = np.zeros(self.n_sheep, dtype=bool)

-        self.prev_action = np.zeros(2, dtype=np.float32)
-        self.smoothed_action = np.zeros(2, dtype=np.float32)
+        self.prev_action = np.zeros(self._action_dim, dtype=np.float32)
+        self.smoothed_action = np.zeros(self._action_dim, dtype=np.float32)
        self.steps = 0
        self.prev_n_penned = 0
        self.prev_d_pen, self.prev_radius = self._flock_metrics()
@@ -225,25 +259,46 @@ class HerdingEnv(gym.Env):
        )
        self.prev_action = self.smoothed_action.copy()
        vx, vy = float(self.smoothed_action[0]), float(self.smoothed_action[1])
+        omega = float(self.smoothed_action[2]) if self._action_dim >= 3 else 0.0

        # Safety supervisor — dog stays north of the gate.
        if self.dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
            vx, vy = 0.0, 1.0

        # Step the dog.
-        wL, wR = velocity_to_wheels(
-            vx, vy, self.dog_heading,
-            max_linear=DOG_MAX_LINEAR,
-            wheel_radius=DOG_WHEEL_RADIUS,
-            max_wheel_omega=DOG_MAX_WHEEL_OMEGA,
-            k_turn=4.0,
-        )
-        self.dog_x, self.dog_y, self.dog_heading = kinematics_step(
-            self.dog_x, self.dog_y, self.dog_heading,
-            wL, wR, DOG_WHEEL_RADIUS, DOG_WHEEL_BASE, WEBOTS_DT,
-        )
-        self.dog_x = float(np.clip(self.dog_x, FIELD_X[0] + 0.3, FIELD_X[1] - 0.3))
-        self.dog_y = float(np.clip(self.dog_y, DOG_SOUTH_LIMIT, FIELD_Y[1] - 0.3))
+        if self._drive_mode == "mecanum":
+            w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
+                vx, vy, omega, self.dog_heading,
+                max_linear=DOG_MAX_LINEAR,
+                wheel_radius=DOG_WHEEL_RADIUS,
+                lx=DOG_WHEEL_BASE_X / 2.0, ly=DOG_WHEEL_BASE_Y / 2.0,
+                max_wheel_omega=DOG_MAX_WHEEL_OMEGA,
+                k_turn=4.0,
+                wheel_base=DOG_WHEEL_BASE,
+            )
+            self.dog_x, self.dog_y, self.dog_heading = mecanum_kinematics_step(
+                self.dog_x, self.dog_y, self.dog_heading,
+                w_fl, w_fr, w_rl, w_rr,
+                DOG_WHEEL_RADIUS,
+                DOG_WHEEL_BASE_X / 2.0, DOG_WHEEL_BASE_Y / 2.0,
+                WEBOTS_DT,
+            )
+        else:
+            wL, wR = velocity_to_wheels(
+                vx, vy, self.dog_heading,
+                max_linear=DOG_MAX_LINEAR,
+                wheel_radius=DOG_WHEEL_RADIUS,
+                max_wheel_omega=DOG_MAX_WHEEL_OMEGA,
+                k_turn=4.0,
+            )
+            self.dog_x, self.dog_y, self.dog_heading = kinematics_step(
+                self.dog_x, self.dog_y, self.dog_heading,
+                wL, wR, DOG_WHEEL_RADIUS, DOG_WHEEL_BASE, WEBOTS_DT,
+            )
+        self.dog_x, self.dog_y = clip_to_field(self.dog_x, self.dog_y, margin=0.3)
+        # Extra constraint: dog stays north of the gate area.
+        if self.dog_y < DOG_SOUTH_LIMIT:
+            self.dog_y = DOG_SOUTH_LIMIT

        # Step sheep and update penned flags (GT-based).
        for i in range(self.n_sheep):
@@ -304,13 +359,21 @@ class HerdingEnv(gym.Env):
            SHEEP_WHEEL_RADIUS, SHEEP_WHEEL_BASE, WEBOTS_DT,
        )

-        # Wall clipping (south wall absent inside the gate column).
-        nx = float(np.clip(nx, FIELD_X[0] + 0.2, FIELD_X[1] - 0.2))
-        in_gate_col = PEN_X[0] <= nx <= PEN_X[1]
-        if in_gate_col:
-            ny = float(np.clip(ny, PEN_Y[0] + 0.2, FIELD_Y[1] - 0.2))
+        # Wall clipping.
+        if FIELD_SHAPE == "field_round":
+            in_gate_col = PEN_X[0] <= nx <= PEN_X[1]
+            if in_gate_col:
+                # Allow passage through the gate column (south of the wall).
+                ny = float(np.clip(ny, PEN_Y[0] + 0.2, FIELD_Y[1] - 0.2))
+            else:
+                nx, ny = clip_to_field(nx, ny, margin=0.2)
        else:
-            ny = float(np.clip(ny, FIELD_Y[0] + 0.2, FIELD_Y[1] - 0.2))
+            nx = float(np.clip(nx, FIELD_X[0] + 0.2, FIELD_X[1] - 0.2))
+            in_gate_col = PEN_X[0] <= nx <= PEN_X[1]
+            if in_gate_col:
+                ny = float(np.clip(ny, PEN_Y[0] + 0.2, FIELD_Y[1] - 0.2))
+            else:
+                ny = float(np.clip(ny, FIELD_Y[0] + 0.2, FIELD_Y[1] - 0.2))

        self.sheep_x[i] = nx
        self.sheep_y[i] = ny
@@ -374,6 +437,7 @@ class HerdingEnv(gym.Env):
            (self.dog_x, self.dog_y), self.dog_heading,
            sheep_xy_list, sheep_penned_list,
            n_max=self._max_n_sheep,
+            n_expected=self.n_sheep,
        )

    def _build_obs(self) -> np.ndarray: