Mimic webots physics

2026-04-26 18:22:26 +01:00
parent 8110fc3143
commit 1af7d03ce2
11 changed files with 6091 additions and 24 deletions
@@ -79,11 +79,13 @@ def in_pen(x: float, y: float) -> bool:

 def build_obs(dog_pos: np.ndarray,
              sheep_dict: dict,
-              n_sheep: int) -> np.ndarray:
+              n_sheep: int,
+              dog_heading: float = 0.0) -> np.ndarray:
    """
-    Build the 13-dim flock observation — identical to HerdingEnv._obs().
+    Build the 18-dim flock observation — identical to HerdingEnv._obs().

    sheep_dict: {name: (x, y)} for ALL known sheep (penned or not).
+    dog_heading: dog's current world-frame heading in radians.
    """
    D = 2 * FIELD

@@ -119,6 +121,7 @@ def build_obs(dog_pos: np.ndarray,
        (PEN_CENTER[0] - far1[0]) / D, (PEN_CENTER[1] - far1[1]) / D,
        radius / D,
        frac_active,
+        math.cos(dog_heading), math.sin(dog_heading),
    ], dtype=np.float32)


@@ -152,7 +155,7 @@ ear_phase = 0.0
 try:
    n_sheep = int(sys.argv[1])
 except (IndexError, ValueError):
-    n_sheep = 5
+    n_sheep = 3

 # ── Load model ───────────────────────────────────────────────────────────────
 print(f"[RL dog] Loading model from {MODEL_PATH}")
@@ -230,8 +233,9 @@ while robot.step(timestep) != -1:
    gps_vals = gps.getValues()
    dog_pos  = np.array([gps_vals[0], gps_vals[1]], dtype=np.float32)

-    # 3. Build and normalise observation
-    raw_obs  = build_obs(dog_pos, sheep_positions, n_sheep)
+    # 3. Build and normalise observation (heading from compass)
+    raw_obs  = build_obs(dog_pos, sheep_positions, n_sheep,
+                         dog_heading=bearing())
    obs_norm = vecnorm.normalize_obs(raw_obs[np.newaxis])  # (1, 18)

    # 4. Policy inference + smoothing
@@ -45,6 +45,15 @@ class HerdingEnv(gym.Env):
    SHEEP_WANDER_V = 0.20   # m/s
    DT             = 0.1    # seconds per step

+    # Wheeled dog dynamics — mirror the Webots controller's drive():
+    # forward speed gated by cos(heading_error); turn rate proportional to
+    # error. Without this, the env treats the dog as a particle that can
+    # change direction instantly, producing policies that bang-bang and don't
+    # transfer to the wheeled Webots robot.
+    DOG_K_TURN          = 4.0   # rad/s per rad (heading-error gain)
+    DOG_MAX_TURN_RATE   = 6.0   # rad/s (cap on turn rate)
+    DOG_STOP_THRESHOLD  = 0.05  # ||action|| below this → dog stops in place
+
    # Boid parameters — identical to sheep.py
    FLEE_DIST       = 7.0
    SEPARATION_DIST = 2.5
@@ -102,11 +111,12 @@ class HerdingEnv(gym.Env):
                    raise ValueError(f"unknown reward_cfg key: {k}")
                setattr(self, k, v)

-        # Fixed 16-dim observation regardless of n_sheep:
+        # Fixed 18-dim observation regardless of n_sheep:
        #   dog_pos(2) + rel_com(2) + rel_far1(2) + rel_far2(2) + rel_far3(2)
        #   + com_to_pen(2) + far1_to_pen(2) + radius(1) + frac_penned(1)
+        #   + cos(heading)(1) + sin(heading)(1)   ← new, for wheeled dynamics
        self.observation_space = spaces.Box(
-            low=-np.inf, high=np.inf, shape=(16,), dtype=np.float32
+            low=-np.inf, high=np.inf, shape=(18,), dtype=np.float32
        )

        # Action: desired velocity (vx, vy) ∈ [-1, 1]², scaled by DOG_SPEED
@@ -119,6 +129,7 @@ class HerdingEnv(gym.Env):
        self._prev_penned       = 0
        self._prev_pen_dist_sum = 0.0
        self.dog_pos       = np.zeros(2, dtype=np.float32)
+        self.dog_heading   = 0.0    # radians, world frame
        self.sheep_pos     = np.zeros((self.MAX_SHEEP, 2), dtype=np.float32)
        self.penned        = np.ones(self.MAX_SHEEP, dtype=bool)
        self.wander_ang    = np.zeros(self.MAX_SHEEP, dtype=np.float32)
@@ -192,6 +203,9 @@ class HerdingEnv(gym.Env):
                -self.FIELD * 0.8, self.FIELD * 0.8, size=(2,)
            ).astype(np.float32)

+        # Random initial heading so the policy learns to handle any orientation.
+        self.dog_heading = float(self.np_random.uniform(-np.pi, np.pi))
+
        self.wander_ang = self.np_random.uniform(
            -np.pi, np.pi, size=(self.MAX_SHEEP,)
        ).astype(np.float32)
@@ -220,13 +234,39 @@ class HerdingEnv(gym.Env):

        act = np.clip(np.asarray(action, dtype=np.float32), -1.0, 1.0)
        old_dog = self.dog_pos.copy()
-        new_dog = np.clip(
-            self.dog_pos + act * self.DOG_SPEED * self.DT,
-            -self.FIELD, self.FIELD
+
+        # Wheeled-dog kinematics — mirrors the Webots controller's drive():
+        # interpret (vx, vy) as a desired velocity vector in world frame; the
+        # dog turns toward it at a limited rate, and forward speed is gated
+        # by cos(heading_error). Bang-bang policies still produce smooth
+        # motion (the dog can't sidestep — it has to turn first).
+        act_mag = float(np.linalg.norm(act))
+        if act_mag < self.DOG_STOP_THRESHOLD:
+            # Below threshold the Webots dog stops; treat the same way here.
+            new_dog = self.dog_pos.copy()
+        else:
+            target_heading = float(np.arctan2(act[1], act[0]))
+            err = target_heading - self.dog_heading
+            # Wrap to (-pi, pi]
+            err = (err + np.pi) % (2 * np.pi) - np.pi
+            turn_rate = np.clip(self.DOG_K_TURN * err,
+                                -self.DOG_MAX_TURN_RATE,
+                                 self.DOG_MAX_TURN_RATE)
+            self.dog_heading = float(
+                ((self.dog_heading + turn_rate * self.DT) + np.pi)
+                % (2 * np.pi) - np.pi
            )
-        # Pen wall collision — mirrors Webots geometry. West (x=PEN_X[0]) and
-        # east (x=PEN_X[1]) walls block the dog within the pen's y-range.
-        # North face (y=PEN_Y[1]=-8) is open. South is the field edge.
+            target_speed = act_mag * self.DOG_SPEED
+            fwd_speed = target_speed * max(0.0, float(np.cos(err)))
+            step_vec = np.array([np.cos(self.dog_heading),
+                                 np.sin(self.dog_heading)], dtype=np.float32)
+            new_dog = np.clip(
+                self.dog_pos + step_vec * fwd_speed * self.DT,
+                -self.FIELD, self.FIELD,
+            )
+
+        # Pen wall collision — west and east pen walls block the dog within
+        # the pen's y-range. North face is open, south is the field edge.
        px0, px1 = self.PEN_X
        py0, py1 = self.PEN_Y
        if py0 < new_dog[1] < py1:
@@ -372,6 +412,8 @@ class HerdingEnv(gym.Env):
            (pen_ref[0] - far1[0]) / D, (pen_ref[1] - far1[1]) / D,
            radius / D,
            active_mask.sum() / self.n_sheep,
+            float(np.cos(self.dog_heading)),
+            float(np.sin(self.dog_heading)),
        ], dtype=np.float32)

    def _reward(self, n_penned: int, newly_penned: int, action: np.ndarray):
@@ -0,0 +1,5 @@
+Config loaded from config.json
+Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_SOUTH': 0.01, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.0, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02}
+Run dir: runs/wheeled_n10
+Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
+
@@ -0,0 +1,15 @@
+{
+  "W_PER_SHEEP": 2.0,
+  "W_ALIGN": 0.05,
+  "W_PEN_BONUS": 10.0,
+  "W_COMPLETE": 100.0,
+  "W_STEP_COST": 0.02,
+  "W_SOUTH": 0.01,
+  "W_COMPACT": 0.0,
+  "W_WALL_TOUCH": 0.0,
+  "WALL_TOUCH_BUFFER": 0.4,
+  "ALIGN_SHAPE": "standoff",
+  "ALIGN_GATED": true,
+  "ENTRY_AWARE": true,
+  "ent_coef": 0.02
+}
@@ -518,13 +518,13 @@ Sheep {
  name "sheep3"
  controller "sheep"
 }
-Sheep {
-  translation 3.5 1 0.5
-  name "sheep4"
-  controller "sheep"
-}
-Sheep {
-  translation 3.5 -1 0.5
-  name "sheep5"
-  controller "sheep"
-}
+# Sheep {
+#   translation 3.5 1 0.5
+#   name "sheep4"
+#   controller "sheep"
+# }
+# Sheep {
+#   translation 3.5 -1 0.5
+#   name "sheep5"
+#   controller "sheep"
+# }