Mimic webots physics

This commit is contained in:
Johnny Fernandes
2026-04-26 18:22:26 +01:00
parent 8110fc3143
commit 1af7d03ce2
11 changed files with 6091 additions and 24 deletions
+51 -9
View File
@@ -45,6 +45,15 @@ class HerdingEnv(gym.Env):
SHEEP_WANDER_V = 0.20 # m/s
DT = 0.1 # seconds per step
# Wheeled dog dynamics — mirror the Webots controller's drive():
# forward speed gated by cos(heading_error); turn rate proportional to
# error. Without this, the env treats the dog as a particle that can
# change direction instantly, producing policies that bang-bang and don't
# transfer to the wheeled Webots robot.
DOG_K_TURN = 4.0 # rad/s per rad (heading-error gain)
DOG_MAX_TURN_RATE = 6.0 # rad/s (cap on turn rate)
DOG_STOP_THRESHOLD = 0.05 # ||action|| below this → dog stops in place
# Boid parameters — identical to sheep.py
FLEE_DIST = 7.0
SEPARATION_DIST = 2.5
@@ -102,11 +111,12 @@ class HerdingEnv(gym.Env):
raise ValueError(f"unknown reward_cfg key: {k}")
setattr(self, k, v)
# Fixed 16-dim observation regardless of n_sheep:
# Fixed 18-dim observation regardless of n_sheep:
# dog_pos(2) + rel_com(2) + rel_far1(2) + rel_far2(2) + rel_far3(2)
# + com_to_pen(2) + far1_to_pen(2) + radius(1) + frac_penned(1)
# + cos(heading)(1) + sin(heading)(1) ← new, for wheeled dynamics
self.observation_space = spaces.Box(
low=-np.inf, high=np.inf, shape=(16,), dtype=np.float32
low=-np.inf, high=np.inf, shape=(18,), dtype=np.float32
)
# Action: desired velocity (vx, vy) ∈ [-1, 1]², scaled by DOG_SPEED
@@ -119,6 +129,7 @@ class HerdingEnv(gym.Env):
self._prev_penned = 0
self._prev_pen_dist_sum = 0.0
self.dog_pos = np.zeros(2, dtype=np.float32)
self.dog_heading = 0.0 # radians, world frame
self.sheep_pos = np.zeros((self.MAX_SHEEP, 2), dtype=np.float32)
self.penned = np.ones(self.MAX_SHEEP, dtype=bool)
self.wander_ang = np.zeros(self.MAX_SHEEP, dtype=np.float32)
@@ -192,6 +203,9 @@ class HerdingEnv(gym.Env):
-self.FIELD * 0.8, self.FIELD * 0.8, size=(2,)
).astype(np.float32)
# Random initial heading so the policy learns to handle any orientation.
self.dog_heading = float(self.np_random.uniform(-np.pi, np.pi))
self.wander_ang = self.np_random.uniform(
-np.pi, np.pi, size=(self.MAX_SHEEP,)
).astype(np.float32)
@@ -220,13 +234,39 @@ class HerdingEnv(gym.Env):
act = np.clip(np.asarray(action, dtype=np.float32), -1.0, 1.0)
old_dog = self.dog_pos.copy()
new_dog = np.clip(
self.dog_pos + act * self.DOG_SPEED * self.DT,
-self.FIELD, self.FIELD
)
# Pen wall collision — mirrors Webots geometry. West (x=PEN_X[0]) and
# east (x=PEN_X[1]) walls block the dog within the pen's y-range.
# North face (y=PEN_Y[1]=-8) is open. South is the field edge.
# Wheeled-dog kinematics — mirrors the Webots controller's drive():
# interpret (vx, vy) as a desired velocity vector in world frame; the
# dog turns toward it at a limited rate, and forward speed is gated
# by cos(heading_error). Bang-bang policies still produce smooth
# motion (the dog can't sidestep — it has to turn first).
act_mag = float(np.linalg.norm(act))
if act_mag < self.DOG_STOP_THRESHOLD:
# Below threshold the Webots dog stops; treat the same way here.
new_dog = self.dog_pos.copy()
else:
target_heading = float(np.arctan2(act[1], act[0]))
err = target_heading - self.dog_heading
# Wrap to [-pi, pi) — an error of exactly +pi maps to -pi
err = (err + np.pi) % (2 * np.pi) - np.pi
turn_rate = np.clip(self.DOG_K_TURN * err,
-self.DOG_MAX_TURN_RATE,
self.DOG_MAX_TURN_RATE)
self.dog_heading = float(
((self.dog_heading + turn_rate * self.DT) + np.pi)
% (2 * np.pi) - np.pi
)
target_speed = act_mag * self.DOG_SPEED
fwd_speed = target_speed * max(0.0, float(np.cos(err)))
step_vec = np.array([np.cos(self.dog_heading),
np.sin(self.dog_heading)], dtype=np.float32)
new_dog = np.clip(
self.dog_pos + step_vec * fwd_speed * self.DT,
-self.FIELD, self.FIELD,
)
# Pen wall collision — west and east pen walls block the dog within
# the pen's y-range. North face is open, south is the field edge.
px0, px1 = self.PEN_X
py0, py1 = self.PEN_Y
if py0 < new_dog[1] < py1:
@@ -372,6 +412,8 @@ class HerdingEnv(gym.Env):
(pen_ref[0] - far1[0]) / D, (pen_ref[1] - far1[1]) / D,
radius / D,
active_mask.sum() / self.n_sheep,
float(np.cos(self.dog_heading)),
float(np.sin(self.dog_heading)),
], dtype=np.float32)
def _reward(self, n_penned: int, newly_penned: int, action: np.ndarray):