Mimic webots physics
This commit is contained in:
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
After Width: | Height: | Size: 164 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 200 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 61 KiB |
Binary file not shown.
@@ -79,11 +79,13 @@ def in_pen(x: float, y: float) -> bool:
|
|||||||
|
|
||||||
def build_obs(dog_pos: np.ndarray,
|
def build_obs(dog_pos: np.ndarray,
|
||||||
sheep_dict: dict,
|
sheep_dict: dict,
|
||||||
n_sheep: int) -> np.ndarray:
|
n_sheep: int,
|
||||||
|
dog_heading: float = 0.0) -> np.ndarray:
|
||||||
"""
|
"""
|
||||||
Build the 13-dim flock observation — identical to HerdingEnv._obs().
|
Build the 18-dim flock observation — identical to HerdingEnv._obs().
|
||||||
|
|
||||||
sheep_dict: {name: (x, y)} for ALL known sheep (penned or not).
|
sheep_dict: {name: (x, y)} for ALL known sheep (penned or not).
|
||||||
|
dog_heading: dog's current world-frame heading in radians.
|
||||||
"""
|
"""
|
||||||
D = 2 * FIELD
|
D = 2 * FIELD
|
||||||
|
|
||||||
@@ -119,6 +121,7 @@ def build_obs(dog_pos: np.ndarray,
|
|||||||
(PEN_CENTER[0] - far1[0]) / D, (PEN_CENTER[1] - far1[1]) / D,
|
(PEN_CENTER[0] - far1[0]) / D, (PEN_CENTER[1] - far1[1]) / D,
|
||||||
radius / D,
|
radius / D,
|
||||||
frac_active,
|
frac_active,
|
||||||
|
math.cos(dog_heading), math.sin(dog_heading),
|
||||||
], dtype=np.float32)
|
], dtype=np.float32)
|
||||||
|
|
||||||
|
|
||||||
@@ -152,7 +155,7 @@ ear_phase = 0.0
|
|||||||
try:
|
try:
|
||||||
n_sheep = int(sys.argv[1])
|
n_sheep = int(sys.argv[1])
|
||||||
except (IndexError, ValueError):
|
except (IndexError, ValueError):
|
||||||
n_sheep = 5
|
n_sheep = 3
|
||||||
|
|
||||||
# ── Load model ───────────────────────────────────────────────────────────────
|
# ── Load model ───────────────────────────────────────────────────────────────
|
||||||
print(f"[RL dog] Loading model from {MODEL_PATH}")
|
print(f"[RL dog] Loading model from {MODEL_PATH}")
|
||||||
@@ -230,8 +233,9 @@ while robot.step(timestep) != -1:
|
|||||||
gps_vals = gps.getValues()
|
gps_vals = gps.getValues()
|
||||||
dog_pos = np.array([gps_vals[0], gps_vals[1]], dtype=np.float32)
|
dog_pos = np.array([gps_vals[0], gps_vals[1]], dtype=np.float32)
|
||||||
|
|
||||||
# 3. Build and normalise observation
|
# 3. Build and normalise observation (heading from compass)
|
||||||
raw_obs = build_obs(dog_pos, sheep_positions, n_sheep)
|
raw_obs = build_obs(dog_pos, sheep_positions, n_sheep,
|
||||||
|
dog_heading=bearing())
|
||||||
obs_norm = vecnorm.normalize_obs(raw_obs[np.newaxis]) # (1, 13)
|
obs_norm = vecnorm.normalize_obs(raw_obs[np.newaxis]) # (1, 18)
|
||||||
|
|
||||||
# 4. Policy inference + smoothing
|
# 4. Policy inference + smoothing
|
||||||
|
|||||||
Binary file not shown.
+50
-8
@@ -45,6 +45,15 @@ class HerdingEnv(gym.Env):
|
|||||||
SHEEP_WANDER_V = 0.20 # m/s
|
SHEEP_WANDER_V = 0.20 # m/s
|
||||||
DT = 0.1 # seconds per step
|
DT = 0.1 # seconds per step
|
||||||
|
|
||||||
|
# Wheeled dog dynamics — mirror the Webots controller's drive():
|
||||||
|
# forward speed gated by cos(heading_error); turn rate proportional to
|
||||||
|
# error. Without this, the env treats the dog as a particle that can
|
||||||
|
# change direction instantly, producing policies that bang-bang and don't
|
||||||
|
# transfer to the wheeled Webots robot.
|
||||||
|
DOG_K_TURN = 4.0 # rad/s per rad (heading-error gain)
|
||||||
|
DOG_MAX_TURN_RATE = 6.0 # rad/s (cap on turn rate)
|
||||||
|
DOG_STOP_THRESHOLD = 0.05 # ||action|| below this → dog stops in place
|
||||||
|
|
||||||
# Boid parameters — identical to sheep.py
|
# Boid parameters — identical to sheep.py
|
||||||
FLEE_DIST = 7.0
|
FLEE_DIST = 7.0
|
||||||
SEPARATION_DIST = 2.5
|
SEPARATION_DIST = 2.5
|
||||||
@@ -102,11 +111,12 @@ class HerdingEnv(gym.Env):
|
|||||||
raise ValueError(f"unknown reward_cfg key: {k}")
|
raise ValueError(f"unknown reward_cfg key: {k}")
|
||||||
setattr(self, k, v)
|
setattr(self, k, v)
|
||||||
|
|
||||||
# Fixed 16-dim observation regardless of n_sheep:
|
# Fixed 18-dim observation regardless of n_sheep:
|
||||||
# dog_pos(2) + rel_com(2) + rel_far1(2) + rel_far2(2) + rel_far3(2)
|
# dog_pos(2) + rel_com(2) + rel_far1(2) + rel_far2(2) + rel_far3(2)
|
||||||
# + com_to_pen(2) + far1_to_pen(2) + radius(1) + frac_penned(1)
|
# + com_to_pen(2) + far1_to_pen(2) + radius(1) + frac_penned(1)
|
||||||
|
# + cos(heading)(1) + sin(heading)(1) ← new, for wheeled dynamics
|
||||||
self.observation_space = spaces.Box(
|
self.observation_space = spaces.Box(
|
||||||
low=-np.inf, high=np.inf, shape=(16,), dtype=np.float32
|
low=-np.inf, high=np.inf, shape=(18,), dtype=np.float32
|
||||||
)
|
)
|
||||||
|
|
||||||
# Action: desired velocity (vx, vy) ∈ [-1, 1]², scaled by DOG_SPEED
|
# Action: desired velocity (vx, vy) ∈ [-1, 1]², scaled by DOG_SPEED
|
||||||
@@ -119,6 +129,7 @@ class HerdingEnv(gym.Env):
|
|||||||
self._prev_penned = 0
|
self._prev_penned = 0
|
||||||
self._prev_pen_dist_sum = 0.0
|
self._prev_pen_dist_sum = 0.0
|
||||||
self.dog_pos = np.zeros(2, dtype=np.float32)
|
self.dog_pos = np.zeros(2, dtype=np.float32)
|
||||||
|
self.dog_heading = 0.0 # radians, world frame
|
||||||
self.sheep_pos = np.zeros((self.MAX_SHEEP, 2), dtype=np.float32)
|
self.sheep_pos = np.zeros((self.MAX_SHEEP, 2), dtype=np.float32)
|
||||||
self.penned = np.ones(self.MAX_SHEEP, dtype=bool)
|
self.penned = np.ones(self.MAX_SHEEP, dtype=bool)
|
||||||
self.wander_ang = np.zeros(self.MAX_SHEEP, dtype=np.float32)
|
self.wander_ang = np.zeros(self.MAX_SHEEP, dtype=np.float32)
|
||||||
@@ -192,6 +203,9 @@ class HerdingEnv(gym.Env):
|
|||||||
-self.FIELD * 0.8, self.FIELD * 0.8, size=(2,)
|
-self.FIELD * 0.8, self.FIELD * 0.8, size=(2,)
|
||||||
).astype(np.float32)
|
).astype(np.float32)
|
||||||
|
|
||||||
|
# Random initial heading so the policy learns to handle any orientation.
|
||||||
|
self.dog_heading = float(self.np_random.uniform(-np.pi, np.pi))
|
||||||
|
|
||||||
self.wander_ang = self.np_random.uniform(
|
self.wander_ang = self.np_random.uniform(
|
||||||
-np.pi, np.pi, size=(self.MAX_SHEEP,)
|
-np.pi, np.pi, size=(self.MAX_SHEEP,)
|
||||||
).astype(np.float32)
|
).astype(np.float32)
|
||||||
@@ -220,13 +234,39 @@ class HerdingEnv(gym.Env):
|
|||||||
|
|
||||||
act = np.clip(np.asarray(action, dtype=np.float32), -1.0, 1.0)
|
act = np.clip(np.asarray(action, dtype=np.float32), -1.0, 1.0)
|
||||||
old_dog = self.dog_pos.copy()
|
old_dog = self.dog_pos.copy()
|
||||||
new_dog = np.clip(
|
|
||||||
self.dog_pos + act * self.DOG_SPEED * self.DT,
|
# Wheeled-dog kinematics — mirrors the Webots controller's drive():
|
||||||
-self.FIELD, self.FIELD
|
# interpret (vx, vy) as a desired velocity vector in world frame; the
|
||||||
|
# dog turns toward it at a limited rate, and forward speed is gated
|
||||||
|
# by cos(heading_error). Bang-bang policies still produce smooth
|
||||||
|
# motion (the dog can't sidestep — it has to turn first).
|
||||||
|
act_mag = float(np.linalg.norm(act))
|
||||||
|
if act_mag < self.DOG_STOP_THRESHOLD:
|
||||||
|
# Below threshold the Webots dog stops; treat the same way here.
|
||||||
|
new_dog = self.dog_pos.copy()
|
||||||
|
else:
|
||||||
|
target_heading = float(np.arctan2(act[1], act[0]))
|
||||||
|
err = target_heading - self.dog_heading
|
||||||
|
# Wrap to [-pi, pi)
|
||||||
|
err = (err + np.pi) % (2 * np.pi) - np.pi
|
||||||
|
turn_rate = np.clip(self.DOG_K_TURN * err,
|
||||||
|
-self.DOG_MAX_TURN_RATE,
|
||||||
|
self.DOG_MAX_TURN_RATE)
|
||||||
|
self.dog_heading = float(
|
||||||
|
((self.dog_heading + turn_rate * self.DT) + np.pi)
|
||||||
|
% (2 * np.pi) - np.pi
|
||||||
)
|
)
|
||||||
# Pen wall collision — mirrors Webots geometry. West (x=PEN_X[0]) and
|
target_speed = act_mag * self.DOG_SPEED
|
||||||
# east (x=PEN_X[1]) walls block the dog within the pen's y-range.
|
fwd_speed = target_speed * max(0.0, float(np.cos(err)))
|
||||||
# North face (y=PEN_Y[1]=-8) is open. South is the field edge.
|
step_vec = np.array([np.cos(self.dog_heading),
|
||||||
|
np.sin(self.dog_heading)], dtype=np.float32)
|
||||||
|
new_dog = np.clip(
|
||||||
|
self.dog_pos + step_vec * fwd_speed * self.DT,
|
||||||
|
-self.FIELD, self.FIELD,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Pen wall collision — west and east pen walls block the dog within
|
||||||
|
# the pen's y-range. North face is open, south is the field edge.
|
||||||
px0, px1 = self.PEN_X
|
px0, px1 = self.PEN_X
|
||||||
py0, py1 = self.PEN_Y
|
py0, py1 = self.PEN_Y
|
||||||
if py0 < new_dog[1] < py1:
|
if py0 < new_dog[1] < py1:
|
||||||
@@ -372,6 +412,8 @@ class HerdingEnv(gym.Env):
|
|||||||
(pen_ref[0] - far1[0]) / D, (pen_ref[1] - far1[1]) / D,
|
(pen_ref[0] - far1[0]) / D, (pen_ref[1] - far1[1]) / D,
|
||||||
radius / D,
|
radius / D,
|
||||||
active_mask.sum() / self.n_sheep,
|
active_mask.sum() / self.n_sheep,
|
||||||
|
float(np.cos(self.dog_heading)),
|
||||||
|
float(np.sin(self.dog_heading)),
|
||||||
], dtype=np.float32)
|
], dtype=np.float32)
|
||||||
|
|
||||||
def _reward(self, n_penned: int, newly_penned: int, action: np.ndarray):
|
def _reward(self, n_penned: int, newly_penned: int, action: np.ndarray):
|
||||||
|
|||||||
@@ -0,0 +1,5 @@
|
|||||||
|
Config loaded from config.json
|
||||||
|
Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_SOUTH': 0.01, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.0, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02}
|
||||||
|
Run dir: runs/wheeled_n10
|
||||||
|
Curriculum: 1 → 10 sheep, 1,500,000 steps/stage
|
||||||
|
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
{
|
||||||
|
"W_PER_SHEEP": 2.0,
|
||||||
|
"W_ALIGN": 0.05,
|
||||||
|
"W_PEN_BONUS": 10.0,
|
||||||
|
"W_COMPLETE": 100.0,
|
||||||
|
"W_STEP_COST": 0.02,
|
||||||
|
"W_SOUTH": 0.01,
|
||||||
|
"W_COMPACT": 0.0,
|
||||||
|
"W_WALL_TOUCH": 0.0,
|
||||||
|
"WALL_TOUCH_BUFFER": 0.4,
|
||||||
|
"ALIGN_SHAPE": "standoff",
|
||||||
|
"ALIGN_GATED": true,
|
||||||
|
"ENTRY_AWARE": true,
|
||||||
|
"ent_coef": 0.02
|
||||||
|
}
|
||||||
+10
-10
@@ -518,13 +518,13 @@ Sheep {
|
|||||||
name "sheep3"
|
name "sheep3"
|
||||||
controller "sheep"
|
controller "sheep"
|
||||||
}
|
}
|
||||||
Sheep {
|
# Sheep {
|
||||||
translation 3.5 1 0.5
|
# translation 3.5 1 0.5
|
||||||
name "sheep4"
|
# name "sheep4"
|
||||||
controller "sheep"
|
# controller "sheep"
|
||||||
}
|
# }
|
||||||
Sheep {
|
# Sheep {
|
||||||
translation 3.5 -1 0.5
|
# translation 3.5 -1 0.5
|
||||||
name "sheep5"
|
# name "sheep5"
|
||||||
controller "sheep"
|
# controller "sheep"
|
||||||
}
|
# }
|
||||||
|
|||||||
Reference in New Issue
Block a user