Approach v3 w/ south penalty
|
Before Width: | Height: | Size: 146 KiB After Width: | Height: | Size: 146 KiB |
|
Before Width: | Height: | Size: 233 KiB After Width: | Height: | Size: 233 KiB |
|
Before Width: | Height: | Size: 74 KiB After Width: | Height: | Size: 74 KiB |
|
Before Width: | Height: | Size: 194 KiB After Width: | Height: | Size: 194 KiB |
|
Before Width: | Height: | Size: 233 KiB After Width: | Height: | Size: 233 KiB |
|
Before Width: | Height: | Size: 72 KiB After Width: | Height: | Size: 72 KiB |
|
After Width: | Height: | Size: 158 KiB |
|
After Width: | Height: | Size: 220 KiB |
|
After Width: | Height: | Size: 76 KiB |
|
Can't render this file because it is too large.
|
|
Can't render this file because it is too large.
|
@@ -59,6 +59,13 @@ VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
|
|||||||
DEBUG_CSV = os.path.join(_HERE, "debug.csv")
|
DEBUG_CSV = os.path.join(_HERE, "debug.csv")
|
||||||
DEBUG_ENABLED = True # set False to disable debug.csv logging
|
DEBUG_ENABLED = True # set False to disable debug.csv logging
|
||||||
|
|
||||||
|
# ── action smoothing ─────────────────────────────────────────────────────────
|
||||||
|
# EMA on policy output to suppress the rapid oscillation (vx/vy flipping
|
||||||
|
# between -1 and +1 every step) that stalls the physical dog. 0 = no
|
||||||
|
# smoothing (raw policy), 1 = frozen. 0.3 blends 30% of the previous smoothed action with 70% of the new raw action.
|
||||||
|
ACTION_SMOOTH = 0.3
|
||||||
|
prev_action = np.zeros(2, dtype=np.float32)
|
||||||
|
|
||||||
|
|
||||||
def norm_angle(a: float) -> float:
|
def norm_angle(a: float) -> float:
|
||||||
while a > math.pi: a -= 2 * math.pi
|
while a > math.pi: a -= 2 * math.pi
|
||||||
@@ -227,9 +234,15 @@ while robot.step(timestep) != -1:
|
|||||||
raw_obs = build_obs(dog_pos, sheep_positions, n_sheep)
|
raw_obs = build_obs(dog_pos, sheep_positions, n_sheep)
|
||||||
obs_norm = vecnorm.normalize_obs(raw_obs[np.newaxis]) # (1, 13)
|
obs_norm = vecnorm.normalize_obs(raw_obs[np.newaxis]) # (1, 13)
|
||||||
|
|
||||||
# 4. Policy inference
|
# 4. Policy inference + smoothing
|
||||||
action, _ = model.predict(obs_norm, deterministic=True)
|
action, _ = model.predict(obs_norm, deterministic=True)
|
||||||
vx, vy = float(action[0][0]), float(action[0][1])
|
raw_a = np.array([float(action[0][0]), float(action[0][1])], dtype=np.float32)
|
||||||
|
if ACTION_SMOOTH > 0:
|
||||||
|
smoothed = ACTION_SMOOTH * prev_action + (1.0 - ACTION_SMOOTH) * raw_a
|
||||||
|
prev_action[:] = smoothed
|
||||||
|
vx, vy = float(smoothed[0]), float(smoothed[1])
|
||||||
|
else:
|
||||||
|
vx, vy = float(raw_a[0]), float(raw_a[1])
|
||||||
|
|
||||||
# 5. Drive
|
# 5. Drive
|
||||||
drive(vx, vy)
|
drive(vx, vy)
|
||||||
|
|||||||
@@ -60,6 +60,11 @@ class HerdingEnv(gym.Env):
|
|||||||
W_PEN_BONUS = 10.0 # per sheep penned
|
W_PEN_BONUS = 10.0 # per sheep penned
|
||||||
W_COMPLETE = 100.0 # all sheep penned
|
W_COMPLETE = 100.0 # all sheep penned
|
||||||
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
||||||
|
W_SOUTH = 0.01 # per-sheep per-metre penalty for active sheep below the pen
|
||||||
|
# entrance (y < PEN_Y[1]=-8). Keeps the dog from letting
|
||||||
|
# sheep drift into the dead zone below the open face where
|
||||||
|
# they must reverse direction (north) to enter — hard to
|
||||||
|
# recover. 0.01 ≈ half of W_STEP_COST (0.02) per sheep per metre below the entrance.
|
||||||
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
|
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
|
||||||
W_WALL_TOUCH = 0.01 # per-sheep max penalty at wall surface. Linear ramp
|
W_WALL_TOUCH = 0.01 # per-sheep max penalty at wall surface. Linear ramp
|
||||||
# within WALL_TOUCH_BUFFER. Covers field outer walls and
|
# within WALL_TOUCH_BUFFER. Covers field outer walls and
|
||||||
@@ -437,6 +442,16 @@ class HerdingEnv(gym.Env):
|
|||||||
else:
|
else:
|
||||||
r_wall_touch = 0.0
|
r_wall_touch = 0.0
|
||||||
|
|
||||||
|
# South penalty: discourage active sheep from drifting below the pen
|
||||||
|
# entrance (y < PEN_Y[1]). Sheep in this zone must reverse direction
|
||||||
|
# (move north) to enter — very hard for the dog to recover from.
|
||||||
|
if self.W_SOUTH and active.any():
|
||||||
|
pts = self.sheep_pos[:self.n_sheep][active]
|
||||||
|
depth = np.maximum(0.0, self.PEN_Y[1] - pts[:, 1]) # metres below entrance
|
||||||
|
r_south = -float(depth.sum()) * self.W_SOUTH
|
||||||
|
else:
|
||||||
|
r_south = 0.0
|
||||||
|
|
||||||
# Compactness shaping: reward decreases in flock radius (active sheep only)
|
# Compactness shaping: reward decreases in flock radius (active sheep only)
|
||||||
if self.W_COMPACT and active.any():
|
if self.W_COMPACT and active.any():
|
||||||
cur_radius = float(np.linalg.norm(
|
cur_radius = float(np.linalg.norm(
|
||||||
@@ -450,11 +465,12 @@ class HerdingEnv(gym.Env):
|
|||||||
r_pen_bonus = newly_penned * self.W_PEN_BONUS
|
r_pen_bonus = newly_penned * self.W_PEN_BONUS
|
||||||
r_step_cost = -self.W_STEP_COST
|
r_step_cost = -self.W_STEP_COST
|
||||||
r_complete = self.W_COMPLETE if n_penned == self.n_sheep else 0.0
|
r_complete = self.W_COMPLETE if n_penned == self.n_sheep else 0.0
|
||||||
reward = (r_progress + alignment + r_compact + r_wall_touch
|
reward = (r_progress + alignment + r_south + r_compact + r_wall_touch
|
||||||
+ r_pen_bonus + r_step_cost + r_complete)
|
+ r_pen_bonus + r_step_cost + r_complete)
|
||||||
rcomps = {
|
rcomps = {
|
||||||
"progress": float(r_progress),
|
"progress": float(r_progress),
|
||||||
"alignment": float(alignment),
|
"alignment": float(alignment),
|
||||||
|
"south": float(r_south),
|
||||||
"compact": float(r_compact),
|
"compact": float(r_compact),
|
||||||
"wall_touch": float(r_wall_touch),
|
"wall_touch": float(r_wall_touch),
|
||||||
"pen_bonus": float(r_pen_bonus),
|
"pen_bonus": float(r_pen_bonus),
|
||||||
|
|||||||
@@ -200,6 +200,7 @@ DEFAULT_CONFIG = {
|
|||||||
"W_PEN_BONUS": 10.0,
|
"W_PEN_BONUS": 10.0,
|
||||||
"W_COMPLETE": 100.0,
|
"W_COMPLETE": 100.0,
|
||||||
"W_STEP_COST": 0.02,
|
"W_STEP_COST": 0.02,
|
||||||
|
"W_SOUTH": 0.01,
|
||||||
"W_COMPACT": 0.0,
|
"W_COMPACT": 0.0,
|
||||||
"W_WALL_TOUCH": 0.04,
|
"W_WALL_TOUCH": 0.04,
|
||||||
"WALL_TOUCH_BUFFER": 0.3,
|
"WALL_TOUCH_BUFFER": 0.3,
|
||||||
|
|||||||