Approach v3 w/ south penalty
|
Before Width: | Height: | Size: 146 KiB After Width: | Height: | Size: 146 KiB |
|
Before Width: | Height: | Size: 233 KiB After Width: | Height: | Size: 233 KiB |
|
Before Width: | Height: | Size: 74 KiB After Width: | Height: | Size: 74 KiB |
|
Before Width: | Height: | Size: 194 KiB After Width: | Height: | Size: 194 KiB |
|
Before Width: | Height: | Size: 233 KiB After Width: | Height: | Size: 233 KiB |
|
Before Width: | Height: | Size: 72 KiB After Width: | Height: | Size: 72 KiB |
|
After Width: | Height: | Size: 158 KiB |
|
After Width: | Height: | Size: 220 KiB |
|
After Width: | Height: | Size: 76 KiB |
|
Can't render this file because it is too large.
|
|
Can't render this file because it is too large.
|
@@ -59,6 +59,13 @@ VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
|
||||
DEBUG_CSV = os.path.join(_HERE, "debug.csv")
|
||||
DEBUG_ENABLED = True # set False to disable debug.csv logging
|
||||
|
||||
# ── action smoothing ─────────────────────────────────────────────────────────
|
||||
# EMA on policy output to suppress the rapid oscillation (vx/vy flipping
|
||||
# between -1 and +1 every step) that stalls the physical dog. 0 = no
|
||||
# smoothing (raw policy), 1 = frozen. 0.3 keeps ~30% of previous action.
|
||||
ACTION_SMOOTH = 0.3
|
||||
prev_action = np.zeros(2, dtype=np.float32)
|
||||
|
||||
|
||||
def norm_angle(a: float) -> float:
|
||||
while a > math.pi: a -= 2 * math.pi
|
||||
@@ -227,9 +234,15 @@ while robot.step(timestep) != -1:
|
||||
raw_obs = build_obs(dog_pos, sheep_positions, n_sheep)
|
||||
obs_norm = vecnorm.normalize_obs(raw_obs[np.newaxis]) # (1, 13)
|
||||
|
||||
# 4. Policy inference
|
||||
# 4. Policy inference + smoothing
|
||||
action, _ = model.predict(obs_norm, deterministic=True)
|
||||
vx, vy = float(action[0][0]), float(action[0][1])
|
||||
raw_a = np.array([float(action[0][0]), float(action[0][1])], dtype=np.float32)
|
||||
if ACTION_SMOOTH > 0:
|
||||
smoothed = ACTION_SMOOTH * prev_action + (1.0 - ACTION_SMOOTH) * raw_a
|
||||
prev_action[:] = smoothed
|
||||
vx, vy = float(smoothed[0]), float(smoothed[1])
|
||||
else:
|
||||
vx, vy = float(raw_a[0]), float(raw_a[1])
|
||||
|
||||
# 5. Drive
|
||||
drive(vx, vy)
|
||||
|
||||