Approach v3 w/ south penalty

2026-04-26 14:55:13 +01:00
parent a561f8a697
commit 11e13c6980
19 changed files with 6549 additions and 3 deletions
@@ -59,6 +59,13 @@ VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
 DEBUG_CSV    = os.path.join(_HERE, "debug.csv")
 DEBUG_ENABLED = True   # set False to disable debug.csv logging

+# ── action smoothing ─────────────────────────────────────────────────────────
+# EMA on policy output to suppress the rapid oscillation (vx/vy flipping
+# between -1 and +1 every step) that stalls the physical dog.  0 = no
+# smoothing (raw policy), 1 = frozen.  0.3 keeps ~30% of previous action.
+ACTION_SMOOTH = 0.3
+prev_action   = np.zeros(2, dtype=np.float32)
+

 def norm_angle(a: float) -> float:
    while a >  math.pi: a -= 2 * math.pi
@@ -227,9 +234,15 @@ while robot.step(timestep) != -1:
    raw_obs  = build_obs(dog_pos, sheep_positions, n_sheep)
    obs_norm = vecnorm.normalize_obs(raw_obs[np.newaxis])  # (1, 13)

-    # 4. Policy inference
+    # 4. Policy inference + smoothing
    action, _ = model.predict(obs_norm, deterministic=True)
-    vx, vy    = float(action[0][0]), float(action[0][1])
+    raw_a = np.array([float(action[0][0]), float(action[0][1])], dtype=np.float32)
+    if ACTION_SMOOTH > 0:
+        smoothed = ACTION_SMOOTH * prev_action + (1.0 - ACTION_SMOOTH) * raw_a
+        prev_action[:] = smoothed
+        vx, vy = float(smoothed[0]), float(smoothed[1])
+    else:
+        vx, vy = float(raw_a[0]), float(raw_a[1])

    # 5. Drive
    drive(vx, vy)