Training pipelines auto-select mecanum-Webots preset

* training/bc/collect.py: --use-webots-preset now picks the
  drive-matched variant. Mecanum drives get HERDING_MEC_WEBOTS
  (with the Webots-calibrated strafe efficiency and strafe-to-forward
  bleed) so the collected demos reflect the imperfect physical mecanum
  kinematics the deployed policy will see. Differential drives still use
  HERDING_WEBOTS (no behaviour change there).
* training/rl/train.py: mecanum fine-tune now *unconditionally*
  applies the HERDING_MEC_WEBOTS strafe calibration (strafe_efficiency
  and strafe_to_forward_bleed) to the PPO env's robot config, because
  the policy must update against the same imperfect kinematics it
  deploys on. Differential fine-tune is unchanged.

To retrain a mecanum policy end-to-end against the new proto:

  python -m training.bc.collect --drive-mode mecanum --world field \
    --use-webots-preset \
    --out training/bc/demos_mecanum_field_v2.npz
  python -m training.bc.pretrain --demos training/bc/demos_mecanum_field_v2.npz \
    --out training/runs/bc_mecanum_field_v2 ...
  python -m training.rl.train --bc training/runs/bc_mecanum_field_v2 \
    --out training/runs/rl_mecanum_field_v2 \
    --drive-mode mecanum --world field --use-webots-preset

The same flow applies to field_round / mecanum/round.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Johnny Fernandes
2026-05-17 01:12:06 +00:00
parent ee77c8606c
commit 3b4c99a6c4
2 changed files with 42 additions and 9 deletions
+19 -5
View File
@@ -176,17 +176,31 @@ def main():
print(f"[demos] WARNING: --world={args.world} but geometry is "
f"'{FIELD_SHAPE}'. This should not happen — file a bug.")
from herding.config import HerdingConfig, HERDING_WEBOTS, DomainRandomConfig, RobotConfig
from herding.config import (
HerdingConfig, HERDING_WEBOTS, HERDING_MEC_WEBOTS,
DomainRandomConfig, RobotConfig,
)
if args.use_webots_preset:
herding_cfg = HERDING_WEBOTS.replace(
# Pick the drive-matched Webots preset — for mecanum we use the
# variant that simulates the physical-roller proto's strafe
# efficiency and forward bleed so the policy trains under the
# same imperfect kinematics it sees at deployment.
base = HERDING_MEC_WEBOTS if args.drive_mode == "mecanum" else HERDING_WEBOTS
herding_cfg = base.replace(
domain_random=DomainRandomConfig(
fp_rate=args.fp_rate,
wheel_slip_std=args.wheel_slip_std,
),
robot=RobotConfig(action_smooth=args.action_smooth),
robot=RobotConfig(
action_smooth=args.action_smooth,
strafe_efficiency=base.robot.strafe_efficiency,
strafe_to_forward_bleed=base.robot.strafe_to_forward_bleed,
),
)
print(f"[demos] HERDING_WEBOTS preset + DR: fp_rate={args.fp_rate} "
f"action_smooth={args.action_smooth} wheel_slip_std={args.wheel_slip_std}")
preset_name = "HERDING_MEC_WEBOTS" if args.drive_mode == "mecanum" else "HERDING_WEBOTS"
print(f"[demos] {preset_name} preset + DR: fp_rate={args.fp_rate} "
f"action_smooth={args.action_smooth} wheel_slip_std={args.wheel_slip_std} "
f"strafe_eff={herding_cfg.robot.strafe_efficiency:.2f}")
else:
herding_cfg = None
if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
+23 -4
View File
@@ -275,19 +275,38 @@ def main() -> None:
drive_mode = "differential"
print(f"[rl] drive_mode={drive_mode} (BC action_dim={bc_action_dim})")
from herding.config import HerdingConfig, DomainRandomConfig, RobotConfig
from herding.config import (
HerdingConfig, HERDING_MEC_WEBOTS, DomainRandomConfig, RobotConfig,
)
herding_cfg = None
if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
# When fine-tuning a mecanum policy we always apply the Webots
# roller-hinge calibration to the gym kinematics (strafe efficiency
# and bleed). Without this, the RL agent updates against the
# textbook X-pattern and fails on deployment.
is_mecanum = (drive_mode == "mecanum")
if is_mecanum or args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
if is_mecanum:
base_robot = HERDING_MEC_WEBOTS.robot
strafe_eff = base_robot.strafe_efficiency
strafe_bleed = base_robot.strafe_to_forward_bleed
else:
strafe_eff = 1.0
strafe_bleed = 0.0
herding_cfg = HerdingConfig(
domain_random=DomainRandomConfig(
fp_rate=args.fp_rate,
wheel_slip_std=args.wheel_slip_std,
),
robot=RobotConfig(action_smooth=args.action_smooth),
robot=RobotConfig(
action_smooth=args.action_smooth,
strafe_efficiency=strafe_eff,
strafe_to_forward_bleed=strafe_bleed,
),
)
print(f"[rl] domain-random: fp_rate={args.fp_rate} "
f"action_smooth={args.action_smooth} "
f"wheel_slip_std={args.wheel_slip_std}")
f"wheel_slip_std={args.wheel_slip_std} "
f"strafe_eff={strafe_eff:.2f} strafe_bleed={strafe_bleed:.2f}")
env_fns = [_make_env(i, args.seed, frame_stack, drive_mode,
difficulty=args.difficulty,