From 3b4c99a6c4ae13b8b2b1f00ed5af8ed3a171d53d Mon Sep 17 00:00:00 2001 From: Johnny Fernandes Date: Sun, 17 May 2026 01:12:06 +0000 Subject: [PATCH] Training pipelines auto-select mecanum-Webots preset * training/bc/collect.py: --use-webots-preset now picks the drive-matched variant. Mecanum drives get HERDING_MEC_WEBOTS (with the Webots-calibrated strafe efficiency and bleed) so the collected demos reflect the imperfect physical mecanum the deployed policy will see. Differential drives still use HERDING_WEBOTS (no behaviour change there). * training/rl/train.py: mecanum fine-tune now *unconditionally* applies the HERDING_MEC_WEBOTS strafe calibration (strafe efficiency and strafe-to-forward bleed) to the PPO env's robot config (the policy must update against the same imperfect kinematics it deploys on). Differential-drive fine-tune is unchanged. To retrain a mecanum policy end-to-end against the new proto: python -m training.bc.collect --drive-mode mecanum --world field \ --use-webots-preset \ --out training/bc/demos_mecanum_field_v2.npz python -m training.bc.pretrain --demos training/bc/demos_mecanum_field_v2.npz \ --out training/runs/bc_mecanum_field_v2 ... python -m training.rl.train --bc training/runs/bc_mecanum_field_v2 \ --out training/runs/rl_mecanum_field_v2 \ --drive-mode mecanum --world field --use-webots-preset The same flow for field_round / mecanum/round. Co-Authored-By: Claude Opus 4.7 --- training/bc/collect.py | 24 +++++++++++++++++++----- training/rl/train.py | 27 +++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/training/bc/collect.py b/training/bc/collect.py index 61eb127..dade3e4 100644 --- a/training/bc/collect.py +++ b/training/bc/collect.py @@ -176,17 +176,31 @@ def main(): print(f"[demos] WARNING: --world={args.world} but geometry is " f"'{FIELD_SHAPE}'. 
This should not happen — file a bug.") - from herding.config import HerdingConfig, HERDING_WEBOTS, DomainRandomConfig, RobotConfig + from herding.config import ( + HerdingConfig, HERDING_WEBOTS, HERDING_MEC_WEBOTS, + DomainRandomConfig, RobotConfig, + ) if args.use_webots_preset: - herding_cfg = HERDING_WEBOTS.replace( + # Pick the drive-matched Webots preset — for mecanum we use the + # variant that simulates the physical-roller proto's strafe + # efficiency and forward bleed so the policy trains under the + # same imperfect kinematics it sees at deployment. + base = HERDING_MEC_WEBOTS if args.drive_mode == "mecanum" else HERDING_WEBOTS + herding_cfg = base.replace( domain_random=DomainRandomConfig( fp_rate=args.fp_rate, wheel_slip_std=args.wheel_slip_std, ), - robot=RobotConfig(action_smooth=args.action_smooth), + robot=RobotConfig( + action_smooth=args.action_smooth, + strafe_efficiency=base.robot.strafe_efficiency, + strafe_to_forward_bleed=base.robot.strafe_to_forward_bleed, + ), ) - print(f"[demos] HERDING_WEBOTS preset + DR: fp_rate={args.fp_rate} " - f"action_smooth={args.action_smooth} wheel_slip_std={args.wheel_slip_std}") + preset_name = "HERDING_MEC_WEBOTS" if args.drive_mode == "mecanum" else "HERDING_WEBOTS" + print(f"[demos] {preset_name} preset + DR: fp_rate={args.fp_rate} " + f"action_smooth={args.action_smooth} wheel_slip_std={args.wheel_slip_std} " + f"strafe_eff={herding_cfg.robot.strafe_efficiency:.2f}") else: herding_cfg = None if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0: diff --git a/training/rl/train.py b/training/rl/train.py index 3f0e50a..d57113d 100644 --- a/training/rl/train.py +++ b/training/rl/train.py @@ -275,19 +275,38 @@ def main() -> None: drive_mode = "differential" print(f"[rl] drive_mode={drive_mode} (BC action_dim={bc_action_dim})") - from herding.config import HerdingConfig, DomainRandomConfig, RobotConfig + from herding.config import ( + HerdingConfig, HERDING_MEC_WEBOTS, DomainRandomConfig, 
RobotConfig, + ) herding_cfg = None - if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0: + # When fine-tuning a mecanum policy we always apply the Webots + # roller-hinge calibration to the gym kinematics (strafe efficiency + # and bleed). Without this, the RL agent updates against the + # textbook X-pattern and fails on deployment. + is_mecanum = (drive_mode == "mecanum") + if is_mecanum or args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0: + if is_mecanum: + base_robot = HERDING_MEC_WEBOTS.robot + strafe_eff = base_robot.strafe_efficiency + strafe_bleed = base_robot.strafe_to_forward_bleed + else: + strafe_eff = 1.0 + strafe_bleed = 0.0 herding_cfg = HerdingConfig( domain_random=DomainRandomConfig( fp_rate=args.fp_rate, wheel_slip_std=args.wheel_slip_std, ), - robot=RobotConfig(action_smooth=args.action_smooth), + robot=RobotConfig( + action_smooth=args.action_smooth, + strafe_efficiency=strafe_eff, + strafe_to_forward_bleed=strafe_bleed, + ), ) print(f"[rl] domain-random: fp_rate={args.fp_rate} " f"action_smooth={args.action_smooth} " - f"wheel_slip_std={args.wheel_slip_std}") + f"wheel_slip_std={args.wheel_slip_std} " + f"strafe_eff={strafe_eff:.2f} strafe_bleed={strafe_bleed:.2f}") env_fns = [_make_env(i, args.seed, frame_stack, drive_mode, difficulty=args.difficulty,