From 1df84ae4b500e213f44d6c6a13dcf54c597ddb06 Mon Sep 17 00:00:00 2001 From: Johnny Fernandes Date: Sun, 17 May 2026 10:44:15 +0000 Subject: [PATCH] Drop webots_quick target; mecanum BC demos now auto-use HERDING_MEC_WEBOTS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Remove `webots_quick` Makefile target — `make webots` is the only webots entry point now (it fires the interactive picker). The positional non-interactive path is still available as `bash tools/run_webots.sh N MODE DRIVE WORLD` for scripted use. * Add `WEBOTS_PRESET_FLAG = --use-webots-preset` for mecanum drive and pass it to the `bc.collect` recipe so demos are collected under the gym kinematics that match the physical-roller Webots mecanum. Without this, mecanum BC demos would record textbook X-pattern teacher actions against textbook gym kinematics, and the resulting policy would fail at deployment exactly the same way the current v1 mecanum policies do. * `rl/train.py` already auto-detects mecanum and applies HERDING_MEC_WEBOTS internally (commit 3b4c99a), so the rl recipe doesn't need the flag — a short comment in the Makefile makes that intent explicit. Diff drive keeps the existing recipe: no --use-webots-preset, so BC demos are collected on HERDING_DEFAULT (360° gym, no FP). This is the regime that produced the current diff/field and diff/round policies that pen 5/5 in Webots LiDAR; retraining under the same regime is the safest reproduction. 126 pytest cases still pass. 
Co-Authored-By: Claude Opus 4.7 --- Makefile | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 2951ad9..2d89991 100644 --- a/Makefile +++ b/Makefile @@ -146,7 +146,7 @@ MODE ?= rl .PHONY: all bc_demos bc rl rl_fast eval eval_fast eval_all eval_all_fast \ - test webots webots_quick webots_sweep clean clean_all help \ + test webots webots_sweep clean clean_all help \ train_all train_diff_rect train_diff_round \ train_mec_rect train_mec_round \ train_all_fast train_diff_rect_fast train_diff_round_fast \ @@ -161,6 +161,17 @@ export HERDING_WORLD = $(WORLD) # the build is run under tee / nohup / tmux pipes. export PYTHONUNBUFFERED = 1 +# Mecanum needs --use-webots-preset so collect/rl pick up +# HERDING_MEC_WEBOTS — the gym mecanum kinematics get the strafe +# efficiency and forward-bleed match against the physical-roller +# Webots proto. Without this flag the policy trains on textbook +# X-pattern mecanum and fails on deployment. +ifeq ($(DRIVE),mecanum) +WEBOTS_PRESET_FLAG = --use-webots-preset +else +WEBOTS_PRESET_FLAG = +endif + bc_demos: $(BC_DEMOS) $(BC_DEMOS): $(PY) -m training.bc.collect \ @@ -171,7 +182,8 @@ $(BC_DEMOS): --max-steps $(DEMO_MAX_STEPS) \ --fp-rate $(FP_RATE) \ --action-smooth $(ACTION_SMOOTH_TRAIN) \ - --wheel-slip-std $(WHEEL_SLIP_STD) + --wheel-slip-std $(WHEEL_SLIP_STD) \ + $(WEBOTS_PRESET_FLAG) bc: $(BC_POLICY) $(BC_POLICY): $(BC_DEMOS) @@ -190,6 +202,8 @@ $(RL_POLICY): $(BC_POLICY) --fp-rate $(FP_RATE) \ --action-smooth $(ACTION_SMOOTH_TRAIN) \ --wheel-slip-std $(WHEEL_SLIP_STD) + # (rl/train.py auto-applies HERDING_MEC_WEBOTS when drive=mecanum; + # no --use-webots-preset flag needed.) eval: $(RL_POLICY) $(PY) -m training.eval --policy $(RL_DIR) \ @@ -223,9 +237,6 @@ test: webots: @bash tools/webots_menu.sh -webots_quick: - tools/run_webots.sh $(N) $(MODE) $(DRIVE) $(WORLD) - # Headless sweep across all modes × worlds × flock sizes. # Results are written to webots_sweep.log. 
# Set USE_GT=1 to bypass LiDAR tracker (isolate perception from policy).