From 1df84ae4b500e213f44d6c6a13dcf54c597ddb06 Mon Sep 17 00:00:00 2001
From: Johnny Fernandes <up202402612@up.pt>
Date: Sun, 17 May 2026 10:44:15 +0000
Subject: [PATCH] Drop webots_quick target; mecanum BC demos now auto-use
 HERDING_MEC_WEBOTS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Remove `webots_quick` Makefile target — `make webots` is the only
  webots entry point now (it fires the interactive picker). The
  positional non-interactive path is still available as
  `bash tools/run_webots.sh N MODE DRIVE WORLD` for scripted use.
* Add `WEBOTS_PRESET_FLAG = --use-webots-preset` for mecanum drive
  and pass it to the `bc.collect` recipe so demos are collected
  under the gym kinematics that match the physical-roller Webots
  mecanum. Without this, mecanum BC demos would record textbook
  X-pattern teacher actions against textbook gym kinematics, and
  the resulting policy would fail at deployment exactly the same
  way the current v1 mecanum policies do.
* `rl/train.py` already auto-detects mecanum and applies
  HERDING_MEC_WEBOTS internally (commit 3b4c99a), so the rl recipe
  doesn't need the flag — a one-line comment in the Makefile makes
  that intent explicit.

Diff drive keeps the existing recipe: no --use-webots-preset, so
BC demos are collected on HERDING_DEFAULT (360° gym, no FP). This is
the regime that produced the current diff/field and diff/round
policies that pen 5/5 in Webots LiDAR; retraining under the same
regime is the safest reproduction.

126 pytest cases still pass.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 Makefile | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index 2951ad9..2d89991 100644
--- a/Makefile
+++ b/Makefile
@@ -146,7 +146,7 @@ MODE             ?= rl
 
 
 .PHONY: all bc_demos bc rl rl_fast eval eval_fast eval_all eval_all_fast \
-        test webots webots_quick webots_sweep clean clean_all help \
+        test webots webots_sweep clean clean_all help \
         train_all train_diff_rect train_diff_round \
         train_mec_rect train_mec_round \
         train_all_fast train_diff_rect_fast train_diff_round_fast \
@@ -161,6 +161,17 @@ export HERDING_WORLD = $(WORLD)
 # the build is run under tee / nohup / tmux pipes.
 export PYTHONUNBUFFERED = 1
 
+# Mecanum demos need --use-webots-preset so bc.collect runs under
+# HERDING_MEC_WEBOTS (gym strafe efficiency / forward bleed matched
+# to the physical-roller Webots proto); otherwise the policy learns
+# textbook X-pattern mecanum and fails on deployment. rl/train.py
+# applies the preset itself. strip tolerates whitespace in DRIVE.
+ifeq ($(strip $(DRIVE)),mecanum)
+WEBOTS_PRESET_FLAG = --use-webots-preset
+else
+WEBOTS_PRESET_FLAG =
+endif
+
 bc_demos: $(BC_DEMOS)
 $(BC_DEMOS):
 	$(PY) -m training.bc.collect \
@@ -171,7 +182,8 @@ $(BC_DEMOS):
 		--max-steps $(DEMO_MAX_STEPS) \
 		--fp-rate $(FP_RATE) \
 		--action-smooth $(ACTION_SMOOTH_TRAIN) \
-		--wheel-slip-std $(WHEEL_SLIP_STD)
+		--wheel-slip-std $(WHEEL_SLIP_STD) \
+		$(WEBOTS_PRESET_FLAG)
 
 bc: $(BC_POLICY)
 $(BC_POLICY): $(BC_DEMOS)
@@ -190,6 +202,8 @@ $(RL_POLICY): $(BC_POLICY)
 		--fp-rate $(FP_RATE) \
 		--action-smooth $(ACTION_SMOOTH_TRAIN) \
 		--wheel-slip-std $(WHEEL_SLIP_STD)
+# rl/train.py auto-applies HERDING_MEC_WEBOTS when drive=mecanum, so no
+# --use-webots-preset here. (Unindented: a make comment, not a recipe line.)
 
 eval: $(RL_POLICY)
 	$(PY) -m training.eval --policy $(RL_DIR) \
@@ -223,9 +237,6 @@ test:
 webots:
 	@bash tools/webots_menu.sh
 
-webots_quick:
-	tools/run_webots.sh $(N) $(MODE) $(DRIVE) $(WORLD)
-
 # Headless sweep across all modes × worlds × flock sizes.
 # Results are written to webots_sweep.log.
 # Set USE_GT=1 to bypass LiDAR tracker (isolate perception from policy).