Checkpoint 8
This commit is contained in:
@@ -9,35 +9,79 @@
|
||||
# make eval # 10-seed env eval of rl
|
||||
# make test # pytest suite
|
||||
# make webots N=10 MODE=rl # launch Webots in the chosen mode
|
||||
# WEBOTS_HEADLESS=1 make webots # no 3D view, fast mode (still needs DISPLAY or xvfb-run)
|
||||
# make clean # delete artefacts for the current DRIVE+WORLD combination
|
||||
# make clean_all # delete artefacts for all combinations
|
||||
# make help # print the target table
|
||||
#
|
||||
# Override any hyperparameter on the command line, for example:
|
||||
# make rl PPO_STEPS=2000000 KL=0.02
|
||||
# make eval EVAL_SEEDS=20
|
||||
#
|
||||
# Drive mode selects the locomotion model:
|
||||
# make DRIVE=differential 2-wheel diff-drive (default)
|
||||
# make DRIVE=mecanum 4-wheel omnidirectional
|
||||
#
|
||||
# World shape:
|
||||
# make WORLD=field rectangular (default)
|
||||
# make WORLD=field_round circular fence
|
||||
#
|
||||
# To train all 4 combinations:
|
||||
# make train_all
|
||||
|
||||
|
||||
# Python interpreter used by every recipe.
PY := python

# Drive mode and world shape — each combination gets its own artefacts.
# Override on the command line, e.g. `make DRIVE=mecanum WORLD=field_round`.
DRIVE ?= differential
WORLD ?= field

# Derived tag and paths. Every artefact path embeds $(TAG), so runs for
# different DRIVE/WORLD combinations never overwrite each other. These are
# the only definitions of the path variables: a second, tag-less set would
# just be overridden here and mislead readers about where files end up.
TAG       = $(DRIVE)_$(WORLD)
BC_DEMOS  = training/bc/demos_$(TAG).npz
BC_DIR    = training/runs/bc_$(TAG)
RL_DIR    = training/runs/rl_$(TAG)
BC_POLICY = $(BC_DIR)/policy.zip
RL_POLICY = $(RL_DIR)/policy.zip
|
||||
|
||||
# --- Demo collection ---
# Teacher passed to training.bc.collect via --teacher.
TEACHER ?= universal

# Round field is fundamentally harder (narrow gate at south of a circle).
# Default to more demos there to give BC a fair shot at 60%+.
# `?=` only assigns while the variable is still undefined, so this
# conditional must be the first place SEEDS_PER_N receives a default;
# an earlier unconditional `?=` would silently disable both branches.
ifeq ($(WORLD),field_round)
  SEEDS_PER_N ?= 40
else
  SEEDS_PER_N ?= 25
endif

SUBSAMPLE      ?= 3
FRAME_STACK    ?= 4
DEMO_MAX_STEPS ?= 100000
|
||||
|
||||
# --- Behaviour cloning ---
# Epoch budget: 60 unless the round world is selected, which gets 100.
ifneq ($(WORLD),field_round)
  BC_EPOCHS ?= 60
else
  BC_EPOCHS ?= 100
endif

# NOTE(review): presumably the layer sizes of the BC network — confirm
# against the bc recipe, which is not visible in this view.
BC_NET_ARCH ?= 512,512
|
||||
|
||||
# --- KL-PPO fine-tune ---
# Round field: longer training, looser KL, no time penalty (success
# must be learned before speed is rewarded).
# These conditionals must be the first default for PPO_STEPS, KL and
# TIME_W: `?=` is a no-op once a variable is defined, so any earlier
# unconditional `?=` would override the per-world values below.
ifeq ($(WORLD),field_round)
  PPO_STEPS ?= 4000000
  KL        ?= 0.02
  TIME_W    ?= 0.0
else
  PPO_STEPS ?= 2000000
  KL        ?= 0.05
  TIME_W    ?= -0.05
endif

# Passed to training.rl.train as --imitate-weight.
IMITATE ?= 0.0

# PPO rollouts at full difficulty so the training distribution matches
# eval (deployment). Anything lower causes a train/eval mismatch that
# can make RL eval worse than BC.
DIFFICULTY ?= 1.0
|
||||
|
||||
# --- Evaluation ---
# Default seed count for `make eval`; forwarded to training.eval as --n-seeds.
|
||||
EVAL_SEEDS ?= 10
|
||||
@@ -48,16 +92,23 @@ N ?= 10
|
||||
MODE ?= rl
|
||||
|
||||
|
||||
# Command-style targets: none of these names is a file the build produces.
# Several .PHONY lines accumulate, so they are grouped by purpose.
.PHONY: all bc_demos bc rl eval test webots help
.PHONY: clean clean_all
.PHONY: train_all train_diff_rect train_diff_round train_mec_rect train_mec_round
|
||||
|
||||
# `all` has no recipe of its own; it only requires `eval`.
all: eval
|
||||
|
||||
# geometry.py picks HERDING_WORLD up at import time, so pass the selected
# world to every recipe through the environment.
HERDING_WORLD = $(WORLD)
export HERDING_WORLD
|
||||
|
||||
# Collect teacher demonstrations for the current DRIVE/WORLD combination.
# `bc_demos` is the convenience name; the real target is the .npz file.
bc_demos: $(BC_DEMOS)

# One single command: every line but the last ends in a backslash so that
# --drive-mode, --world and --max-steps actually reach the collector.
$(BC_DEMOS):
	$(PY) -m training.bc.collect \
	  --teacher $(TEACHER) --out $@ \
	  --seeds-per-n $(SEEDS_PER_N) --subsample $(SUBSAMPLE) \
	  --frame-stack $(FRAME_STACK) --drive-mode $(DRIVE) \
	  --world $(WORLD) \
	  --max-steps $(DEMO_MAX_STEPS)
|
||||
|
||||
bc: $(BC_POLICY)
|
||||
$(BC_POLICY): $(BC_DEMOS)
|
||||
@@ -69,20 +120,44 @@ rl: $(RL_POLICY)
|
||||
# KL-PPO fine-tune: reads the BC run directory and writes the RL run
# directory for the current DRIVE/WORLD combination.
# The invocation is one continued command; a line without a trailing
# backslash in the middle would drop every flag that follows it.
$(RL_POLICY): $(BC_POLICY)
	$(PY) -m training.rl.train \
	  --bc $(BC_DIR) --out $(RL_DIR) \
	  --total-timesteps $(PPO_STEPS) --kl-coef $(KL) \
	  --imitate-weight $(IMITATE) --time-weight $(TIME_W) \
	  --difficulty $(DIFFICULTY) \
	  --drive-mode $(DRIVE) --world $(WORLD)
|
||||
|
||||
# Evaluate the fine-tuned policy over $(EVAL_SEEDS) seeds, using the same
# drive mode and world it was trained for.
eval: $(RL_POLICY)
	$(PY) -m training.eval --policy $(RL_DIR) \
	  --max-flock 10 --max-steps $(EVAL_MAX_STEPS) --n-seeds $(EVAL_SEEDS) \
	  --drive-mode $(DRIVE) --world $(WORLD)
|
||||
|
||||
# Run the pytest suite under tests/.
test:
	$(PY) -m pytest tests/
|
||||
|
||||
# Launch Webots once, passing flock size, mode, drive mode and world to
# the launcher script in that order.
webots:
	tools/run_webots.sh $(N) $(MODE) $(DRIVE) $(WORLD)
|
||||
|
||||
# Delete artefacts for the current DRIVE+WORLD combination only.
# The demo archive is a single file (-f); the run directories need -r.
clean:
	rm -f $(BC_DEMOS)
	rm -rf $(BC_DIR) $(RL_DIR)
|
||||
|
||||
# Delete artefacts for every DRIVE/WORLD combination by globbing on the
# demos_/bc_/rl_ name prefixes.
clean_all:
	rm -f training/bc/demos_*.npz
	rm -rf training/runs/bc_*
	rm -rf training/runs/rl_*
|
||||
|
||||
# --- Train all 4 combinations ---
# Each train_* target re-invokes make with one fixed DRIVE/WORLD pair and
# no explicit goal, so the sub-make builds its default goal for that pair.
train_diff_rect:
	$(MAKE) DRIVE=differential WORLD=field

train_diff_round:
	$(MAKE) DRIVE=differential WORLD=field_round

train_mec_rect:
	$(MAKE) DRIVE=mecanum WORLD=field

train_mec_round:
	$(MAKE) DRIVE=mecanum WORLD=field_round

# Run the four combinations strictly one after another. They are started
# from recipe lines rather than listed as prerequisites, because
# prerequisites may be built concurrently under `make -j`, which would
# launch several training runs at once.
train_all:
	$(MAKE) train_diff_rect
	$(MAKE) train_diff_round
	$(MAKE) train_mec_rect
	$(MAKE) train_mec_round
|
||||
|
||||
help:
|
||||
@echo "Targets:"
|
||||
@@ -92,12 +167,21 @@ help:
|
||||
@echo " make rl KL-PPO fine-tune (rebuilds bc if missing)"
|
||||
@echo " make eval $(EVAL_SEEDS)-seed env eval of rl"
|
||||
@echo " make test pytest suite"
|
||||
@echo " make webots [N=$(N)] [MODE=$(MODE)]"
|
||||
@echo " make webots [N=$(N)] [MODE=$(MODE)] [DRIVE=$(DRIVE)] [WORLD=$(WORLD)]"
|
||||
@echo " launch Webots in the chosen mode"
|
||||
@echo " make clean delete bc_demos and run artefacts"
|
||||
@echo " WEBOTS_HEADLESS=1 make webots … no 3D view + fast + --batch"
|
||||
@echo " make clean delete artefacts for current DRIVE+WORLD"
|
||||
@echo " make clean_all delete artefacts for all combinations"
|
||||
@echo ""
|
||||
@echo "Combinations:"
|
||||
@echo " make DRIVE=differential WORLD=field diff + rectangular (default)"
|
||||
@echo " make DRIVE=differential WORLD=field_round diff + circular"
|
||||
@echo " make DRIVE=mecanum WORLD=field mecanum + rectangular"
|
||||
@echo " make DRIVE=mecanum WORLD=field_round mecanum + circular"
|
||||
@echo " make train_all all 4 in sequence"
|
||||
@echo ""
|
||||
@echo "Hyperparameter overrides (showing defaults):"
|
||||
@echo " TEACHER=$(TEACHER) SEEDS_PER_N=$(SEEDS_PER_N) SUBSAMPLE=$(SUBSAMPLE) FRAME_STACK=$(FRAME_STACK)"
|
||||
@echo " TEACHER=$(TEACHER) SEEDS_PER_N=$(SEEDS_PER_N) SUBSAMPLE=$(SUBSAMPLE) FRAME_STACK=$(FRAME_STACK) DEMO_MAX_STEPS=$(DEMO_MAX_STEPS)"
|
||||
@echo " BC_EPOCHS=$(BC_EPOCHS) BC_NET_ARCH=$(BC_NET_ARCH)"
|
||||
@echo " PPO_STEPS=$(PPO_STEPS) KL=$(KL)"
|
||||
@echo " PPO_STEPS=$(PPO_STEPS) KL=$(KL) IMITATE=$(IMITATE) TIME_W=$(TIME_W)"
|
||||
@echo " EVAL_SEEDS=$(EVAL_SEEDS) EVAL_MAX_STEPS=$(EVAL_MAX_STEPS)"
|
||||
|
||||
Reference in New Issue
Block a user