# Training pipeline for the shepherd-dog herding project.
# Stages chain via output files in training/.
#
# Usage:
#   make                      # full pipeline: bc_demos -> bc -> rl -> eval
#   make bc_demos             # generate sim demos
#   make bc                   # behaviour clone (rebuilds bc_demos if missing)
#   make rl                   # KL-PPO fine-tune (rebuilds bc if missing)
#   make eval                 # 10-seed env eval of rl
#   make test                 # pytest suite
#   make webots N=10 MODE=rl  # launch Webots in the chosen mode
#   make clean                # delete bc_demos and run artefacts
#   make help                 # print the target table
#
# Override any hyperparameter on the command line, for example:
#   make rl PPO_STEPS=2000000 KL=0.02
#   make eval EVAL_SEEDS=20
#
# Note: changing a hyperparameter does not by itself invalidate an existing
# artefact; Make only compares file timestamps. Remove the stale output (or run
# `make clean`) to force a stage to re-run with new settings.

# Remove a target whose recipe failed part-way, so a truncated demos/policy
# file is never mistaken for an up-to-date artefact by the next stage.
.DELETE_ON_ERROR:

PY := python

# Artefact locations. Each stage's output file is the prerequisite of the next.
BC_DEMOS  := training/bc/demos.npz
BC_DIR    := training/runs/bc
RL_DIR    := training/runs/rl
BC_POLICY := $(BC_DIR)/policy.zip
RL_POLICY := $(RL_DIR)/policy.zip

# --- Demo collection ---
TEACHER ?= strombom
SEEDS_PER_N ?= 15
SUBSAMPLE ?= 3
FRAME_STACK ?= 4

# --- Behaviour cloning ---
BC_EPOCHS ?= 60
BC_NET_ARCH ?= 512,512

# --- KL-PPO fine-tune ---
PPO_STEPS ?= 1000000
KL ?= 0.05

# --- Evaluation ---
EVAL_SEEDS ?= 10
EVAL_MAX_STEPS ?= 15000
EVAL_MAX_FLOCK ?= 10

# --- Webots launcher ---
N ?= 10
MODE ?= rl

.PHONY: all bc_demos bc rl eval test webots clean help

# Default goal: run every stage up to and including evaluation.
all: eval

# Stage 1: collect demonstrations from the teacher into $(BC_DEMOS).
bc_demos: $(BC_DEMOS)

$(BC_DEMOS):
	$(PY) -m training.bc.collect \
	    --teacher $(TEACHER) --out $@ \
	    --seeds-per-n $(SEEDS_PER_N) --subsample $(SUBSAMPLE) \
	    --frame-stack $(FRAME_STACK)

# Stage 2: behaviour cloning. The trainer is given the run directory; this rule
# assumes it writes policy.zip inside it (TODO confirm against training.bc.pretrain).
bc: $(BC_POLICY)

$(BC_POLICY): $(BC_DEMOS)
	$(PY) -m training.bc.pretrain \
	    --demos $< --out $(BC_DIR) \
	    --epochs $(BC_EPOCHS) --net-arch $(BC_NET_ARCH)

# Stage 3: KL-PPO fine-tune starting from the BC run directory.
rl: $(RL_POLICY)

$(RL_POLICY): $(BC_POLICY)
	$(PY) -m training.rl.train \
	    --bc $(BC_DIR) --out $(RL_DIR) \
	    --total-timesteps $(PPO_STEPS) --kl-coef $(KL)

# Stage 4: evaluate the fine-tuned policy. Phony, so it re-runs on every call.
eval: $(RL_POLICY)
	$(PY) -m training.eval --policy $(RL_DIR) \
	    --max-flock $(EVAL_MAX_FLOCK) --max-steps $(EVAL_MAX_STEPS) \
	    --n-seeds $(EVAL_SEEDS)

test:
	$(PY) -m pytest tests/

webots:
	tools/run_webots.sh $(N) $(MODE)

# Deletes the demo file and both run directories (policies included).
clean:
	rm -rf $(BC_DEMOS) $(BC_DIR) $(RL_DIR)

# Prints the target table and the current value of every overridable knob.
help:
	@echo "Targets:"
	@echo "  make              full pipeline (bc_demos -> bc -> rl -> eval)"
	@echo "  make bc_demos     sim demos via the '$(TEACHER)' teacher"
	@echo "  make bc           train BC (rebuilds bc_demos if missing)"
	@echo "  make rl           KL-PPO fine-tune (rebuilds bc if missing)"
	@echo "  make eval         $(EVAL_SEEDS)-seed env eval of rl"
	@echo "  make test         pytest suite"
	@echo "  make webots [N=$(N)] [MODE=$(MODE)]"
	@echo "                    launch Webots in the chosen mode"
	@echo "  make clean        delete bc_demos and run artefacts"
	@echo "  make help         print this target table"
	@echo ""
	@echo "Hyperparameter overrides (showing defaults):"
	@echo "  TEACHER=$(TEACHER) SEEDS_PER_N=$(SEEDS_PER_N) SUBSAMPLE=$(SUBSAMPLE) FRAME_STACK=$(FRAME_STACK)"
	@echo "  BC_EPOCHS=$(BC_EPOCHS) BC_NET_ARCH=$(BC_NET_ARCH)"
	@echo "  PPO_STEPS=$(PPO_STEPS) KL=$(KL)"
	@echo "  EVAL_SEEDS=$(EVAL_SEEDS) EVAL_MAX_STEPS=$(EVAL_MAX_STEPS) EVAL_MAX_FLOCK=$(EVAL_MAX_FLOCK)"