104 lines
3.1 KiB
Makefile
104 lines
3.1 KiB
Makefile
# Training pipeline for the shepherd-dog herding project.
|
|
# Stages chain via output files in training/.
|
|
#
|
|
# Usage:
|
|
# make # full pipeline: bc_demos -> bc -> rl -> eval
|
|
# make bc_demos # generate sim demos
|
|
# make bc # behaviour clone (rebuilds bc_demos if missing)
|
|
# make rl # KL-PPO fine-tune (rebuilds bc if missing)
|
|
# make eval # 10-seed env eval of rl
|
|
# make test # pytest suite
|
|
# make webots N=10 MODE=rl # launch Webots in the chosen mode
|
|
# make clean # delete bc_demos and run artefacts
|
|
# make help # print the target table
|
|
#
|
|
# Override any hyperparameter on the command line, for example:
|
|
# make rl PPO_STEPS=2000000 KL=0.02
|
|
# make eval EVAL_SEEDS=20
|
|
|
|
|
|
# Interpreter that launches every Python stage and the pytest run.
PY        := python

# Artefact locations, grouped as <run directory, policy file inside it>.
# The policy files are the Make targets the rl and eval stages depend on.
BC_DIR    := training/runs/bc
BC_POLICY := $(BC_DIR)/policy.zip

RL_DIR    := training/runs/rl
RL_POLICY := $(RL_DIR)/policy.zip

# Demo archive produced by the collection stage and consumed by bc.
BC_DEMOS  := training/bc/demos.npz
|
|
|
|
# Every knob below uses `?=`, so a value from the command line or the
# environment wins over the default written here.

# --- Demo collection (flags of training.bc.collect) ---
TEACHER        ?= strombom
SEEDS_PER_N    ?= 15
SUBSAMPLE      ?= 3
FRAME_STACK    ?= 4

# --- Behaviour cloning (flags of training.bc.pretrain) ---
BC_EPOCHS      ?= 60
BC_NET_ARCH    ?= 512,512

# --- KL-PPO fine-tune (flags of training.rl.train) ---
# PPO_STEPS is passed as --total-timesteps, KL as --kl-coef.
PPO_STEPS      ?= 1000000
KL             ?= 0.05

# --- Evaluation (flags of training.eval) ---
# EVAL_SEEDS is passed as --n-seeds, EVAL_MAX_STEPS as --max-steps.
EVAL_SEEDS     ?= 10
EVAL_MAX_STEPS ?= 15000

# --- Webots launcher (positional arguments of tools/run_webots.sh) ---
N              ?= 10
MODE           ?= rl
|
|
|
|
|
|
# Command-style targets. None of these names is a file this Makefile creates,
# so they are declared phony: a stray file or directory called `test`, `clean`,
# `eval`, ... can then never make the target look up to date.
.PHONY: all bc_demos bc rl eval test webots clean help

# The pipeline chains through output files (demos.npz, policy.zip). If a
# stage's recipe exits non-zero after it has started writing its target, Make
# would otherwise keep the partial file and treat it as up to date on the next
# run. With this special target Make deletes the changed target instead, so
# the failed stage is re-run.
.DELETE_ON_ERROR:

# Default goal: `make` with no arguments runs the whole chain, because eval
# depends (transitively) on every earlier stage.
all: eval
|
|
|
|
# Stage 1 - demo collection.
# `bc_demos` is the phony, human-friendly alias; the real target is the demo
# archive. The archive has no prerequisites, so its recipe only runs when the
# file does not exist yet (use `make clean` to force a re-collection).
bc_demos: $(BC_DEMOS)

$(BC_DEMOS):
	$(PY) -m training.bc.collect \
		--teacher $(TEACHER) \
		--out $@ \
		--seeds-per-n $(SEEDS_PER_N) \
		--subsample $(SUBSAMPLE) \
		--frame-stack $(FRAME_STACK)
|
|
|
|
# Stage 2 - behaviour cloning.
# `bc` is the phony alias for the BC policy file. The policy is rebuilt when
# it is missing or older than the demo archive it is trained from.
bc: $(BC_POLICY)

# $< is the demo archive (the only prerequisite). The trainer is given the run
# directory, not the policy path; this rule assumes it writes policy.zip
# inside that directory - TODO confirm against training.bc.pretrain.
$(BC_POLICY): $(BC_DEMOS)
	$(PY) -m training.bc.pretrain \
		--demos $< \
		--out $(BC_DIR) \
		--epochs $(BC_EPOCHS) \
		--net-arch $(BC_NET_ARCH)
|
|
|
|
# Stage 3 - KL-PPO fine-tune.
# `rl` is the phony alias for the fine-tuned policy file, which is rebuilt
# when it is missing or older than the BC policy.
rl: $(RL_POLICY)

# The trainer receives the BC and RL run directories rather than the policy
# files themselves; this rule assumes it reads/writes policy.zip inside them
# - TODO confirm against training.rl.train.
$(RL_POLICY): $(BC_POLICY)
	$(PY) -m training.rl.train \
		--bc $(BC_DIR) \
		--out $(RL_DIR) \
		--total-timesteps $(PPO_STEPS) \
		--kl-coef $(KL)
|
|
|
|
# Value forwarded to the evaluator as --max-flock. It used to be hard-coded in
# the recipe while the other evaluation parameters were overridable; the
# default keeps the previous behaviour. Override with e.g.
#   make eval EVAL_MAX_FLOCK=20
# NOTE(review): if the policy was trained for a fixed flock size this may have
# to match the training setup - confirm against training.eval before changing.
EVAL_MAX_FLOCK ?= 10

# Stage 4 - evaluation of the fine-tuned policy.
# Phony, so it runs on every invocation; the RL policy (and, transitively,
# every earlier stage) is brought up to date first. The evaluator is pointed
# at the RL run directory rather than at the policy file.
eval: $(RL_POLICY)
	$(PY) -m training.eval --policy $(RL_DIR) \
		--max-flock $(EVAL_MAX_FLOCK) \
		--max-steps $(EVAL_MAX_STEPS) \
		--n-seeds $(EVAL_SEEDS)
|
|
|
|
# Run pytest over tests/ with the same interpreter as the pipeline stages.
# Has no prerequisites, so it does not trigger any training stage.
test:
	$(PY) -m pytest tests/
|
|
|
|
# Start Webots through the launcher script. N and MODE are handed over as the
# first and second positional argument. No policy file is listed as a
# prerequisite, so nothing is (re)trained by this target.
webots:
	tools/run_webots.sh $(N) $(MODE)
|
|
|
|
# Delete everything the pipeline produces: the demo archive and both run
# directories (which hold the BC and RL policies). `rm -rf` does not fail on
# paths that are already gone, so the target can be run repeatedly.
clean:
	rm -rf \
		$(BC_DEMOS) \
		$(BC_DIR) \
		$(RL_DIR)
|
|
|
|
# Print the target table followed by the tunable variables.
# Make expands $(VAR) before echo runs, so the values shown are the ones in
# effect for this invocation, including any command-line overrides - hence the
# "current values" label. Every line is @-prefixed so only the text is shown,
# not the echo command itself.
help:
	@echo "Targets:"
	@echo " make full pipeline (bc_demos -> bc -> rl -> eval)"
	@echo " make bc_demos sim demos via the '$(TEACHER)' teacher"
	@echo " make bc train BC (rebuilds bc_demos if missing)"
	@echo " make rl KL-PPO fine-tune (rebuilds bc if missing)"
	@echo " make eval $(EVAL_SEEDS)-seed env eval of rl"
	@echo " make test pytest suite"
	@echo " make webots [N=$(N)] [MODE=$(MODE)]"
	@echo " launch Webots in the chosen mode"
	@echo " make clean delete bc_demos and run artefacts"
	@echo " make help print the target table"
	@echo ""
	@echo "Hyperparameter overrides (showing current values):"
	@echo " TEACHER=$(TEACHER) SEEDS_PER_N=$(SEEDS_PER_N) SUBSAMPLE=$(SUBSAMPLE) FRAME_STACK=$(FRAME_STACK)"
	@echo " BC_EPOCHS=$(BC_EPOCHS) BC_NET_ARCH=$(BC_NET_ARCH)"
	@echo " PPO_STEPS=$(PPO_STEPS) KL=$(KL)"
	@echo " EVAL_SEEDS=$(EVAL_SEEDS) EVAL_MAX_STEPS=$(EVAL_MAX_STEPS)"
|