# Training pipeline for the shepherd-dog herding project.
# Stages chain via output files in training/.
#
# Usage:
#   make            # full pipeline: bc_demos -> bc -> rl -> eval
#   make bc_demos   # generate sim demos
#   make bc         # behaviour clone (rebuilds bc_demos if missing)
#   make rl         # KL-PPO fine-tune (rebuilds bc if missing)
#   make eval       # 10-seed env eval of rl
#   make test       # pytest suite
#   make webots N=10 MODE=rl   # launch Webots in the chosen mode
#   make clean      # delete bc_demos and run artefacts
#   make help       # print the target table
#
# Override any hyperparameter on the command line, for example:
#   make rl PPO_STEPS=2000000 KL=0.02
#   make eval EVAL_SEEDS=20


# Interpreter used to launch every Python stage (`$(PY) -m ...`).
PY := python

# Demo archive written by the collection stage and read by behaviour cloning.
BC_DEMOS := training/bc/demos.npz

# Run directories handed to the training scripts as --out, and the policy
# archive inside each one that the file rules use as their target.
BC_DIR := training/runs/bc
BC_POLICY := $(BC_DIR)/policy.zip
RL_DIR := training/runs/rl
RL_POLICY := $(RL_DIR)/policy.zip

# Every knob below uses `?=`, so a value from the command line or the
# environment takes precedence over the default written here.

# Demo collection: forwarded to training.bc.collect as --teacher,
# --seeds-per-n, --subsample and --frame-stack respectively.
TEACHER ?= strombom
SEEDS_PER_N ?= 15
SUBSAMPLE ?= 3
FRAME_STACK ?= 4

# Behaviour cloning: forwarded to training.bc.pretrain as --epochs and
# --net-arch.
BC_EPOCHS ?= 60
BC_NET_ARCH ?= 512,512

# KL-PPO fine-tune: forwarded to training.rl.train as --total-timesteps and
# --kl-coef.
PPO_STEPS ?= 1000000
KL ?= 0.05

# Evaluation: forwarded to training.eval as --n-seeds and --max-steps.
EVAL_SEEDS ?= 10
EVAL_MAX_STEPS ?= 15000

# Webots launcher: first and second positional arguments of
# tools/run_webots.sh.
N ?= 10
MODE ?= rl


# Remove a target file whose recipe exited non-zero after modifying it, so a
# half-written demos or policy archive is never mistaken for an up-to-date
# artefact by a later `make` run.
.DELETE_ON_ERROR:

# These names are commands, not files: always run them when requested, even if
# a file or directory with the same name exists.
.PHONY: all bc_demos bc rl eval test webots clean help

# Default goal. `eval` depends on the RL policy, which in turn pulls in the BC
# policy and the demos, so this runs the whole pipeline.
all: eval

# Demo collection stage. `bc_demos` is the phony, human-friendly name for the
# demos file. The file rule has no prerequisites, so make only runs the recipe
# when that file does not exist yet.
bc_demos: $(BC_DEMOS)

# $@ is the demos file itself, passed to the collector as its output path.
$(BC_DEMOS):
	$(PY) -m training.bc.collect \
		--teacher $(TEACHER) \
		--out $@ \
		--seeds-per-n $(SEEDS_PER_N) \
		--subsample $(SUBSAMPLE) \
		--frame-stack $(FRAME_STACK)

# Behaviour-cloning stage. `bc` is the phony name for the BC policy archive,
# which is rebuilt when it is missing or older than the demos file.
bc: $(BC_POLICY)

# $< is the demos file (the only prerequisite). The script receives the run
# directory as --out, not the archive path.
# NOTE(review): this rule assumes training.bc.pretrain writes policy.zip into
# that directory - confirm against the script.
$(BC_POLICY): $(BC_DEMOS)
	$(PY) -m training.bc.pretrain \
		--demos $< \
		--out $(BC_DIR) \
		--epochs $(BC_EPOCHS) \
		--net-arch $(BC_NET_ARCH)

# KL-PPO fine-tune stage. `rl` is the phony name for the RL policy archive,
# which is rebuilt when it is missing or older than the BC policy archive.
rl: $(RL_POLICY)

# Both --bc and --out receive run directories rather than archive paths.
# NOTE(review): this rule assumes training.rl.train writes policy.zip into
# $(RL_DIR) - confirm against the script.
$(RL_POLICY): $(BC_POLICY)
	$(PY) -m training.rl.train \
		--bc $(BC_DIR) \
		--out $(RL_DIR) \
		--total-timesteps $(PPO_STEPS) \
		--kl-coef $(KL)

# Value forwarded to training.eval as --max-flock. The default of 10 is the
# number this rule used to hard-code; override it like any other knob, e.g.
#   make eval EVAL_MAX_FLOCK=20
EVAL_MAX_FLOCK   ?= 10

# Evaluation stage. Phony, so it runs on every request; the RL policy archive
# is (re)built first if it is missing or out of date. The evaluator is given
# the RL run directory, not the archive path.
eval: $(RL_POLICY)
	$(PY) -m training.eval --policy $(RL_DIR) \
		--max-flock $(EVAL_MAX_FLOCK) --max-steps $(EVAL_MAX_STEPS) \
		--n-seeds $(EVAL_SEEDS)

# Run the pytest suite under tests/ with the configured interpreter. Has no
# prerequisites, so it does not build any pipeline artefact first.
test:
	$(PY) -m pytest tests/

# Launch Webots through tools/run_webots.sh, passing N and MODE as the first
# and second positional arguments. Has no prerequisites, so no policy is built
# beforehand.
webots:
	tools/run_webots.sh $(N) $(MODE)

# Remove everything the pipeline produces: the demos archive plus the BC and
# RL run directories with all their contents.
clean:
	rm -rf \
		$(BC_DEMOS) \
		$(BC_DIR) \
		$(RL_DIR)

# Print the target table followed by the tunable variables. The variables are
# expanded when the recipe runs, so the values shown are the effective ones
# (defaults unless overridden on the command line or in the environment);
# the section label says "current values" for that reason.
help:
	@echo "Targets:"
	@echo "  make              full pipeline (bc_demos -> bc -> rl -> eval)"
	@echo "  make bc_demos     sim demos via the '$(TEACHER)' teacher"
	@echo "  make bc           train BC (rebuilds bc_demos if missing)"
	@echo "  make rl           KL-PPO fine-tune (rebuilds bc if missing)"
	@echo "  make eval         $(EVAL_SEEDS)-seed env eval of rl"
	@echo "  make test         pytest suite"
	@echo "  make webots [N=$(N)] [MODE=$(MODE)]"
	@echo "                    launch Webots in the chosen mode"
	@echo "  make clean        delete bc_demos and run artefacts"
	@echo ""
	@echo "Hyperparameter overrides (showing current values):"
	@echo "  TEACHER=$(TEACHER) SEEDS_PER_N=$(SEEDS_PER_N) SUBSAMPLE=$(SUBSAMPLE) FRAME_STACK=$(FRAME_STACK)"
	@echo "  BC_EPOCHS=$(BC_EPOCHS) BC_NET_ARCH=$(BC_NET_ARCH)"
	@echo "  PPO_STEPS=$(PPO_STEPS) KL=$(KL)"
	@echo "  EVAL_SEEDS=$(EVAL_SEEDS) EVAL_MAX_STEPS=$(EVAL_MAX_STEPS)"