Checkpoint 9

This commit is contained in:
Johnny Fernandes
2026-05-13 13:46:50 +01:00
parent be58ad2054
commit 683de740af
2 changed files with 24 additions and 9 deletions
+4 -2
View File
@@ -71,12 +71,14 @@ BC_NET_ARCH ?= 512,512
# Per-WORLD defaults. field_round gets a larger PPO_STEPS and a smaller KL
# than every other world. Everything here uses `?=`, so a value given on the
# command line or in the environment still takes precedence.
ifeq ($(WORLD),field_round)
PPO_STEPS ?= 4000000
KL ?= 0.02
else
PPO_STEPS ?= 2000000
KL ?= 0.05
endif
# Time penalty is 0 until success rate is high. Earlier runs showed
# TIME_W=-0.05 traded ~10 pts of success for speed on hard combos —
# learn to succeed first, optimize speed in a later pass.
# TIME_W is deliberately NOT set inside the per-world conditional above:
# `?=` only assigns when the variable is still unset, so an earlier
# per-world value would win and turn this default into a silent no-op.
TIME_W ?= 0.0
IMITATE ?= 0.0
# PPO rollouts at full difficulty so the training distribution matches
# eval (deployment). Anything lower causes a train/eval mismatch that