Checkpoint 2
This commit is contained in:
@@ -0,0 +1,52 @@
|
||||
# PPO hyperparameters for the herding env. Tuned for a 28-D obs / 2-D
# continuous action space with 16 parallel envs on GPU. These are SB3
# defaults nudged toward longer credit assignment (gamma=0.995) and a
# higher entropy bonus (ent_coef=0.05) to keep exploration alive while the
# curriculum expands the flock size.

# --- PPO ---
# Optimiser and rollout settings for the PPO update.
# Keep the ".0" and the signed exponent on learning_rate: YAML 1.1 loaders
# such as PyYAML resolve "3e-4" as a string, not a float.
learning_rate: 3.0e-4
n_steps: 2048  # rollout length per env before each update
batch_size: 256
n_epochs: 10
gamma: 0.995  # raised for longer credit assignment
gae_lambda: 0.95
clip_range: 0.2
ent_coef: 0.05  # was 0.01 — earlier runs collapsed to ~0 actions
vf_coef: 0.5
max_grad_norm: 0.5
target_kl: null  # disable early-stop on KL

# --- Network ---
policy: MlpPolicy
# Hidden-layer widths; presumably "pi" is the policy head and "vf" the
# value head (SB3 naming) — confirm against the training script.
net_arch_pi: [128, 128]
net_arch_vf: [128, 128]
# Initial std = exp(0.5) ≈ 1.65 instead of the default 1.0 — more
# exploration early in training.
log_std_init: 0.5

# --- Training schedule ---
# Step counts are written without digit separators: "10_000_000" resolves
# to an integer only under YAML 1.1 loaders and to a string under the
# YAML 1.2 core schema.
total_timesteps: 10000000  # 10M
n_envs: 16
checkpoint_freq: 500000  # 500k, in env steps
eval_freq: 100000  # 100k, in env steps
n_eval_episodes: 20

# --- Curriculum (schedule keyed by env steps) ---
# Each entry applies once training reaches `step` and sets two knobs:
#
#   max_n_sheep  Flock-size curriculum. The env samples uniformly from
#                [1, max_n_sheep] on each reset, so raising it widens the
#                distribution gradually rather than swapping fixed sizes.
#   difficulty   State-space curriculum controlling the sheep spawn area
#                (0 = sheep spawn just north of the gate, 1 = sheep spawn
#                anywhere in the field).
#
# The two together let the policy first learn "what penning looks like"
# in a regime where random exploration reliably triggers it, then
# gradually generalise to the deployment distribution.
#
# `step` values carry no digit separators so they stay integers under
# YAML 1.2 loaders as well as YAML 1.1.
curriculum:
  - step: 0
    max_n_sheep: 1
    difficulty: 0.0
  - step: 1000000  # 1M
    max_n_sheep: 1
    difficulty: 0.3
  - step: 2000000  # 2M
    max_n_sheep: 2
    difficulty: 0.5
  - step: 4000000  # 4M
    max_n_sheep: 3
    difficulty: 0.8
  - step: 6000000  # 6M
    max_n_sheep: 5
    difficulty: 1.0
  - step: 8000000  # 8M
    max_n_sheep: 8
    difficulty: 1.0
  - step: 9000000  # 9M
    max_n_sheep: 10
    difficulty: 1.0
Reference in New Issue
Block a user