53 lines
2.0 KiB
YAML
53 lines
2.0 KiB
YAML
# PPO hyperparameters for the herding env. Tuned for a 28-D obs / 2-D
|
|
# continuous action space with 16 parallel envs on GPU. These are SB3
|
|
# defaults nudged toward longer credit assignment (gamma=0.995) and a
|
|
# higher entropy bonus (ent_coef=0.05; the SB3 default is 0.0) to keep exploration alive while the curriculum
|
|
# expands the flock size.
|
|
|
|
# --- PPO ---
|
|
learning_rate: 3.0e-4 # optimizer step size (same as the SB3 PPO default)
|
|
n_steps: 2048 # rollout length per env before each update (x n_envs=16 -> 32768 samples per update)
|
|
batch_size: 256 # minibatch size per gradient step; 32768 / 256 = 128 minibatches per epoch
|
|
n_epochs: 10 # optimisation passes over each rollout buffer
|
|
gamma: 0.995 # discount factor; raised for longer credit assignment (see file header)
|
|
gae_lambda: 0.95 # GAE lambda (bias/variance trade-off of the advantage estimate)
|
|
clip_range: 0.2 # PPO clipping range for the policy probability ratio
|
|
ent_coef: 0.05 # entropy bonus weight; was 0.01 — earlier runs collapsed to ~0 actions
|
|
vf_coef: 0.5 # weight of the value-function loss in the total loss
|
|
max_grad_norm: 0.5 # gradient-norm clipping threshold
|
|
target_kl: null # disable early-stop on KL
|
|
|
|
# --- Network ---
|
|
policy: MlpPolicy # SB3 policy name (MLP actor-critic)
|
|
net_arch_pi: [128, 128] # hidden layer widths of the policy (actor) head — NOTE(review): presumably combined with net_arch_vf into SB3's net_arch dict by the trainer; confirm
|
|
net_arch_vf: [128, 128] # hidden layer widths of the value (critic) head
|
|
log_std_init: 0.5 # initial action std = exp(0.5) ≈ 1.65 instead of the default exp(0) = 1.0 — more exploration
|
|
|
|
# --- Training schedule ---
|
|
total_timesteps: 10_000_000 # total env steps — NOTE(review): underscore-separated ints parse as ints only under YAML 1.1 loaders (e.g. PyYAML); confirm the loader in use
|
|
n_envs: 16 # number of parallel environments
|
|
checkpoint_freq: 500_000 # in env steps — NOTE(review): SB3 callbacks count vec-env step() calls, so confirm the trainer divides this by n_envs
|
|
eval_freq: 100_000 # in env steps — NOTE(review): SB3's EvalCallback also counts vec-env step() calls; confirm the trainer converts
|
|
n_eval_episodes: 20 # episodes run per evaluation
|
|
|
|
# --- Curriculum (max_n_sheep + difficulty schedule, in env steps) ---
|
|
# Each entry: at step s, set the env's max_n_sheep and difficulty. The env samples the flock size
|
|
# uniformly from [1, max_n_sheep] each reset, so this widens the
|
|
# distribution gradually rather than swapping fixed sizes.
|
|
#
|
|
# State-space curriculum: difficulty controls sheep spawn area
|
|
# (0 = sheep spawn just north of gate, 1 = sheep spawn anywhere in field).
|
|
# It runs alongside the flock-size (max_n_sheep) curriculum described above.
|
|
#
|
|
# The two together let the policy first learn "what penning looks like"
|
|
# in a regime where random exploration reliably triggers it, then
|
|
# gradually generalise to the deployment distribution.
|
|
curriculum: # stages ordered by ascending step; each sets max_n_sheep and difficulty from that step onward
|
|
- { step: 0, max_n_sheep: 1, difficulty: 0.0 } # warm-up: exactly one sheep, spawned just north of the gate
|
|
- { step: 1_000_000, max_n_sheep: 1, difficulty: 0.3 } # still one sheep; only the spawn area widens
|
|
- { step: 2_000_000, max_n_sheep: 2, difficulty: 0.5 }
|
|
- { step: 4_000_000, max_n_sheep: 3, difficulty: 0.8 }
|
|
- { step: 6_000_000, max_n_sheep: 5, difficulty: 1.0 } # difficulty reaches 1.0 (spawn anywhere); later stages only grow the flock
|
|
- { step: 8_000_000, max_n_sheep: 8, difficulty: 1.0 }
|
|
- { step: 9_000_000, max_n_sheep: 10, difficulty: 1.0 } # final stage: the last 1M of the 10M total_timesteps
|