Checkpoint 2

This commit is contained in:
Johnny Fernandes
2026-05-07 22:00:10 +01:00
parent 90aa3bbcb4
commit 1bb9415414
37 changed files with 3068 additions and 2912 deletions
View File
+52
View File
@@ -0,0 +1,52 @@
# PPO hyperparameters for the herding env. Tuned for a 28-D obs / 2-D
# continuous action space with 16 parallel envs on GPU. These start from
# the SB3 PPO defaults and are nudged toward longer credit assignment
# (gamma=0.995) and a higher entropy bonus (ent_coef) to keep exploration
# alive while the curriculum expands the flock size. batch_size, the
# network widths and log_std_init are also changed from the SB3 defaults.
# --- PPO ---
# Optimiser / rollout settings. Key names match the Stable-Baselines3 PPO
# constructor arguments; presumably the training script forwards them
# unchanged (the loader is not shown here — confirm before renaming a key).
learning_rate: 3.0e-4
n_steps: 2048 # rollout length per env before each update
# With n_envs: 16 (see the training schedule section) each update sees
# 2048 * 16 = 32,768 transitions, i.e. 128 minibatches of 256 per epoch.
batch_size: 256
n_epochs: 10
# Raised from the SB3 default of 0.99 for longer credit assignment.
gamma: 0.995
gae_lambda: 0.95
clip_range: 0.2
ent_coef: 0.05 # was 0.01 — earlier runs collapsed to ~0 actions
vf_coef: 0.5
max_grad_norm: 0.5
target_kl: null # disable early-stop on KL
# --- Network ---
policy: MlpPolicy
# Hidden-layer widths for the policy (pi) and value (vf) networks.
# Presumably mapped onto SB3's net_arch=dict(pi=..., vf=...) by the
# training script — TODO confirm against the loader.
net_arch_pi: [128, 128]
net_arch_vf: [128, 128]
# Initial action std = exp(0.5) ≈ 1.65 instead of the default 1.0
# (log_std_init 0.0) — more exploration.
log_std_init: 0.5
# --- Training schedule ---
# Step counts are written without `_` digit separators. Those are a YAML
# 1.1 extension: a YAML 1.2 core-schema parser loads `10_000_000` as the
# string "10_000_000" rather than an integer. Magnitudes are given in the
# trailing comments instead.
total_timesteps: 10000000 # 10M
n_envs: 16
checkpoint_freq: 500000 # 500k, in env steps
eval_freq: 100000 # 100k, in env steps
n_eval_episodes: 20
# --- Curriculum (schedule keyed by env step) ---
# Each entry takes effect once training reaches `step` and sets two knobs:
#
#   max_n_sheep - flock-size curriculum. The env samples the flock size
#     uniformly from [1, max_n_sheep] each reset, so raising it widens the
#     distribution gradually rather than swapping fixed sizes.
#
#   difficulty - state-space curriculum controlling the sheep spawn area
#     (0 = sheep spawn just north of gate, 1 = sheep spawn anywhere in
#     field).
#
# The two together let the policy first learn "what penning looks like"
# in a regime where random exploration reliably triggers it, then
# gradually generalise to the deployment distribution.
#
# `step` values are plain integers (no `_` digit separators) so that
# YAML 1.2 parsers load them as ints rather than strings; the magnitude
# is repeated in the trailing comment of each row.
curriculum:
- { step: 0, max_n_sheep: 1, difficulty: 0.0 }
- { step: 1000000, max_n_sheep: 1, difficulty: 0.3 } # 1M
- { step: 2000000, max_n_sheep: 2, difficulty: 0.5 } # 2M
- { step: 4000000, max_n_sheep: 3, difficulty: 0.8 } # 4M
- { step: 6000000, max_n_sheep: 5, difficulty: 1.0 } # 6M
- { step: 8000000, max_n_sheep: 8, difficulty: 1.0 } # 8M
- { step: 9000000, max_n_sheep: 10, difficulty: 1.0 } # 9M