diff --git a/training/smoke_test.py b/training/smoke_test.py index 71f36e1..0df69d2 100644 --- a/training/smoke_test.py +++ b/training/smoke_test.py @@ -110,7 +110,7 @@ def train_stage(n_sheep, steps, n_envs=4, prev_model=None, prev_vecnorm=None): model = PPO( "MlpPolicy", vn, learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10, - gamma=0.995, gae_lambda=0.95, clip_range=0.2, ent_coef=0.02, + gamma=0.995, gae_lambda=0.95, clip_range=0.2, ent_coef=0.005, vf_coef=0.5, max_grad_norm=0.5, policy_kwargs=dict(net_arch=[256, 256]), verbose=1, diff --git a/training/train.py b/training/train.py index adb577a..b43e88b 100644 --- a/training/train.py +++ b/training/train.py @@ -332,7 +332,7 @@ def main(): gamma = 0.995, gae_lambda = 0.95, clip_range = 0.2, - ent_coef = 0.02, + ent_coef = 0.005, vf_coef = 0.5, max_grad_norm = 0.5, policy_kwargs = dict(net_arch=[256, 256]),