Sheep training flock _ improver
This commit is contained in:
@@ -110,7 +110,7 @@ def train_stage(n_sheep, steps, n_envs=4, prev_model=None, prev_vecnorm=None):
|
||||
model = PPO(
|
||||
"MlpPolicy", vn,
|
||||
learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10,
|
||||
- gamma=0.995, gae_lambda=0.95, clip_range=0.2, ent_coef=0.02,
|
||||
+ gamma=0.995, gae_lambda=0.95, clip_range=0.2, ent_coef=0.005,
|
||||
vf_coef=0.5, max_grad_norm=0.5,
|
||||
policy_kwargs=dict(net_arch=[256, 256]),
|
||||
verbose=1,
|
||||
|
||||
+1
-1
@@ -332,7 +332,7 @@ def main():
|
||||
gamma = 0.995,
|
||||
gae_lambda = 0.95,
|
||||
clip_range = 0.2,
|
||||
- ent_coef = 0.02,
|
||||
+ ent_coef = 0.005,
|
||||
vf_coef = 0.5,
|
||||
max_grad_norm = 0.5,
|
||||
policy_kwargs = dict(net_arch=[256, 256]),
|
||||
|
||||
Reference in New Issue
Block a user