Sheep training flock _ improver
This commit is contained in:
@@ -110,7 +110,7 @@ def train_stage(n_sheep, steps, n_envs=4, prev_model=None, prev_vecnorm=None):
|
|||||||
model = PPO(
|
model = PPO(
|
||||||
"MlpPolicy", vn,
|
"MlpPolicy", vn,
|
||||||
learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10,
|
learning_rate=3e-4, n_steps=2048, batch_size=256, n_epochs=10,
|
||||||
gamma=0.995, gae_lambda=0.95, clip_range=0.2, ent_coef=0.02,
|
gamma=0.995, gae_lambda=0.95, clip_range=0.2, ent_coef=0.005,
|
||||||
vf_coef=0.5, max_grad_norm=0.5,
|
vf_coef=0.5, max_grad_norm=0.5,
|
||||||
policy_kwargs=dict(net_arch=[256, 256]),
|
policy_kwargs=dict(net_arch=[256, 256]),
|
||||||
verbose=1,
|
verbose=1,
|
||||||
|
|||||||
+1
-1
@@ -332,7 +332,7 @@ def main():
|
|||||||
gamma = 0.995,
|
gamma = 0.995,
|
||||||
gae_lambda = 0.95,
|
gae_lambda = 0.95,
|
||||||
clip_range = 0.2,
|
clip_range = 0.2,
|
||||||
ent_coef = 0.02,
|
ent_coef = 0.005,
|
||||||
vf_coef = 0.5,
|
vf_coef = 0.5,
|
||||||
max_grad_norm = 0.5,
|
max_grad_norm = 0.5,
|
||||||
policy_kwargs = dict(net_arch=[256, 256]),
|
policy_kwargs = dict(net_arch=[256, 256]),
|
||||||
|
|||||||
Reference in New Issue
Block a user