diff --git a/training/runs/ppo_debug.log b/training/runs/ppo_debug.log new file mode 100644 index 0000000..81b308e --- /dev/null +++ b/training/runs/ppo_debug.log @@ -0,0 +1,5569 @@ +Using cpu device +Logging to runs/ppo_debug/ppo_1 +------------------------------ +| time/ | | +| fps | 5496 | +| iterations | 1 | +| time_elapsed | 2 | +| total_timesteps | 16384 | +------------------------------ +------------------------------------------ +| time/ | | +| fps | 4317 | +| iterations | 2 | +| time_elapsed | 7 | +| total_timesteps | 32768 | +| train/ | | +| approx_kl | 0.0036917897 | +| clip_fraction | 0.0212 | +| clip_range | 0.2 | +| entropy_loss | -2.83 | +| explained_variance | 0.352 | +| learning_rate | 0.0003 | +| loss | -0.0118 | +| n_updates | 10 | +| policy_gradient_loss | -0.000544 | +| std | 0.999 | +| value_loss | 0.0658 | +------------------------------------------ +------------------------------------------ +| time/ | | +| fps | 3946 | +| iterations | 3 | +| time_elapsed | 12 | +| total_timesteps | 49152 | +| train/ | | +| approx_kl | 0.0033213054 | +| clip_fraction | 0.0266 | +| clip_range | 0.2 | +| entropy_loss | -2.83 | +| explained_variance | 0.502 | +| learning_rate | 0.0003 | +| loss | -0.0255 | +| n_updates | 20 | +| policy_gradient_loss | -0.00158 | +| std | 0.997 | +| value_loss | 0.08 | +------------------------------------------ +/home/jalf/miniconda3/envs/tir/lib/python3.12/site-packages/stable_baselines3/common/evaluation.py:71: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper. + warnings.warn( +Eval num_timesteps=50000, episode_reward=-32.92 +/- 15.12 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -32.9 | +| time/ | | +| total_timesteps | 50000 | +| train/ | | +| approx_kl | 0.005147726 | +| clip_fraction | 0.0478 | +| clip_range | 0.2 | +| entropy_loss | -2.84 | +| explained_variance | 0.893 | +| learning_rate | 0.0003 | +| loss | -0.0145 | +| n_updates | 30 | +| policy_gradient_loss | -0.00318 | +| std | 1 | +| value_loss | 0.0194 | +----------------------------------------- +New best mean reward! +------------------------------ +| time/ | | +| fps | 2231 | +| iterations | 4 | +| time_elapsed | 29 | +| total_timesteps | 65536 | +------------------------------ +------------------------------------------ +| time/ | | +| fps | 2444 | +| iterations | 5 | +| time_elapsed | 33 | +| total_timesteps | 81920 | +| train/ | | +| approx_kl | 0.0054671075 | +| clip_fraction | 0.0529 | +| clip_range | 0.2 | +| entropy_loss | -2.84 | +| explained_variance | 0.914 | +| learning_rate | 0.0003 | +| loss | -0.021 | +| n_updates | 40 | +| policy_gradient_loss | -0.00416 | +| std | 1 | +| value_loss | 0.0247 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 2616 | +| iterations | 6 | +| time_elapsed | 37 | +| total_timesteps | 98304 | +| train/ | | +| approx_kl | 0.004603466 | +| clip_fraction | 0.0379 | +| clip_range | 0.2 | +| entropy_loss | -2.83 | +| explained_variance | 0.931 | +| learning_rate | 0.0003 | +| loss | -0.0193 | +| n_updates | 50 | +| policy_gradient_loss | -0.00284 | +| std | 0.995 | +| value_loss | 0.0171 | +----------------------------------------- +/home/jalf/miniconda3/envs/tir/lib/python3.12/site-packages/stable_baselines3/common/evaluation.py:71: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper. + warnings.warn( +Eval num_timesteps=100000, episode_reward=-27.45 +/- 49.10 +Episode length: 1973.15 +/- 86.14 +------------------------------------------ +| eval/ | | +| mean_ep_length | 1.97e+03 | +| mean_reward | -27.4 | +| time/ | | +| total_timesteps | 100000 | +| train/ | | +| approx_kl | 0.0053039393 | +| clip_fraction | 0.0564 | +| clip_range | 0.2 | +| entropy_loss | -2.83 | +| explained_variance | 0.878 | +| learning_rate | 0.0003 | +| loss | -0.0325 | +| n_updates | 60 | +| policy_gradient_loss | -0.00404 | +| std | 0.998 | +| value_loss | 0.0118 | +------------------------------------------ +New best mean reward! +------------------------------- +| time/ | | +| fps | 2212 | +| iterations | 7 | +| time_elapsed | 51 | +| total_timesteps | 114688 | +------------------------------- +------------------------------------------ +| time/ | | +| fps | 2332 | +| iterations | 8 | +| time_elapsed | 56 | +| total_timesteps | 131072 | +| train/ | | +| approx_kl | 0.0048020086 | +| clip_fraction | 0.0449 | +| clip_range | 0.2 | +| entropy_loss | -2.84 | +| explained_variance | 0.839 | +| learning_rate | 0.0003 | +| loss | -0.0375 | +| n_updates | 70 | +| policy_gradient_loss | -0.00359 | +| std | 1 | +| value_loss | 0.0102 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 2429 | +| iterations | 9 | +| time_elapsed | 60 | +| total_timesteps | 147456 | +| train/ | | +| approx_kl | 0.004460754 | +| clip_fraction | 0.0349 | +| clip_range | 0.2 | +| entropy_loss | -2.85 | +| explained_variance | 0.874 | +| learning_rate | 0.0003 | +| loss | -0.0293 | +| n_updates | 80 | +| policy_gradient_loss | -0.00294 | +| std | 1.01 | +| value_loss | 0.0132 | +----------------------------------------- +Eval num_timesteps=150000, episode_reward=-33.46 +/- 39.53 +Episode length: 1990.60 +/- 40.97 +----------------------------------------- +| eval/ | | +| mean_ep_length | 1.99e+03 | +| mean_reward | -33.5 | +| time/ | | +| total_timesteps | 150000 | +| train/ | | +| approx_kl | 0.003831089 | +| clip_fraction | 0.0196 | +| clip_range | 0.2 | +| entropy_loss | -2.82 | +| explained_variance | 0.381 | +| learning_rate | 0.0003 | +| loss | -0.0191 | +| n_updates | 90 | +| policy_gradient_loss | -0.00202 | +| std | 0.984 | +| value_loss | 0.104 | +----------------------------------------- +------------------------------- +| time/ | | +| fps | 2147 | +| iterations | 10 | +| time_elapsed | 76 | +| total_timesteps | 163840 | +------------------------------- +------------------------------------------ +| time/ | | +| fps | 2218 | +| iterations | 11 | +| time_elapsed | 81 | +| total_timesteps | 180224 | +| train/ | | +| approx_kl | 0.0032510734 | +| clip_fraction | 0.0246 | +| clip_range | 0.2 | +| entropy_loss | -2.82 | +| explained_variance | 0.887 | +| learning_rate | 0.0003 | +| loss | -0.0279 | +| n_updates | 100 | +| policy_gradient_loss | -0.00207 | +| std | 0.993 | +| value_loss | 0.045 | +------------------------------------------ +------------------------------------------ +| time/ | | +| fps | 2289 | +| iterations | 12 | +| time_elapsed | 85 | +| total_timesteps | 196608 | +| train/ | | +| approx_kl | 0.0047060847 | +| clip_fraction | 0.0387 | +| clip_range | 0.2 | +| entropy_loss | -2.83 | +| explained_variance | 0.896 | +| learning_rate | 0.0003 | +| loss | 0.00931 | +| n_updates | 110 | +| policy_gradient_loss | -0.00305 | +| std | 0.994 | +| value_loss | 0.0489 | +------------------------------------------ +Eval num_timesteps=200000, episode_reward=-18.47 +/- 55.53 +Episode length: 1938.95 +/- 147.97 +------------------------------------------ +| eval/ | | +| mean_ep_length | 1.94e+03 | +| mean_reward | -18.5 | +| time/ | | +| total_timesteps | 200000 | +| train/ | | +| approx_kl | 0.0047602034 | +| clip_fraction | 0.0421 | +| clip_range | 0.2 | +| entropy_loss | -2.86 | +| explained_variance | 0.968 | +| learning_rate | 0.0003 | +| loss | -0.0301 | +| n_updates | 120 | +| policy_gradient_loss | -0.00281 | +| std | 1.01 | +| value_loss | 0.0094 | +------------------------------------------ +New best mean reward! + +[Diag @ 200,000 | n_sheep=1 | success=5%] + COMPACT_CANT_DRIVE 18/20 + DROVE_NO_SHEEP 1/20 + SUCCESS 1/20 + action_mag mean=0.269 p10=0.129 p90=0.447 (0=stopped, 1=full speed) + min_flock_radius mean=0.00m best=0.00m (target <5m to compact) + min_dog_to_com mean=3.86m best=1.91m (FLEE_DIST=7m) + min_com_to_pen mean=11.22m best=2.44m + reward/step (mean): progress=-0.0022 alignment=+0.0006 pen_bonus=+0.0003 step_cost=-0.0200 complete=+0.0026 +------------------------------- +| time/ | | +| fps | 1964 | +| iterations | 13 | +| time_elapsed | 108 | +| total_timesteps | 212992 | +------------------------------- +------------------------------------------ +| time/ | | +| fps | 2034 | +| iterations | 14 | +| time_elapsed | 112 | +| total_timesteps | 229376 | +| train/ | | +| approx_kl | 0.0041663316 | +| clip_fraction | 0.0373 | +| clip_range | 0.2 | +| entropy_loss | -2.88 | +| explained_variance | 0.901 | +| learning_rate | 0.0003 | +| loss | -0.0251 | +| n_updates | 130 | +| policy_gradient_loss | -0.00223 | +| std | 1.03 | +| value_loss | 0.00752 | +------------------------------------------ +------------------------------------------ +| time/ | | +| fps | 2102 | +| iterations | 15 | +| time_elapsed | 116 | +| total_timesteps | 245760 | +| train/ | | +| approx_kl | 0.0042076977 | +| clip_fraction | 0.032 | +| clip_range | 0.2 | +| entropy_loss | -2.91 | +| explained_variance | 0.939 | +| learning_rate | 0.0003 | +| loss | -0.0333 | +| n_updates | 140 | +| policy_gradient_loss | -0.00281 | +| std | 1.04 | +| value_loss | 0.00934 | +------------------------------------------ +Eval num_timesteps=250000, episode_reward=-37.07 +/- 35.02 +Episode length: 1938.20 +/- 269.38 +------------------------------------------ +| eval/ | | +| mean_ep_length | 1.94e+03 | +| mean_reward | -37.1 | +| time/ | | +| total_timesteps | 250000 | +| train/ | | +| approx_kl | 0.0028561926 | +| clip_fraction | 0.0171 | +| clip_range | 0.2 | +| entropy_loss | -2.92 | +| explained_variance | 0.822 | +| learning_rate | 0.0003 | +| loss | -0.0292 | +| n_updates | 150 | +| policy_gradient_loss | -0.00113 | +| std | 1.04 | +| value_loss | 0.0473 | +------------------------------------------ +------------------------------- +| time/ | | +| fps | 1990 | +| iterations | 16 | +| time_elapsed | 131 | +| total_timesteps | 262144 | +------------------------------- +------------------------------------------ +| time/ | | +| fps | 2042 | +| iterations | 17 | +| time_elapsed | 136 | +| total_timesteps | 278528 | +| train/ | | +| approx_kl | 0.0054259067 | +| clip_fraction | 0.0468 | +| clip_range | 0.2 | +| entropy_loss | -2.91 | +| explained_variance | 0.891 | +| learning_rate | 0.0003 | +| loss | -0.032 | +| n_updates | 160 | +| policy_gradient_loss | -0.00597 | +| std | 1.03 | +| value_loss | 0.0128 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 2085 | +| iterations | 18 | +| time_elapsed | 141 | +| total_timesteps | 294912 | +| train/ | | +| approx_kl | 0.004205579 | +| clip_fraction | 0.0291 | +| clip_range | 0.2 | +| entropy_loss | -2.91 | +| explained_variance | 0.834 | +| learning_rate | 0.0003 | +| loss | -0.0364 | +| n_updates | 170 | +| policy_gradient_loss | -0.00307 | +| std | 1.03 | +| value_loss | 0.0107 | +----------------------------------------- +Eval num_timesteps=300000, episode_reward=-25.41 +/- 48.70 +Episode length: 1886.45 +/- 435.99 +------------------------------------------ +| eval/ | | +| mean_ep_length | 1.89e+03 | +| mean_reward | -25.4 | +| time/ | | +| total_timesteps | 300000 | +| train/ | | +| approx_kl | 0.0045948992 | +| clip_fraction | 0.0354 | +| clip_range | 0.2 | +| entropy_loss | -2.9 | +| explained_variance | 0.806 | +| learning_rate | 0.0003 | +| loss | -0.0242 | +| n_updates | 180 | +| policy_gradient_loss | -0.00236 | +| std | 1.03 | +| value_loss | 0.0371 | +------------------------------------------ +------------------------------- +| time/ | | +| fps | 1981 | +| iterations | 19 | +| time_elapsed | 157 | +| total_timesteps | 311296 | +------------------------------- +----------------------------------------- +| time/ | | +| fps | 2024 | +| iterations | 20 | +| time_elapsed | 161 | +| total_timesteps | 327680 | +| train/ | | +| approx_kl | 0.005344864 | +| clip_fraction | 0.0442 | +| clip_range | 0.2 | +| entropy_loss | -2.91 | +| explained_variance | 0.877 | +| learning_rate | 0.0003 | +| loss | -0.0369 | +| n_updates | 190 | +| policy_gradient_loss | -0.00344 | +| std | 1.04 | +| value_loss | 0.0104 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 2066 | +| iterations | 21 | +| time_elapsed | 166 | +| total_timesteps | 344064 | +| train/ | | +| approx_kl | 0.007574372 | +| clip_fraction | 0.0753 | +| clip_range | 0.2 | +| entropy_loss | -2.92 | +| explained_variance | 0.903 | +| learning_rate | 0.0003 | +| loss | -0.0272 | +| n_updates | 200 | +| policy_gradient_loss | -0.00726 | +| std | 1.04 | +| value_loss | 0.0113 | +----------------------------------------- +Eval num_timesteps=350000, episode_reward=-21.14 +/- 37.01 +Episode length: 1959.80 +/- 175.23 +------------------------------------------ +| eval/ | | +| mean_ep_length | 1.96e+03 | +| mean_reward | -21.1 | +| time/ | | +| total_timesteps | 350000 | +| train/ | | +| approx_kl | 0.0061714016 | +| clip_fraction | 0.0569 | +| clip_range | 0.2 | +| entropy_loss | -2.91 | +| explained_variance | 0.917 | +| learning_rate | 0.0003 | +| loss | -0.022 | +| n_updates | 210 | +| policy_gradient_loss | -0.00598 | +| std | 1.04 | +| value_loss | 0.0231 | +------------------------------------------ +------------------------------- +| time/ | | +| fps | 1984 | +| iterations | 22 | +| time_elapsed | 181 | +| total_timesteps | 360448 | +------------------------------- +---------------------------------------- +| time/ | | +| fps | 2026 | +| iterations | 23 | +| time_elapsed | 185 | +| total_timesteps | 376832 | +| train/ | | +| approx_kl | 0.00587913 | +| clip_fraction | 0.0501 | +| clip_range | 0.2 | +| entropy_loss | -2.92 | +| explained_variance | 0.932 | +| learning_rate | 0.0003 | +| loss | -0.0415 | +| n_updates | 220 | +| policy_gradient_loss | -0.00484 | +| std | 1.04 | +| value_loss | 0.0242 | +---------------------------------------- +----------------------------------------- +| time/ | | +| fps | 2064 | +| iterations | 24 | +| time_elapsed | 190 | +| total_timesteps | 393216 | +| train/ | | +| approx_kl | 0.006933649 | +| clip_fraction | 0.081 | +| clip_range | 0.2 | +| entropy_loss | -2.91 | +| explained_variance | 0.918 | +| learning_rate | 0.0003 | +| loss | -0.032 | +| n_updates | 230 | +| policy_gradient_loss | -0.00773 | +| std | 1.03 | +| value_loss | 0.0233 | +----------------------------------------- +Eval num_timesteps=400000, episode_reward=-2.75 +/- 37.08 +Episode length: 1998.55 +/- 6.32 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -2.75 | +| time/ | | +| total_timesteps | 400000 | +| train/ | | +| approx_kl | 0.0064436095 | +| clip_fraction | 0.0647 | +| clip_range | 0.2 | +| entropy_loss | -2.9 | +| explained_variance | 0.853 | +| learning_rate | 0.0003 | +| loss | 0.0633 | +| n_updates | 240 | +| policy_gradient_loss | -0.00551 | +| std | 1.03 | +| value_loss | 0.128 | +------------------------------------------ +New best mean reward! + +[Diag @ 400,000 | n_sheep=1 | success=0%] + DROVE_NO_SHEEP 13/20 + COMPACT_CANT_DRIVE 7/20 + action_mag mean=0.316 p10=0.057 p90=0.512 (0=stopped, 1=full speed) + min_flock_radius mean=0.00m best=0.00m (target <5m to compact) + min_dog_to_com mean=1.86m best=0.95m (FLEE_DIST=7m) + min_com_to_pen mean=3.19m best=1.50m + reward/step (mean): progress=+0.0093 alignment=+0.0040 pen_bonus=+0.0000 step_cost=-0.0200 complete=+0.0000 +------------------------------- +| time/ | | +| fps | 1925 | +| iterations | 25 | +| time_elapsed | 212 | +| total_timesteps | 409600 | +------------------------------- +---------------------------------------- +| time/ | | +| fps | 1961 | +| iterations | 26 | +| time_elapsed | 217 | +| total_timesteps | 425984 | +| train/ | | +| approx_kl | 0.00806847 | +| clip_fraction | 0.1 | +| clip_range | 0.2 | +| entropy_loss | -2.88 | +| explained_variance | 0.933 | +| learning_rate | 0.0003 | +| loss | -0.0254 | +| n_updates | 250 | +| policy_gradient_loss | -0.00871 | +| std | 1.02 | +| value_loss | 0.0264 | +---------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1997 | +| iterations | 27 | +| time_elapsed | 221 | +| total_timesteps | 442368 | +| train/ | | +| approx_kl | 0.005784355 | +| clip_fraction | 0.0531 | +| clip_range | 0.2 | +| entropy_loss | -2.86 | +| explained_variance | 0.878 | +| learning_rate | 0.0003 | +| loss | 0.00996 | +| n_updates | 260 | +| policy_gradient_loss | -0.00485 | +| std | 1 | +| value_loss | 0.0868 | +----------------------------------------- +Eval num_timesteps=450000, episode_reward=51.79 +/- 20.61 +Episode length: 1912.30 +/- 382.28 +----------------------------------------- +| eval/ | | +| mean_ep_length | 1.91e+03 | +| mean_reward | 51.8 | +| time/ | | +| total_timesteps | 450000 | +| train/ | | +| approx_kl | 0.005881632 | +| clip_fraction | 0.0639 | +| clip_range | 0.2 | +| entropy_loss | -2.83 | +| explained_variance | 0.952 | +| learning_rate | 0.0003 | +| loss | -0.0187 | +| n_updates | 270 | +| policy_gradient_loss | -0.00655 | +| std | 0.991 | +| value_loss | 0.0226 | +----------------------------------------- +New best mean reward! +------------------------------- +| time/ | | +| fps | 1936 | +| iterations | 28 | +| time_elapsed | 236 | +| total_timesteps | 458752 | +------------------------------- +----------------------------------------- +| time/ | | +| fps | 1965 | +| iterations | 29 | +| time_elapsed | 241 | +| total_timesteps | 475136 | +| train/ | | +| approx_kl | 0.009020726 | +| clip_fraction | 0.0982 | +| clip_range | 0.2 | +| entropy_loss | -2.81 | +| explained_variance | 0.87 | +| learning_rate | 0.0003 | +| loss | 0.0218 | +| n_updates | 280 | +| policy_gradient_loss | -0.0061 | +| std | 0.984 | +| value_loss | 0.209 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1999 | +| iterations | 30 | +| time_elapsed | 245 | +| total_timesteps | 491520 | +| train/ | | +| approx_kl | 0.011525536 | +| clip_fraction | 0.136 | +| clip_range | 0.2 | +| entropy_loss | -2.79 | +| explained_variance | 0.92 | +| learning_rate | 0.0003 | +| loss | 0.0306 | +| n_updates | 290 | +| policy_gradient_loss | -0.00896 | +| std | 0.97 | +| value_loss | 0.0903 | +----------------------------------------- +Eval num_timesteps=500000, episode_reward=87.01 +/- 42.12 +Episode length: 1359.85 +/- 815.95 +----------------------------------------- +| eval/ | | +| mean_ep_length | 1.36e+03 | +| mean_reward | 87 | +| time/ | | +| total_timesteps | 500000 | +| train/ | | +| approx_kl | 0.012545023 | +| clip_fraction | 0.171 | +| clip_range | 0.2 | +| entropy_loss | -2.78 | +| explained_variance | 0.956 | +| learning_rate | 0.0003 | +| loss | -0.0369 | +| n_updates | 300 | +| policy_gradient_loss | -0.0069 | +| std | 0.972 | +| value_loss | 0.034 | +----------------------------------------- +New best mean reward! +------------------------------- +| time/ | | +| fps | 1968 | +| iterations | 31 | +| time_elapsed | 258 | +| total_timesteps | 507904 | +------------------------------- +----------------------------------------- +| time/ | | +| fps | 1996 | +| iterations | 32 | +| time_elapsed | 262 | +| total_timesteps | 524288 | +| train/ | | +| approx_kl | 0.008305798 | +| clip_fraction | 0.102 | +| clip_range | 0.2 | +| entropy_loss | -2.78 | +| explained_variance | 0.975 | +| learning_rate | 0.0003 | +| loss | -0.0285 | +| n_updates | 310 | +| policy_gradient_loss | -0.00343 | +| std | 0.972 | +| value_loss | 0.0162 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 2021 | +| iterations | 33 | +| time_elapsed | 267 | +| total_timesteps | 540672 | +| train/ | | +| approx_kl | 0.0074599315 | +| clip_fraction | 0.0925 | +| clip_range | 0.2 | +| entropy_loss | -2.81 | +| explained_variance | 0.976 | +| learning_rate | 0.0003 | +| loss | -0.0282 | +| n_updates | 320 | +| policy_gradient_loss | -0.0028 | +| std | 0.989 | +| value_loss | 0.0136 | +------------------------------------------ +Eval num_timesteps=550000, episode_reward=113.42 +/- 48.33 +Episode length: 926.05 +/- 792.99 +----------------------------------------- +| eval/ | | +| mean_ep_length | 926 | +| mean_reward | 113 | +| time/ | | +| total_timesteps | 550000 | +| train/ | | +| approx_kl | 0.010888291 | +| clip_fraction | 0.136 | +| clip_range | 0.2 | +| entropy_loss | -2.83 | +| explained_variance | 0.981 | +| learning_rate | 0.0003 | +| loss | -0.0226 | +| n_updates | 330 | +| policy_gradient_loss | -0.00266 | +| std | 1 | +| value_loss | 0.00643 | +----------------------------------------- +New best mean reward! +------------------------------- +| time/ | | +| fps | 2005 | +| iterations | 34 | +| time_elapsed | 277 | +| total_timesteps | 557056 | +------------------------------- +----------------------------------------- +| time/ | | +| fps | 2030 | +| iterations | 35 | +| time_elapsed | 282 | +| total_timesteps | 573440 | +| train/ | | +| approx_kl | 0.009418717 | +| clip_fraction | 0.121 | +| clip_range | 0.2 | +| entropy_loss | -2.84 | +| explained_variance | 0.975 | +| learning_rate | 0.0003 | +| loss | -0.0234 | +| n_updates | 340 | +| policy_gradient_loss | -0.00417 | +| std | 1 | +| value_loss | 0.0219 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 2054 | +| iterations | 36 | +| time_elapsed | 287 | +| total_timesteps | 589824 | +| train/ | | +| approx_kl | 0.009153167 | +| clip_fraction | 0.132 | +| clip_range | 0.2 | +| entropy_loss | -2.86 | +| explained_variance | 0.972 | +| learning_rate | 0.0003 | +| loss | 0.00458 | +| n_updates | 350 | +| policy_gradient_loss | -0.00925 | +| std | 1.01 | +| value_loss | 0.0644 | +----------------------------------------- +Eval num_timesteps=600000, episode_reward=142.43 +/- 15.10 +Episode length: 292.00 +/- 114.85 +------------------------------------------ +| eval/ | | +| mean_ep_length | 292 | +| mean_reward | 142 | +| time/ | | +| total_timesteps | 600000 | +| train/ | | +| approx_kl | 0.0073751104 | +| clip_fraction | 0.0817 | +| clip_range | 0.2 | +| entropy_loss | -2.85 | +| explained_variance | 0.967 | +| learning_rate | 0.0003 | +| loss | 0.0205 | +| n_updates | 360 | +| policy_gradient_loss | -0.0078 | +| std | 1.01 | +| value_loss | 0.0854 | +------------------------------------------ +New best mean reward! + +[Diag @ 600,000 | n_sheep=1 | success=100%] + SUCCESS 20/20 + action_mag mean=0.339 p10=0.246 p90=0.609 (0=stopped, 1=full speed) + min_flock_radius mean=0.00m best=0.00m (target <5m to compact) + min_dog_to_com mean=1.68m best=0.23m (FLEE_DIST=7m) + min_com_to_pen mean=3.54m best=2.70m + reward/step (mean): progress=+0.0996 alignment=+0.0271 pen_bonus=+0.0302 step_cost=-0.0200 complete=+0.3022 +------------------------------- +| time/ | | +| fps | 2059 | +| iterations | 37 | +| time_elapsed | 294 | +| total_timesteps | 606208 | +------------------------------- +----------------------------------------- +| time/ | | +| fps | 2069 | +| iterations | 38 | +| time_elapsed | 300 | +| total_timesteps | 622592 | +| train/ | | +| approx_kl | 0.006348365 | +| clip_fraction | 0.0685 | +| clip_range | 0.2 | +| entropy_loss | -2.85 | +| explained_variance | 0.954 | +| learning_rate | 0.0003 | +| loss | -0.0107 | +| n_updates | 370 | +| policy_gradient_loss | -0.00403 | +| std | 1 | +| value_loss | 0.0629 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 2085 | +| iterations | 39 | +| time_elapsed | 306 | +| total_timesteps | 638976 | +| train/ | | +| approx_kl | 0.0073653567 | +| clip_fraction | 0.089 | +| clip_range | 0.2 | +| entropy_loss | -2.83 | +| explained_variance | 0.976 | +| learning_rate | 0.0003 | +| loss | -0.0379 | +| n_updates | 380 | +| policy_gradient_loss | -0.00635 | +| std | 0.993 | +| value_loss | 0.0213 | +------------------------------------------ +Eval num_timesteps=650000, episode_reward=148.63 +/- 11.08 +Episode length: 312.15 +/- 83.52 +------------------------------------------ +| eval/ | | +| mean_ep_length | 312 | +| mean_reward | 149 | +| time/ | | +| total_timesteps | 650000 | +| train/ | | +| approx_kl | 0.0064217458 | +| clip_fraction | 0.0662 | +| clip_range | 0.2 | +| entropy_loss | -2.81 | +| explained_variance | 0.977 | +| learning_rate | 0.0003 | +| loss | -0.0177 | +| n_updates | 390 | +| policy_gradient_loss | -0.00451 | +| std | 0.983 | +| value_loss | 0.0325 | +------------------------------------------ +New best mean reward! +------------------------------- +| time/ | | +| fps | 2092 | +| iterations | 40 | +| time_elapsed | 313 | +| total_timesteps | 655360 | +------------------------------- +----------------------------------------- +| time/ | | +| fps | 2107 | +| iterations | 41 | +| time_elapsed | 318 | +| total_timesteps | 671744 | +| train/ | | +| approx_kl | 0.007330196 | +| clip_fraction | 0.0823 | +| clip_range | 0.2 | +| entropy_loss | -2.79 | +| explained_variance | 0.985 | +| learning_rate | 0.0003 | +| loss | -0.0257 | +| n_updates | 400 | +| policy_gradient_loss | -0.00559 | +| std | 0.971 | +| value_loss | 0.0108 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 2123 | +| iterations | 42 | +| time_elapsed | 323 | +| total_timesteps | 688128 | +| train/ | | +| approx_kl | 0.0076610697 | +| clip_fraction | 0.0876 | +| clip_range | 0.2 | +| entropy_loss | -2.77 | +| explained_variance | 0.99 | +| learning_rate | 0.0003 | +| loss | -0.037 | +| n_updates | 410 | +| policy_gradient_loss | -0.00581 | +| std | 0.966 | +| value_loss | 0.00623 | +------------------------------------------ +Eval num_timesteps=700000, episode_reward=137.38 +/- 18.54 +Episode length: 255.10 +/- 119.47 +------------------------------------------ +| eval/ | | +| mean_ep_length | 255 | +| mean_reward | 137 | +| time/ | | +| total_timesteps | 700000 | +| train/ | | +| approx_kl | 0.0072219693 | +| clip_fraction | 0.0734 | +| clip_range | 0.2 | +| entropy_loss | -2.76 | +| explained_variance | 0.989 | +| learning_rate | 0.0003 | +| loss | -0.0383 | +| n_updates | 420 | +| policy_gradient_loss | -0.00416 | +| std | 0.961 | +| value_loss | 0.00951 | +------------------------------------------ +------------------------------- +| time/ | | +| fps | 2128 | +| iterations | 43 | +| time_elapsed | 331 | +| total_timesteps | 704512 | +------------------------------- +------------------------------------------ +| time/ | | +| fps | 2144 | +| iterations | 44 | +| time_elapsed | 336 | +| total_timesteps | 720896 | +| train/ | | +| approx_kl | 0.0075956425 | +| clip_fraction | 0.0895 | +| clip_range | 0.2 | +| entropy_loss | -2.75 | +| explained_variance | 0.993 | +| learning_rate | 0.0003 | +| loss | -0.0433 | +| n_updates | 430 | +| policy_gradient_loss | -0.00475 | +| std | 0.953 | +| value_loss | 0.00343 | +------------------------------------------ +------------------------------------------ +| time/ | | +| fps | 2160 | +| iterations | 45 | +| time_elapsed | 341 | +| total_timesteps | 737280 | +| train/ | | +| approx_kl | 0.0062526334 | +| clip_fraction | 0.0699 | +| clip_range | 0.2 | +| entropy_loss | -2.72 | +| explained_variance | 0.99 | +| learning_rate | 0.0003 | +| loss | -0.0329 | +| n_updates | 440 | +| policy_gradient_loss | -0.00355 | +| std | 0.942 | +| value_loss | 0.0113 | +------------------------------------------ +Eval num_timesteps=750000, episode_reward=145.04 +/- 16.56 +Episode length: 291.10 +/- 132.25 +------------------------------------------ +| eval/ | | +| mean_ep_length | 291 | +| mean_reward | 145 | +| time/ | | +| total_timesteps | 750000 | +| train/ | | +| approx_kl | 0.0058749127 | +| clip_fraction | 0.0607 | +| clip_range | 0.2 | +| entropy_loss | -2.71 | +| explained_variance | 0.993 | +| learning_rate | 0.0003 | +| loss | -0.0281 | +| n_updates | 450 | +| policy_gradient_loss | -0.00324 | +| std | 0.934 | +| value_loss | 0.00811 | +------------------------------------------ +------------------------------- +| time/ | | +| fps | 2161 | +| iterations | 46 | +| time_elapsed | 348 | +| total_timesteps | 753664 | +------------------------------- +------------------------------------------ +| time/ | | +| fps | 2176 | +| iterations | 47 | +| time_elapsed | 353 | +| total_timesteps | 770048 | +| train/ | | +| approx_kl | 0.0070656985 | +| clip_fraction | 0.0763 | +| clip_range | 0.2 | +| entropy_loss | -2.68 | +| explained_variance | 0.996 | +| learning_rate | 0.0003 | +| loss | -0.0322 | +| n_updates | 460 | +| policy_gradient_loss | -0.00485 | +| std | 0.92 | +| value_loss | 0.00234 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 2193 | +| iterations | 48 | +| time_elapsed | 358 | +| total_timesteps | 786432 | +| train/ | | +| approx_kl | 0.008987564 | +| clip_fraction | 0.112 | +| clip_range | 0.2 | +| entropy_loss | -2.66 | +| explained_variance | 0.997 | +| learning_rate | 0.0003 | +| loss | -0.0471 | +| n_updates | 470 | +| policy_gradient_loss | -0.00864 | +| std | 0.909 | +| value_loss | 0.00178 | +----------------------------------------- +Eval num_timesteps=800000, episode_reward=141.03 +/- 13.75 +Episode length: 256.90 +/- 100.39 +----------------------------------------- +| eval/ | | +| mean_ep_length | 257 | +| mean_reward | 141 | +| time/ | | +| total_timesteps | 800000 | +| train/ | | +| approx_kl | 0.008297143 | +| clip_fraction | 0.0945 | +| clip_range | 0.2 | +| entropy_loss | -2.67 | +| explained_variance | 0.989 | +| learning_rate | 0.0003 | +| loss | -0.0173 | +| n_updates | 480 | +| policy_gradient_loss | -0.00352 | +| std | 0.921 | +| value_loss | 0.00934 | +----------------------------------------- + +[Diag @ 800,000 | n_sheep=1 | success=100%] + SUCCESS 20/20 + action_mag mean=0.333 p10=0.244 p90=0.332 (0=stopped, 1=full speed) + min_flock_radius mean=0.00m best=0.00m (target <5m to compact) + min_dog_to_com mean=1.40m best=0.75m (FLEE_DIST=7m) + min_com_to_pen mean=3.47m best=1.58m + reward/step (mean): progress=+0.1108 alignment=+0.0328 pen_bonus=+0.0366 step_cost=-0.0200 complete=+0.3664 + +[Curriculum] leaving stage n_sheep=1 after 800,000 steps | training success rate (last 100 eps) = 100% +[Curriculum] → 2 sheep at step 800,000 + +------------------------------- +| time/ | | +| fps | 2187 | +| iterations | 49 | +| time_elapsed | 367 | +| total_timesteps | 802816 | +------------------------------- +----------------------------------------- +| time/ | | +| fps | 2201 | +| iterations | 50 | +| time_elapsed | 372 | +| total_timesteps | 819200 | +| train/ | | +| approx_kl | 0.006534174 | +| clip_fraction | 0.0754 | +| clip_range | 0.2 | +| entropy_loss | -2.7 | +| explained_variance | 0.968 | +| learning_rate | 0.0003 | +| loss | -0.0252 | +| n_updates | 490 | +| policy_gradient_loss | 0.00248 | +| std | 0.942 | +| value_loss | 0.021 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 2213 | +| iterations | 51 | +| time_elapsed | 377 | +| total_timesteps | 835584 | +| train/ | | +| approx_kl | 0.012509884 | +| clip_fraction | 0.182 | +| clip_range | 0.2 | +| entropy_loss | -2.73 | +| explained_variance | 0.51 | +| learning_rate | 0.0003 | +| loss | -0.0127 | +| n_updates | 500 | +| policy_gradient_loss | 0.00321 | +| std | 0.953 | +| value_loss | 0.0093 | +----------------------------------------- +Eval num_timesteps=850000, episode_reward=-30.43 +/- 29.94 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -30.4 | +| time/ | | +| total_timesteps | 850000 | +| train/ | | +| approx_kl | 0.009752454 | +| clip_fraction | 0.146 | +| clip_range | 0.2 | +| entropy_loss | -2.74 | +| explained_variance | 0.865 | +| learning_rate | 0.0003 | +| loss | -0.0289 | +| n_updates | 510 | +| policy_gradient_loss | 0.00274 | +| std | 0.95 | +| value_loss | 0.0117 | +----------------------------------------- +------------------------------- +| time/ | | +| fps | 2153 | +| iterations | 52 | +| time_elapsed | 395 | +| total_timesteps | 851968 | +------------------------------- +----------------------------------------- +| time/ | | +| fps | 2166 | +| iterations | 53 | +| time_elapsed | 400 | +| total_timesteps | 868352 | +| train/ | | +| approx_kl | 0.011746319 | +| clip_fraction | 0.133 | +| clip_range | 0.2 | +| entropy_loss | -2.75 | +| explained_variance | 0.953 | +| learning_rate | 0.0003 | +| loss | -0.0316 | +| n_updates | 520 | +| policy_gradient_loss | 0.00116 | +| std | 0.958 | +| value_loss | 0.00603 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 2179 | +| iterations | 54 | +| time_elapsed | 405 | +| total_timesteps | 884736 | +| train/ | | +| approx_kl | 0.008340008 | +| clip_fraction | 0.111 | +| clip_range | 0.2 | +| entropy_loss | -2.75 | +| explained_variance | 0.959 | +| learning_rate | 0.0003 | +| loss | -0.0317 | +| n_updates | 530 | +| policy_gradient_loss | 0.000628 | +| std | 0.955 | +| value_loss | 0.00663 | +----------------------------------------- +Eval num_timesteps=900000, episode_reward=-21.80 +/- 34.98 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -21.8 | +| time/ | | +| total_timesteps | 900000 | +| train/ | | +| approx_kl | 0.010461532 | +| clip_fraction | 0.13 | +| clip_range | 0.2 | +| entropy_loss | -2.74 | +| explained_variance | 0.88 | +| learning_rate | 0.0003 | +| loss | -0.00905 | +| n_updates | 540 | +| policy_gradient_loss | -0.000256 | +| std | 0.951 | +| value_loss | 0.00567 | +----------------------------------------- +------------------------------- +| time/ | | +| fps | 2128 | +| iterations | 55 | +| time_elapsed | 423 | +| total_timesteps | 901120 | +------------------------------- +------------------------------------------ +| time/ | | +| fps | 2139 | +| iterations | 56 | +| time_elapsed | 428 | +| total_timesteps | 917504 | +| train/ | | +| approx_kl | 0.0071650296 | +| clip_fraction | 0.0988 | +| clip_range | 0.2 | +| entropy_loss | -2.75 | +| explained_variance | 0.931 | +| learning_rate | 0.0003 | +| loss | -0.0294 | +| n_updates | 550 | +| policy_gradient_loss | -0.000672 | +| std | 0.957 | +| value_loss | 0.00545 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 2152 | +| iterations | 57 | +| time_elapsed | 433 | +| total_timesteps | 933888 | +| train/ | | +| approx_kl | 0.009678386 | +| clip_fraction | 0.112 | +| clip_range | 0.2 | +| entropy_loss | -2.74 | +| explained_variance | 0.927 | +| learning_rate | 0.0003 | +| loss | -0.0308 | +| n_updates | 560 | +| policy_gradient_loss | -0.000959 | +| std | 0.953 | +| value_loss | 0.00409 | +----------------------------------------- +Eval num_timesteps=950000, episode_reward=-34.37 +/- 35.50 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -34.4 | +| time/ | | +| total_timesteps | 950000 | +| train/ | | +| approx_kl | 0.008903094 | +| clip_fraction | 0.111 | +| clip_range | 0.2 | +| entropy_loss | -2.74 | +| explained_variance | 0.939 | +| learning_rate | 0.0003 | +| loss | -0.0259 | +| n_updates | 570 | +| policy_gradient_loss | -0.000299 | +| std | 0.955 | +| value_loss | 0.00432 | +----------------------------------------- +------------------------------- +| time/ | | +| fps | 2108 | +| iterations | 58 | +| time_elapsed | 450 | +| total_timesteps | 950272 | +------------------------------- +----------------------------------------- +| time/ | | +| fps | 2117 | +| iterations | 59 | +| time_elapsed | 456 | +| total_timesteps | 966656 | +| train/ | | +| approx_kl | 0.008592881 | +| clip_fraction | 0.0954 | +| clip_range | 0.2 | +| entropy_loss | -2.74 | +| explained_variance | 0.929 | +| learning_rate | 0.0003 | +| loss | -0.0173 | +| n_updates | 580 | +| policy_gradient_loss | 0.00103 | +| std | 0.95 | +| value_loss | 0.00265 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 2129 | +| iterations | 60 | +| time_elapsed | 461 | +| total_timesteps | 983040 | +| train/ | | +| approx_kl | 0.010225108 | +| clip_fraction | 0.108 | +| clip_range | 0.2 | +| entropy_loss | -2.74 | +| explained_variance | 0.972 | +| learning_rate | 0.0003 | +| loss | -0.0135 | +| n_updates | 590 | +| policy_gradient_loss | -0.000738 | +| std | 0.954 | +| value_loss | 0.0029 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 2137 | +| iterations | 61 | +| time_elapsed | 467 | +| total_timesteps | 999424 | +| train/ | | +| approx_kl | 0.008312117 | +| clip_fraction | 0.0887 | +| clip_range | 0.2 | +| entropy_loss | -2.75 | +| explained_variance | 0.898 | +| learning_rate | 0.0003 | +| loss | -0.0262 | +| n_updates | 600 | +| policy_gradient_loss | -0.000497 | +| std | 0.958 | +| value_loss | 0.00511 | +----------------------------------------- +Eval num_timesteps=1000000, episode_reward=-32.64 +/- 38.38 +Episode length: 2000.00 +/- 0.00 +---------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -32.6 | +| time/ | | +| total_timesteps | 1000000 | +| train/ | | +| approx_kl | 0.00942917 | +| clip_fraction | 0.105 | +| clip_range | 0.2 | +| entropy_loss | -2.76 | +| explained_variance | 0.961 | +| learning_rate | 0.0003 | +| loss | -0.0331 | +| n_updates | 610 | +| policy_gradient_loss | -0.0023 | +| std | 0.966 | +| value_loss | 0.00282 | +---------------------------------------- + +[Diag @ 1,000,000 | n_sheep=2 | success=0%] + COMPACT_CANT_DRIVE 14/20 + NEVER_COMPACT 6/20 + action_mag mean=0.216 p10=0.000 p90=0.805 (0=stopped, 1=full speed) + min_flock_radius mean=3.39m best=0.00m (target <5m to compact) + min_dog_to_com mean=1.18m best=0.11m (FLEE_DIST=7m) + min_com_to_pen mean=13.11m best=7.44m + reward/step (mean): progress=-0.0011 alignment=+0.0106 pen_bonus=+0.0003 step_cost=-0.0200 complete=+0.0000 +-------------------------------- +| time/ | | +| fps | 2057 | +| iterations | 62 | +| time_elapsed | 493 | +| total_timesteps | 1015808 | +-------------------------------- +--------------------------------------- +| time/ | | +| fps | 2067 | +| iterations | 63 | +| time_elapsed | 499 | +| total_timesteps | 1032192 | +| train/ | | +| approx_kl | 0.008683 | +| clip_fraction | 0.0967 | +| clip_range | 0.2 | +| entropy_loss | -2.77 | +| explained_variance | 0.93 | +| learning_rate | 0.0003 | +| loss | -0.029 | +| n_updates | 620 | +| policy_gradient_loss | -0.000765 | +| std | 0.965 | +| value_loss | 0.00446 | +--------------------------------------- +----------------------------------------- +| time/ | | +| fps | 2077 | +| iterations | 64 | +| time_elapsed | 504 | +| total_timesteps | 1048576 | +| train/ | | +| approx_kl | 0.009014329 | +| clip_fraction | 0.113 | +| clip_range | 0.2 | +| entropy_loss | -2.76 | +| explained_variance | 0.984 | +| learning_rate | 0.0003 | +| loss | -0.0279 | +| n_updates | 630 | +| policy_gradient_loss | -0.00211 | +| std | 0.962 | +| value_loss | 0.00312 | +----------------------------------------- +Eval num_timesteps=1050000, episode_reward=-31.51 +/- 42.52 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -31.5 | +| time/ | | +| total_timesteps | 1050000 | +| train/ | | +| approx_kl | 0.008500135 | +| clip_fraction | 0.105 | +| clip_range | 0.2 | +| entropy_loss | -2.75 | +| explained_variance | 0.968 | +| learning_rate | 0.0003 | +| loss | -0.0306 | +| n_updates | 640 | +| policy_gradient_loss | -0.00312 | +| std | 0.955 | +| value_loss | 0.00288 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 2042 | +| iterations | 65 | +| time_elapsed | 521 | +| total_timesteps | 1064960 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 2056 | +| iterations | 66 | +| time_elapsed | 525 | +| total_timesteps | 1081344 | +| train/ | | +| approx_kl | 0.0069593494 | +| clip_fraction | 0.0923 | +| clip_range | 0.2 | +| entropy_loss | -2.74 | +| explained_variance | 0.835 | +| learning_rate | 0.0003 | +| loss | -0.0291 | +| n_updates | 650 | +| policy_gradient_loss | -0.000469 | +| std | 0.952 | +| value_loss | 0.00186 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 2064 | +| iterations | 67 | +| time_elapsed | 531 | +| total_timesteps | 1097728 | +| train/ | | +| approx_kl | 0.007817726 | +| clip_fraction | 0.0933 | +| clip_range | 0.2 | +| entropy_loss | -2.74 | +| explained_variance | 0.922 | +| learning_rate | 0.0003 | +| loss | -0.0206 | +| n_updates | 660 | +| policy_gradient_loss | -0.00208 | +| std | 0.953 | +| value_loss | 0.00234 | +----------------------------------------- +Eval num_timesteps=1100000, episode_reward=-22.82 +/- 33.61 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -22.8 | +| time/ | | +| total_timesteps | 1100000 | +| train/ | | +| approx_kl | 0.006177975 | +| clip_fraction | 0.0806 | +| clip_range | 0.2 | +| entropy_loss | -2.74 | +| explained_variance | 0.951 | +| learning_rate | 0.0003 | +| loss | -0.026 | +| n_updates | 670 | +| policy_gradient_loss | -5.8e-05 | +| std | 0.951 | +| value_loss | 0.00184 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 2035 | +| iterations | 68 | +| time_elapsed | 547 | +| total_timesteps | 1114112 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 2048 | +| iterations | 69 | +| time_elapsed | 551 | +| total_timesteps | 1130496 | +| train/ | | +| approx_kl | 0.009605391 | +| clip_fraction | 0.102 | +| clip_range | 0.2 | +| entropy_loss | -2.74 | +| explained_variance | 0.951 | +| learning_rate | 0.0003 | +| loss | -0.0344 | +| n_updates | 680 | +| policy_gradient_loss | -0.0022 | +| std | 0.957 | +| value_loss | 0.00221 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 2060 | +| iterations | 70 | +| time_elapsed | 556 | +| total_timesteps | 1146880 | +| train/ | | +| approx_kl | 0.0064521013 | +| clip_fraction | 0.0953 | +| clip_range | 0.2 | +| entropy_loss | -2.75 | +| explained_variance | 0.898 | +| learning_rate | 0.0003 | +| loss | -0.0348 | +| n_updates | 690 | +| policy_gradient_loss | -0.00112 | +| std | 0.96 | +| value_loss | 0.00221 | +------------------------------------------ +Eval num_timesteps=1150000, episode_reward=-26.36 +/- 35.49 +Episode length: 2000.00 +/- 0.00 +---------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -26.4 | +| time/ | | +| total_timesteps | 1150000 | +| train/ | | +| approx_kl | 0.00777065 | +| clip_fraction | 0.0837 | +| clip_range | 0.2 | +| entropy_loss | -2.76 | +| explained_variance | 0.907 | +| learning_rate | 0.0003 | +| loss | -0.0198 | +| n_updates | 700 | +| policy_gradient_loss | -0.000371 | +| std | 0.963 | +| value_loss | 0.00182 | +---------------------------------------- +-------------------------------- +| time/ | | +| fps | 2031 | +| iterations | 71 | +| time_elapsed | 572 | +| total_timesteps | 1163264 | +-------------------------------- +--------------------------------------- +| time/ | | +| fps | 2044 | +| iterations | 72 | +| time_elapsed | 577 | +| total_timesteps | 1179648 | +| train/ | | +| approx_kl | 0.006344 | +| clip_fraction | 0.0719 | +| clip_range | 0.2 | +| entropy_loss | -2.76 | +| explained_variance | 0.908 | +| learning_rate | 0.0003 | +| loss | -0.0347 | +| n_updates | 710 | +| policy_gradient_loss | -0.000455 | +| std | 0.961 | +| value_loss | 0.00145 | +--------------------------------------- +------------------------------------------ +| time/ | | +| fps | 2054 | +| iterations | 73 | +| time_elapsed | 582 | +| total_timesteps | 1196032 | +| train/ | | +| approx_kl | 0.0060829036 | +| clip_fraction | 0.0854 | +| clip_range | 0.2 | +| entropy_loss | -2.75 | +| explained_variance | 0.896 | +| learning_rate | 0.0003 | +| loss | -0.0232 | +| n_updates | 720 | +| policy_gradient_loss | -0.00108 | +| std | 0.957 | +| value_loss | 0.00152 | +------------------------------------------ +Eval num_timesteps=1200000, episode_reward=-14.33 +/- 30.83 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -14.3 | +| time/ | | +| total_timesteps | 1200000 | +| train/ | | +| approx_kl | 0.0073732347 | +| clip_fraction | 0.0783 | +| clip_range | 0.2 | +| entropy_loss | -2.76 | +| explained_variance | 0.948 | +| learning_rate | 0.0003 | +| loss | -0.0267 | +| n_updates | 730 | +| policy_gradient_loss | -0.00212 | +| std | 0.968 | +| value_loss | 0.00253 | +------------------------------------------ + +[Diag @ 1,200,000 | n_sheep=2 | success=0%] + COMPACT_CANT_DRIVE 15/20 + NEVER_COMPACT 5/20 + action_mag mean=0.273 p10=0.004 p90=1.008 (0=stopped, 1=full speed) + min_flock_radius mean=3.94m best=0.97m (target <5m to compact) + min_dog_to_com mean=1.16m best=0.35m (FLEE_DIST=7m) + min_com_to_pen mean=13.54m best=4.20m + reward/step (mean): progress=+0.0001 alignment=+0.0121 pen_bonus=+0.0000 step_cost=-0.0200 complete=+0.0000 +-------------------------------- +| time/ | | +| fps | 1998 | +| iterations | 74 | +| time_elapsed | 606 | +| total_timesteps | 1212416 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 2008 | +| iterations | 75 | +| time_elapsed | 611 | +| total_timesteps | 1228800 | +| train/ | | +| approx_kl | 0.006109112 | +| clip_fraction | 0.0814 | +| clip_range | 0.2 | +| entropy_loss | -2.78 | +| explained_variance | 0.86 | +| learning_rate | 0.0003 | +| loss | -0.0205 | +| n_updates | 740 | +| policy_gradient_loss | -0.000541 | +| std | 0.973 | +| value_loss | 0.00171 | +----------------------------------------- +---------------------------------------- +| time/ | | +| fps | 2016 | +| iterations | 76 | +| time_elapsed | 617 | +| total_timesteps | 1245184 | +| train/ | | +| approx_kl | 0.00703271 | +| clip_fraction | 0.0781 | +| clip_range | 0.2 | +| entropy_loss | -2.78 | +| explained_variance | 0.934 | +| learning_rate | 0.0003 | +| loss | -0.0394 | +| n_updates | 750 | +| policy_gradient_loss | -0.00105 | +| std | 0.975 | +| value_loss | 0.00168 | +---------------------------------------- +Eval num_timesteps=1250000, episode_reward=-18.12 +/- 39.82 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -18.1 | +| time/ | | +| total_timesteps | 1250000 | +| train/ | | +| approx_kl | 0.0064994176 | +| clip_fraction | 0.0698 | +| clip_range | 0.2 | +| entropy_loss | -2.8 | +| explained_variance | 0.919 | +| learning_rate | 0.0003 | +| loss | -0.0166 | +| n_updates | 760 | +| policy_gradient_loss | -0.000919 | +| std | 0.985 | +| value_loss | 0.000832 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1989 | +| iterations | 77 | +| time_elapsed | 634 | +| total_timesteps | 1261568 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 2001 | +| iterations | 78 | +| time_elapsed | 638 | +| total_timesteps | 1277952 | +| train/ | | +| approx_kl | 0.008321709 | +| clip_fraction | 0.0902 | +| clip_range | 0.2 | +| entropy_loss | -2.81 | +| explained_variance | 0.874 | +| learning_rate | 0.0003 | +| loss | -0.0295 | +| n_updates | 770 | +| policy_gradient_loss | -0.00219 | +| std | 0.991 | +| value_loss | 0.00127 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 2010 | +| iterations | 79 | +| time_elapsed | 643 | +| total_timesteps | 1294336 | +| train/ | | +| approx_kl | 0.009220061 | +| clip_fraction | 0.112 | +| clip_range | 0.2 | +| entropy_loss | -2.82 | +| explained_variance | 0.952 | +| learning_rate | 0.0003 | +| loss | -0.0379 | +| n_updates | 780 | +| policy_gradient_loss | -0.00411 | +| std | 0.994 | +| value_loss | 0.00295 | +----------------------------------------- +Eval num_timesteps=1300000, episode_reward=-22.41 +/- 35.57 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -22.4 | +| time/ | | +| total_timesteps | 1300000 | +| train/ | | +| approx_kl | 0.0071307076 | +| clip_fraction | 0.0826 | +| clip_range | 0.2 | +| entropy_loss | -2.83 | +| explained_variance | 0.948 | +| learning_rate | 0.0003 | +| loss | -0.0281 | +| n_updates | 790 | +| policy_gradient_loss | -0.00178 | +| std | 0.995 | +| value_loss | 0.00169 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1986 | +| iterations | 80 | +| time_elapsed | 659 | +| total_timesteps | 1310720 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1996 | +| iterations | 81 | +| time_elapsed | 664 | +| total_timesteps | 1327104 | +| train/ | | +| approx_kl | 0.008566003 | +| clip_fraction | 0.0857 | +| clip_range | 0.2 | +| entropy_loss | -2.84 | +| explained_variance | 0.904 | +| learning_rate | 0.0003 | +| loss | -0.0369 | +| n_updates | 800 | +| policy_gradient_loss | -0.00199 | +| std | 1.01 | +| value_loss | 0.00203 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 2006 | +| iterations | 82 | +| time_elapsed | 669 | +| total_timesteps | 1343488 | +| train/ | | +| approx_kl | 0.0082352655 | +| clip_fraction | 0.0989 | +| clip_range | 0.2 | +| entropy_loss | -2.86 | +| explained_variance | 0.918 | +| learning_rate | 0.0003 | +| loss | -0.0297 | +| n_updates | 810 | +| policy_gradient_loss | -0.0023 | +| std | 1.01 | +| value_loss | 0.00203 | +------------------------------------------ +Eval num_timesteps=1350000, episode_reward=-14.21 +/- 38.53 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -14.2 | +| time/ | | +| total_timesteps | 1350000 | +| train/ | | +| approx_kl | 0.0066830693 | +| clip_fraction | 0.0831 | +| clip_range | 0.2 | +| entropy_loss | -2.86 | +| explained_variance | 0.923 | +| learning_rate | 0.0003 | +| loss | -0.0331 | +| n_updates | 820 | +| policy_gradient_loss | -0.00226 | +| std | 1.01 | +| value_loss | 0.00125 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1983 | +| iterations | 83 | +| time_elapsed | 685 | +| total_timesteps | 1359872 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1991 | +| iterations | 84 | +| time_elapsed | 691 | +| total_timesteps | 1376256 | +| train/ | | +| approx_kl | 0.008341949 | +| clip_fraction | 0.101 | +| clip_range | 0.2 | +| entropy_loss | -2.85 | +| explained_variance | 0.928 | +| learning_rate | 0.0003 | +| loss | -0.0156 | +| n_updates | 830 | +| policy_gradient_loss | -0.00132 | +| std | 1.01 | +| value_loss | 0.00407 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1999 | +| iterations | 85 | +| time_elapsed | 696 | +| total_timesteps | 1392640 | +| train/ | | +| approx_kl | 0.010089031 | +| clip_fraction | 0.109 | +| clip_range | 0.2 | +| entropy_loss | -2.84 | +| explained_variance | 0.914 | +| learning_rate | 0.0003 | +| loss | -0.0249 | +| n_updates | 840 | +| policy_gradient_loss | -0.00202 | +| std | 0.999 | +| value_loss | 0.00555 | +----------------------------------------- +Eval num_timesteps=1400000, episode_reward=-5.74 +/- 37.76 +Episode length: 2000.00 +/- 0.00 +---------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -5.74 | +| time/ | | +| total_timesteps | 1400000 | +| train/ | | +| approx_kl | 0.00840036 | +| clip_fraction | 0.112 | +| clip_range | 0.2 | +| entropy_loss | -2.84 | +| explained_variance | 0.915 | +| learning_rate | 0.0003 | +| loss | -0.0267 | +| n_updates | 850 | +| policy_gradient_loss | -0.00422 | +| std | 1 | +| value_loss | 0.0017 | +---------------------------------------- + +[Diag @ 1,400,000 | n_sheep=2 | success=0%] + COMPACT_CANT_DRIVE 16/20 + NEVER_COMPACT 4/20 + action_mag mean=0.258 p10=0.000 p90=1.004 (0=stopped, 1=full speed) + min_flock_radius mean=3.30m best=0.61m (target <5m to compact) + min_dog_to_com mean=0.76m best=0.22m (FLEE_DIST=7m) + min_com_to_pen mean=12.16m best=4.08m + reward/step (mean): progress=+0.0035 alignment=+0.0165 pen_bonus=+0.0000 step_cost=-0.0200 complete=+0.0000 +-------------------------------- +| time/ | | +| fps | 1954 | +| iterations | 86 | +| time_elapsed | 720 | +| total_timesteps | 1409024 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1964 | +| iterations | 87 | +| time_elapsed | 725 | +| total_timesteps | 1425408 | +| train/ | | +| approx_kl | 0.007908808 | +| clip_fraction | 0.0839 | +| clip_range | 0.2 | +| entropy_loss | -2.85 | +| explained_variance | 0.755 | +| learning_rate | 0.0003 | +| loss | -0.018 | +| n_updates | 860 | +| policy_gradient_loss | -0.00223 | +| std | 1.01 | +| value_loss | 0.00248 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1972 | +| iterations | 88 | +| time_elapsed | 730 | +| total_timesteps | 1441792 | +| train/ | | +| approx_kl | 0.007957449 | +| clip_fraction | 0.0864 | +| clip_range | 0.2 | +| entropy_loss | -2.86 | +| explained_variance | 0.868 | +| learning_rate | 0.0003 | +| loss | -0.0315 | +| n_updates | 870 | +| policy_gradient_loss | -0.00288 | +| std | 1.01 | +| value_loss | 0.00145 | +----------------------------------------- +Eval num_timesteps=1450000, episode_reward=-13.10 +/- 29.51 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -13.1 | +| time/ | | +| total_timesteps | 1450000 | +| train/ | | +| approx_kl | 0.007803983 | +| clip_fraction | 0.083 | +| clip_range | 0.2 | +| entropy_loss | -2.86 | +| explained_variance | 0.83 | +| learning_rate | 0.0003 | +| loss | -0.0212 | +| n_updates | 880 | +| policy_gradient_loss | -0.00119 | +| std | 1.01 | +| value_loss | 0.00191 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1952 | +| iterations | 89 | +| time_elapsed | 746 | +| total_timesteps | 1458176 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1961 | +| iterations | 90 | +| time_elapsed | 751 | +| total_timesteps | 1474560 | +| train/ | | +| approx_kl | 0.010021031 | +| clip_fraction | 0.097 | +| clip_range | 0.2 | +| entropy_loss | -2.88 | +| explained_variance | 0.902 | +| learning_rate | 0.0003 | +| loss | -0.0221 | +| n_updates | 890 | +| policy_gradient_loss | -0.00294 | +| std | 1.02 | +| value_loss | 0.00136 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 1970 | +| iterations | 91 | +| time_elapsed | 756 | +| total_timesteps | 1490944 | +| train/ | | +| approx_kl | 0.0076614916 | +| clip_fraction | 0.0963 | +| clip_range | 0.2 | +| entropy_loss | -2.89 | +| explained_variance | 0.945 | +| learning_rate | 0.0003 | +| loss | -0.0273 | +| n_updates | 900 | +| policy_gradient_loss | -0.00355 | +| std | 1.03 | +| value_loss | 0.00181 | +------------------------------------------ +Eval num_timesteps=1500000, episode_reward=5.01 +/- 34.23 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 5.01 | +| time/ | | +| total_timesteps | 1500000 | +| train/ | | +| approx_kl | 0.005815446 | +| clip_fraction | 0.0675 | +| clip_range | 0.2 | +| entropy_loss | -2.9 | +| explained_variance | 0.934 | +| learning_rate | 0.0003 | +| loss | -0.0316 | +| n_updates | 910 | +| policy_gradient_loss | -0.00215 | +| std | 1.03 | +| value_loss | 0.00162 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1950 | +| iterations | 92 | +| time_elapsed | 772 | +| total_timesteps | 1507328 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1959 | +| iterations | 93 | +| time_elapsed | 777 | +| total_timesteps | 1523712 | +| train/ | | +| approx_kl | 0.0071218535 | +| clip_fraction | 0.0897 | +| clip_range | 0.2 | +| entropy_loss | -2.9 | +| explained_variance | 0.937 | +| learning_rate | 0.0003 | +| loss | -0.0219 | +| n_updates | 920 | +| policy_gradient_loss | -0.00225 | +| std | 1.03 | +| value_loss | 0.00463 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1967 | +| iterations | 94 | +| time_elapsed | 782 | +| total_timesteps | 1540096 | +| train/ | | +| approx_kl | 0.006857206 | +| clip_fraction | 0.0809 | +| clip_range | 0.2 | +| entropy_loss | -2.89 | +| explained_variance | 0.933 | +| learning_rate | 0.0003 | +| loss | -0.0252 | +| n_updates | 930 | +| policy_gradient_loss | -0.00219 | +| std | 1.02 | +| value_loss | 0.00436 | +----------------------------------------- +Eval num_timesteps=1550000, episode_reward=-4.04 +/- 33.69 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -4.04 | +| time/ | | +| total_timesteps | 1550000 | +| train/ | | +| approx_kl | 0.006146897 | +| clip_fraction | 0.0821 | +| clip_range | 0.2 | +| entropy_loss | -2.87 | +| explained_variance | 0.913 | +| learning_rate | 0.0003 | +| loss | -0.0352 | +| n_updates | 940 | +| policy_gradient_loss | -0.00258 | +| std | 1.02 | +| value_loss | 0.00325 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1948 | +| iterations | 95 | +| time_elapsed | 798 | +| total_timesteps | 1556480 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1958 | +| iterations | 96 | +| time_elapsed | 803 | +| total_timesteps | 1572864 | +| train/ | | +| approx_kl | 0.0069321445 | +| clip_fraction | 0.0778 | +| clip_range | 0.2 | +| entropy_loss | -2.86 | +| explained_variance | 0.94 | +| learning_rate | 0.0003 | +| loss | -0.013 | +| n_updates | 950 | +| policy_gradient_loss | -0.00214 | +| std | 1.01 | +| value_loss | 0.00162 | +------------------------------------------ +------------------------------------------ +| time/ | | +| fps | 1965 | +| iterations | 97 | +| time_elapsed | 808 | +| total_timesteps | 1589248 | +| train/ | | +| approx_kl | 0.0066491435 | +| clip_fraction | 0.0714 | +| clip_range | 0.2 | +| entropy_loss | -2.88 | +| explained_variance | 0.941 | +| learning_rate | 0.0003 | +| loss | -0.0304 | +| n_updates | 960 | +| policy_gradient_loss | -0.00212 | +| std | 1.03 | +| value_loss | 0.0011 | +------------------------------------------ +Eval num_timesteps=1600000, episode_reward=12.65 +/- 31.73 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 12.6 | +| time/ | | +| total_timesteps | 1600000 | +| train/ | | +| approx_kl | 0.0050257677 | +| clip_fraction | 0.0588 | +| clip_range | 0.2 | +| entropy_loss | -2.9 | +| explained_variance | 0.939 | +| learning_rate | 0.0003 | +| loss | -0.0359 | +| n_updates | 970 | +| policy_gradient_loss | -0.0013 | +| std | 1.04 | +| value_loss | 0.00201 | +------------------------------------------ + +[Diag @ 1,600,000 | n_sheep=2 | success=0%] + COMPACT_CANT_DRIVE 13/20 + NEVER_COMPACT 7/20 + action_mag mean=0.252 p10=0.004 p90=0.980 (0=stopped, 1=full speed) + min_flock_radius mean=4.30m best=0.92m (target <5m to compact) + min_dog_to_com mean=0.74m best=0.38m (FLEE_DIST=7m) + min_com_to_pen mean=13.76m best=5.49m + reward/step (mean): progress=-0.0006 alignment=+0.0287 pen_bonus=+0.0000 step_cost=-0.0200 complete=+0.0000 + +[Curriculum] leaving stage n_sheep=2 after 800,000 steps | training success rate (last 100 eps) = 0% +[Curriculum] → 3 sheep at step 1,600,000 + +-------------------------------- +| time/ | | +| fps | 1930 | +| iterations | 98 | +| time_elapsed | 831 | +| total_timesteps | 1605632 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1937 | +| iterations | 99 | +| time_elapsed | 837 | +| total_timesteps | 1622016 | +| train/ | | +| approx_kl | 0.0085028205 | +| clip_fraction | 0.0905 | +| clip_range | 0.2 | +| entropy_loss | -2.89 | +| explained_variance | 0.909 | +| learning_rate | 0.0003 | +| loss | -0.0346 | +| n_updates | 980 | +| policy_gradient_loss | -0.00245 | +| std | 1.02 | +| value_loss | 0.00492 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1945 | +| iterations | 100 | +| time_elapsed | 842 | +| total_timesteps | 1638400 | +| train/ | | +| approx_kl | 0.009084044 | +| clip_fraction | 0.118 | +| clip_range | 0.2 | +| entropy_loss | -2.91 | +| explained_variance | 0.964 | +| learning_rate | 0.0003 | +| loss | -0.0416 | +| n_updates | 990 | +| policy_gradient_loss | 0.0025 | +| std | 1.04 | +| value_loss | 0.00194 | +----------------------------------------- +Eval num_timesteps=1650000, episode_reward=3.05 +/- 36.42 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 3.05 | +| time/ | | +| total_timesteps | 1650000 | +| train/ | | +| approx_kl | 0.009275759 | +| clip_fraction | 0.108 | +| clip_range | 0.2 | +| entropy_loss | -2.92 | +| explained_variance | 0.965 | +| learning_rate | 0.0003 | +| loss | -0.0336 | +| n_updates | 1000 | +| policy_gradient_loss | 0.000149 | +| std | 1.04 | +| value_loss | 0.00185 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1926 | +| iterations | 101 | +| time_elapsed | 859 | +| total_timesteps | 1654784 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1934 | +| iterations | 102 | +| time_elapsed | 864 | +| total_timesteps | 1671168 | +| train/ | | +| approx_kl | 0.008650862 | +| clip_fraction | 0.117 | +| clip_range | 0.2 | +| entropy_loss | -2.92 | +| explained_variance | 0.938 | +| learning_rate | 0.0003 | +| loss | -0.0279 | +| n_updates | 1010 | +| policy_gradient_loss | -0.000545 | +| std | 1.04 | +| value_loss | 0.00611 | +----------------------------------------- +--------------------------------------- +| time/ | | +| fps | 1939 | +| iterations | 103 | +| time_elapsed | 869 | +| total_timesteps | 1687552 | +| train/ | | +| approx_kl | 0.0080826 | +| clip_fraction | 0.0992 | +| clip_range | 0.2 | +| entropy_loss | -2.93 | +| explained_variance | 0.952 | +| learning_rate | 0.0003 | +| loss | -0.0415 | +| n_updates | 1020 | +| policy_gradient_loss | -0.00201 | +| std | 1.05 | +| value_loss | 0.00251 | +--------------------------------------- +Eval num_timesteps=1700000, episode_reward=-4.66 +/- 36.05 +Episode length: 2000.00 +/- 0.00 +---------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -4.66 | +| time/ | | +| total_timesteps | 1700000 | +| train/ | | +| approx_kl | 0.00786162 | +| clip_fraction | 0.0921 | +| clip_range | 0.2 | +| entropy_loss | -2.95 | +| explained_variance | 0.893 | +| learning_rate | 0.0003 | +| loss | -0.0301 | +| n_updates | 1030 | +| policy_gradient_loss | -0.000631 | +| std | 1.06 | +| value_loss | 0.00158 | +---------------------------------------- +-------------------------------- +| time/ | | +| fps | 1922 | +| iterations | 104 | +| time_elapsed | 886 | +| total_timesteps | 1703936 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1930 | +| iterations | 105 | +| time_elapsed | 891 | +| total_timesteps | 1720320 | +| train/ | | +| approx_kl | 0.008055547 | +| clip_fraction | 0.0842 | +| clip_range | 0.2 | +| entropy_loss | -2.96 | +| explained_variance | 0.918 | +| learning_rate | 0.0003 | +| loss | -0.027 | +| n_updates | 1040 | +| policy_gradient_loss | -6.56e-05 | +| std | 1.07 | +| value_loss | 0.00193 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1937 | +| iterations | 106 | +| time_elapsed | 896 | +| total_timesteps | 1736704 | +| train/ | | +| approx_kl | 0.008067045 | +| clip_fraction | 0.087 | +| clip_range | 0.2 | +| entropy_loss | -2.97 | +| explained_variance | 0.878 | +| learning_rate | 0.0003 | +| loss | -0.0281 | +| n_updates | 1050 | +| policy_gradient_loss | -0.00194 | +| std | 1.07 | +| value_loss | 0.0082 | +----------------------------------------- +Eval num_timesteps=1750000, episode_reward=-0.31 +/- 42.66 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | -0.309 | +| time/ | | +| total_timesteps | 1750000 | +| train/ | | +| approx_kl | 0.0066514863 | +| clip_fraction | 0.0808 | +| clip_range | 0.2 | +| entropy_loss | -2.99 | +| explained_variance | 0.888 | +| learning_rate | 0.0003 | +| loss | -0.0335 | +| n_updates | 1060 | +| policy_gradient_loss | -0.00108 | +| std | 1.08 | +| value_loss | 0.00303 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1921 | +| iterations | 107 | +| time_elapsed | 912 | +| total_timesteps | 1753088 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1927 | +| iterations | 108 | +| time_elapsed | 917 | +| total_timesteps | 1769472 | +| train/ | | +| approx_kl | 0.008252729 | +| clip_fraction | 0.093 | +| clip_range | 0.2 | +| entropy_loss | -3 | +| explained_variance | 0.959 | +| learning_rate | 0.0003 | +| loss | -0.0413 | +| n_updates | 1070 | +| policy_gradient_loss | -0.00241 | +| std | 1.09 | +| value_loss | 0.00122 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 1935 | +| iterations | 109 | +| time_elapsed | 922 | +| total_timesteps | 1785856 | +| train/ | | +| approx_kl | 0.0073527684 | +| clip_fraction | 0.0822 | +| clip_range | 0.2 | +| entropy_loss | -3.01 | +| explained_variance | 0.883 | +| learning_rate | 0.0003 | +| loss | -0.018 | +| n_updates | 1080 | +| policy_gradient_loss | -0.00172 | +| std | 1.1 | +| value_loss | 0.00172 | +------------------------------------------ +Eval num_timesteps=1800000, episode_reward=8.99 +/- 39.35 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 8.99 | +| time/ | | +| total_timesteps | 1800000 | +| train/ | | +| approx_kl | 0.006149094 | +| clip_fraction | 0.0771 | +| clip_range | 0.2 | +| entropy_loss | -3.03 | +| explained_variance | 0.911 | +| learning_rate | 0.0003 | +| loss | -0.0315 | +| n_updates | 1090 | +| policy_gradient_loss | -0.000744 | +| std | 1.1 | +| value_loss | 0.00456 | +----------------------------------------- + +[Diag @ 1,800,000 | n_sheep=3 | success=0%] + NEVER_COMPACT 19/20 + COMPACT_CANT_DRIVE 1/20 + action_mag mean=0.049 p10=0.007 p90=0.049 (0=stopped, 1=full speed) + min_flock_radius mean=7.79m best=4.73m (target <5m to compact) + min_dog_to_com mean=0.92m best=0.25m (FLEE_DIST=7m) + min_com_to_pen mean=14.27m best=7.54m + reward/step (mean): progress=-0.0043 alignment=+0.0208 pen_bonus=+0.0000 step_cost=-0.0200 complete=+0.0000 +-------------------------------- +| time/ | | +| fps | 1899 | +| iterations | 110 | +| time_elapsed | 948 | +| total_timesteps | 1802240 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1906 | +| iterations | 111 | +| time_elapsed | 953 | +| total_timesteps | 1818624 | +| train/ | | +| approx_kl | 0.007161974 | +| clip_fraction | 0.0871 | +| clip_range | 0.2 | +| entropy_loss | -3.03 | +| explained_variance | 0.914 | +| learning_rate | 0.0003 | +| loss | -0.0359 | +| n_updates | 1100 | +| policy_gradient_loss | -0.00186 | +| std | 1.1 | +| value_loss | 0.00214 | +----------------------------------------- +---------------------------------------- +| time/ | | +| fps | 1914 | +| iterations | 112 | +| time_elapsed | 958 | +| total_timesteps | 1835008 | +| train/ | | +| approx_kl | 0.00886854 | +| clip_fraction | 0.103 | +| clip_range | 0.2 | +| entropy_loss | -3.04 | +| explained_variance | 0.94 | +| learning_rate | 0.0003 | +| loss | -0.04 | +| n_updates | 1110 | +| policy_gradient_loss | -0.00333 | +| std | 1.11 | +| value_loss | 0.00456 | +---------------------------------------- +Eval num_timesteps=1850000, episode_reward=14.49 +/- 36.35 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 14.5 | +| time/ | | +| total_timesteps | 1850000 | +| train/ | | +| approx_kl | 0.0058414284 | +| clip_fraction | 0.0642 | +| clip_range | 0.2 | +| entropy_loss | -3.05 | +| explained_variance | 0.871 | +| learning_rate | 0.0003 | +| loss | -0.033 | +| n_updates | 1120 | +| policy_gradient_loss | -0.000891 | +| std | 1.11 | +| value_loss | 0.00394 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1898 | +| iterations | 113 | +| time_elapsed | 975 | +| total_timesteps | 1851392 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1906 | +| iterations | 114 | +| time_elapsed | 979 | +| total_timesteps | 1867776 | +| train/ | | +| approx_kl | 0.008916938 | +| clip_fraction | 0.0916 | +| clip_range | 0.2 | +| entropy_loss | -3.05 | +| explained_variance | 0.937 | +| learning_rate | 0.0003 | +| loss | -0.0334 | +| n_updates | 1130 | +| policy_gradient_loss | -0.00257 | +| std | 1.12 | +| value_loss | 0.00285 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1913 | +| iterations | 115 | +| time_elapsed | 984 | +| total_timesteps | 1884160 | +| train/ | | +| approx_kl | 0.008523149 | +| clip_fraction | 0.0907 | +| clip_range | 0.2 | +| entropy_loss | -3.06 | +| explained_variance | 0.954 | +| learning_rate | 0.0003 | +| loss | -0.0339 | +| n_updates | 1140 | +| policy_gradient_loss | -0.0034 | +| std | 1.12 | +| value_loss | 0.00209 | +----------------------------------------- +Eval num_timesteps=1900000, episode_reward=9.85 +/- 42.18 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 9.85 | +| time/ | | +| total_timesteps | 1900000 | +| train/ | | +| approx_kl | 0.0075978916 | +| clip_fraction | 0.0819 | +| clip_range | 0.2 | +| entropy_loss | -3.06 | +| explained_variance | 0.96 | +| learning_rate | 0.0003 | +| loss | -0.0313 | +| n_updates | 1150 | +| policy_gradient_loss | -0.00272 | +| std | 1.12 | +| value_loss | 0.00332 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1896 | +| iterations | 116 | +| time_elapsed | 1002 | +| total_timesteps | 1900544 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1902 | +| iterations | 117 | +| time_elapsed | 1007 | +| total_timesteps | 1916928 | +| train/ | | +| approx_kl | 0.008376695 | +| clip_fraction | 0.0935 | +| clip_range | 0.2 | +| entropy_loss | -3.07 | +| explained_variance | 0.964 | +| learning_rate | 0.0003 | +| loss | -0.0392 | +| n_updates | 1160 | +| policy_gradient_loss | -0.00354 | +| std | 1.12 | +| value_loss | 0.00203 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 1909 | +| iterations | 118 | +| time_elapsed | 1012 | +| total_timesteps | 1933312 | +| train/ | | +| approx_kl | 0.0077100536 | +| clip_fraction | 0.0854 | +| clip_range | 0.2 | +| entropy_loss | -3.07 | +| explained_variance | 0.933 | +| learning_rate | 0.0003 | +| loss | -0.0467 | +| n_updates | 1170 | +| policy_gradient_loss | -0.00421 | +| std | 1.12 | +| value_loss | 0.00132 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1915 | +| iterations | 119 | +| time_elapsed | 1018 | +| total_timesteps | 1949696 | +| train/ | | +| approx_kl | 0.006848542 | +| clip_fraction | 0.0674 | +| clip_range | 0.2 | +| entropy_loss | -3.07 | +| explained_variance | 0.959 | +| learning_rate | 0.0003 | +| loss | -0.0335 | +| n_updates | 1180 | +| policy_gradient_loss | -0.00229 | +| std | 1.13 | +| value_loss | 0.00138 | +----------------------------------------- +Eval num_timesteps=1950000, episode_reward=29.72 +/- 38.42 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 29.7 | +| time/ | | +| total_timesteps | 1950000 | +| train/ | | +| approx_kl | 0.007300608 | +| clip_fraction | 0.0824 | +| clip_range | 0.2 | +| entropy_loss | -3.07 | +| explained_variance | 0.977 | +| learning_rate | 0.0003 | +| loss | -0.0358 | +| n_updates | 1190 | +| policy_gradient_loss | -0.00364 | +| std | 1.12 | +| value_loss | 0.00159 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1899 | +| iterations | 120 | +| time_elapsed | 1034 | +| total_timesteps | 1966080 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1906 | +| iterations | 121 | +| time_elapsed | 1040 | +| total_timesteps | 1982464 | +| train/ | | +| approx_kl | 0.0072772675 | +| clip_fraction | 0.0703 | +| clip_range | 0.2 | +| entropy_loss | -3.07 | +| explained_variance | 0.882 | +| learning_rate | 0.0003 | +| loss | -0.0357 | +| n_updates | 1200 | +| policy_gradient_loss | -0.00163 | +| std | 1.13 | +| value_loss | 0.00471 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1912 | +| iterations | 122 | +| time_elapsed | 1045 | +| total_timesteps | 1998848 | +| train/ | | +| approx_kl | 0.007866079 | +| clip_fraction | 0.0898 | +| clip_range | 0.2 | +| entropy_loss | -3.07 | +| explained_variance | 0.962 | +| learning_rate | 0.0003 | +| loss | -0.0304 | +| n_updates | 1210 | +| policy_gradient_loss | -0.0052 | +| std | 1.13 | +| value_loss | 0.0014 | +----------------------------------------- +Eval num_timesteps=2000000, episode_reward=14.20 +/- 34.02 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 14.2 | +| time/ | | +| total_timesteps | 2000000 | +| train/ | | +| approx_kl | 0.0073383995 | +| clip_fraction | 0.083 | +| clip_range | 0.2 | +| entropy_loss | -3.07 | +| explained_variance | 0.95 | +| learning_rate | 0.0003 | +| loss | -0.0369 | +| n_updates | 1220 | +| policy_gradient_loss | -0.00296 | +| std | 1.12 | +| value_loss | 0.00336 | +------------------------------------------ + +[Diag @ 2,000,000 | n_sheep=3 | success=0%] + NEVER_COMPACT 12/20 + COMPACT_CANT_DRIVE 8/20 + action_mag mean=0.076 p10=0.007 p90=0.097 (0=stopped, 1=full speed) + min_flock_radius mean=5.33m best=0.00m (target <5m to compact) + min_dog_to_com mean=1.01m best=0.16m (FLEE_DIST=7m) + min_com_to_pen mean=12.40m best=6.50m + reward/step (mean): progress=+0.0041 alignment=+0.0263 pen_bonus=+0.0013 step_cost=-0.0200 complete=+0.0000 +-------------------------------- +| time/ | | +| fps | 1881 | +| iterations | 123 | +| time_elapsed | 1071 | +| total_timesteps | 2015232 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1887 | +| iterations | 124 | +| time_elapsed | 1076 | +| total_timesteps | 2031616 | +| train/ | | +| approx_kl | 0.0060287267 | +| clip_fraction | 0.0716 | +| clip_range | 0.2 | +| entropy_loss | -3.07 | +| explained_variance | 0.902 | +| learning_rate | 0.0003 | +| loss | -0.0402 | +| n_updates | 1230 | +| policy_gradient_loss | -0.00308 | +| std | 1.13 | +| value_loss | 0.00475 | +------------------------------------------ +------------------------------------------ +| time/ | | +| fps | 1894 | +| iterations | 125 | +| time_elapsed | 1081 | +| total_timesteps | 2048000 | +| train/ | | +| approx_kl | 0.0073304214 | +| clip_fraction | 0.08 | +| clip_range | 0.2 | +| entropy_loss | -3.08 | +| explained_variance | 0.95 | +| learning_rate | 0.0003 | +| loss | -0.0436 | +| n_updates | 1240 | +| policy_gradient_loss | -0.00373 | +| std | 1.13 | +| value_loss | 0.00138 | +------------------------------------------ +Eval num_timesteps=2050000, episode_reward=18.68 +/- 36.20 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 18.7 | +| time/ | | +| total_timesteps | 2050000 | +| train/ | | +| approx_kl | 0.0068036346 | +| clip_fraction | 0.0768 | +| clip_range | 0.2 | +| entropy_loss | -3.07 | +| explained_variance | 0.897 | +| learning_rate | 0.0003 | +| loss | -0.0461 | +| n_updates | 1250 | +| policy_gradient_loss | -0.00392 | +| std | 1.13 | +| value_loss | 0.0013 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1880 | +| iterations | 126 | +| time_elapsed | 1097 | +| total_timesteps | 2064384 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1886 | +| iterations | 127 | +| time_elapsed | 1102 | +| total_timesteps | 2080768 | +| train/ | | +| approx_kl | 0.006960577 | +| clip_fraction | 0.0689 | +| clip_range | 0.2 | +| entropy_loss | -3.07 | +| explained_variance | 0.917 | +| learning_rate | 0.0003 | +| loss | -0.0302 | +| n_updates | 1260 | +| policy_gradient_loss | -0.00248 | +| std | 1.12 | +| value_loss | 0.00841 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1892 | +| iterations | 128 | +| time_elapsed | 1108 | +| total_timesteps | 2097152 | +| train/ | | +| approx_kl | 0.007300884 | +| clip_fraction | 0.0705 | +| clip_range | 0.2 | +| entropy_loss | -3.09 | +| explained_variance | 0.915 | +| learning_rate | 0.0003 | +| loss | -0.0338 | +| n_updates | 1270 | +| policy_gradient_loss | -0.00351 | +| std | 1.14 | +| value_loss | 0.00336 | +----------------------------------------- +Eval num_timesteps=2100000, episode_reward=37.33 +/- 41.91 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 37.3 | +| time/ | | +| total_timesteps | 2100000 | +| train/ | | +| approx_kl | 0.007571588 | +| clip_fraction | 0.076 | +| clip_range | 0.2 | +| entropy_loss | -3.1 | +| explained_variance | 0.907 | +| learning_rate | 0.0003 | +| loss | -0.0278 | +| n_updates | 1280 | +| policy_gradient_loss | -0.00336 | +| std | 1.14 | +| value_loss | 0.00228 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1878 | +| iterations | 129 | +| time_elapsed | 1124 | +| total_timesteps | 2113536 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1884 | +| iterations | 130 | +| time_elapsed | 1130 | +| total_timesteps | 2129920 | +| train/ | | +| approx_kl | 0.007885255 | +| clip_fraction | 0.088 | +| clip_range | 0.2 | +| entropy_loss | -3.11 | +| explained_variance | 0.939 | +| learning_rate | 0.0003 | +| loss | -0.0388 | +| n_updates | 1290 | +| policy_gradient_loss | -0.00498 | +| std | 1.15 | +| value_loss | 0.00231 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 1890 | +| iterations | 131 | +| time_elapsed | 1135 | +| total_timesteps | 2146304 | +| train/ | | +| approx_kl | 0.0073760273 | +| clip_fraction | 0.0769 | +| clip_range | 0.2 | +| entropy_loss | -3.11 | +| explained_variance | 0.955 | +| learning_rate | 0.0003 | +| loss | -0.0277 | +| n_updates | 1300 | +| policy_gradient_loss | -0.00306 | +| std | 1.15 | +| value_loss | 0.00294 | +------------------------------------------ +Eval num_timesteps=2150000, episode_reward=31.84 +/- 38.92 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 31.8 | +| time/ | | +| total_timesteps | 2150000 | +| train/ | | +| approx_kl | 0.006736047 | +| clip_fraction | 0.0685 | +| clip_range | 0.2 | +| entropy_loss | -3.12 | +| explained_variance | 0.913 | +| learning_rate | 0.0003 | +| loss | -0.0302 | +| n_updates | 1310 | +| policy_gradient_loss | -0.0021 | +| std | 1.16 | +| value_loss | 0.00422 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1872 | +| iterations | 132 | +| time_elapsed | 1155 | +| total_timesteps | 2162688 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1878 | +| iterations | 133 | +| time_elapsed | 1160 | +| total_timesteps | 2179072 | +| train/ | | +| approx_kl | 0.006166819 | +| clip_fraction | 0.0668 | +| clip_range | 0.2 | +| entropy_loss | -3.13 | +| explained_variance | 0.956 | +| learning_rate | 0.0003 | +| loss | -0.0473 | +| n_updates | 1320 | +| policy_gradient_loss | -0.00364 | +| std | 1.16 | +| value_loss | 0.00158 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 1884 | +| iterations | 134 | +| time_elapsed | 1165 | +| total_timesteps | 2195456 | +| train/ | | +| approx_kl | 0.0075986157 | +| clip_fraction | 0.0769 | +| clip_range | 0.2 | +| entropy_loss | -3.14 | +| explained_variance | 0.966 | +| learning_rate | 0.0003 | +| loss | -0.0317 | +| n_updates | 1330 | +| policy_gradient_loss | -0.00398 | +| std | 1.17 | +| value_loss | 0.00307 | +------------------------------------------ +Eval num_timesteps=2200000, episode_reward=26.98 +/- 37.84 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 27 | +| time/ | | +| total_timesteps | 2200000 | +| train/ | | +| approx_kl | 0.008170303 | +| clip_fraction | 0.0981 | +| clip_range | 0.2 | +| entropy_loss | -3.14 | +| explained_variance | 0.964 | +| learning_rate | 0.0003 | +| loss | -0.0326 | +| n_updates | 1340 | +| policy_gradient_loss | -0.00415 | +| std | 1.16 | +| value_loss | 0.00349 | +----------------------------------------- + +[Diag @ 2,200,000 | n_sheep=3 | success=0%] + NEVER_COMPACT 16/20 + COMPACT_CANT_DRIVE 4/20 + action_mag mean=0.067 p10=0.003 p90=0.067 (0=stopped, 1=full speed) + min_flock_radius mean=7.25m best=1.61m (target <5m to compact) + min_dog_to_com mean=0.97m best=0.20m (FLEE_DIST=7m) + min_com_to_pen mean=13.28m best=5.53m + reward/step (mean): progress=+0.0007 alignment=+0.0353 pen_bonus=+0.0008 step_cost=-0.0200 complete=+0.0000 +-------------------------------- +| time/ | | +| fps | 1832 | +| iterations | 135 | +| time_elapsed | 1206 | +| total_timesteps | 2211840 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1805 | +| iterations | 136 | +| time_elapsed | 1234 | +| total_timesteps | 2228224 | +| train/ | | +| approx_kl | 0.006131858 | +| clip_fraction | 0.067 | +| clip_range | 0.2 | +| entropy_loss | -3.13 | +| explained_variance | 0.927 | +| learning_rate | 0.0003 | +| loss | -0.0328 | +| n_updates | 1350 | +| policy_gradient_loss | -0.0022 | +| std | 1.16 | +| value_loss | 0.000981 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 1811 | +| iterations | 137 | +| time_elapsed | 1239 | +| total_timesteps | 2244608 | +| train/ | | +| approx_kl | 0.0071705403 | +| clip_fraction | 0.0699 | +| clip_range | 0.2 | +| entropy_loss | -3.12 | +| explained_variance | 0.913 | +| learning_rate | 0.0003 | +| loss | -0.0391 | +| n_updates | 1360 | +| policy_gradient_loss | -0.0032 | +| std | 1.15 | +| value_loss | 0.00639 | +------------------------------------------ +Eval num_timesteps=2250000, episode_reward=28.55 +/- 29.67 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 28.5 | +| time/ | | +| total_timesteps | 2250000 | +| train/ | | +| approx_kl | 0.007929602 | +| clip_fraction | 0.0812 | +| clip_range | 0.2 | +| entropy_loss | -3.14 | +| explained_variance | 0.933 | +| learning_rate | 0.0003 | +| loss | -0.0592 | +| n_updates | 1370 | +| policy_gradient_loss | -0.00434 | +| std | 1.17 | +| value_loss | 0.00337 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1800 | +| iterations | 138 | +| time_elapsed | 1255 | +| total_timesteps | 2260992 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1806 | +| iterations | 139 | +| time_elapsed | 1260 | +| total_timesteps | 2277376 | +| train/ | | +| approx_kl | 0.0062256474 | +| clip_fraction | 0.0592 | +| clip_range | 0.2 | +| entropy_loss | -3.15 | +| explained_variance | 0.935 | +| learning_rate | 0.0003 | +| loss | -0.0368 | +| n_updates | 1380 | +| policy_gradient_loss | -0.00242 | +| std | 1.17 | +| value_loss | 0.00787 | +------------------------------------------ +------------------------------------------ +| time/ | | +| fps | 1812 | +| iterations | 140 | +| time_elapsed | 1265 | +| total_timesteps | 2293760 | +| train/ | | +| approx_kl | 0.0075241774 | +| clip_fraction | 0.0885 | +| clip_range | 0.2 | +| entropy_loss | -3.14 | +| explained_variance | 0.948 | +| learning_rate | 0.0003 | +| loss | -0.0385 | +| n_updates | 1390 | +| policy_gradient_loss | -0.00346 | +| std | 1.16 | +| value_loss | 0.00172 | +------------------------------------------ +Eval num_timesteps=2300000, episode_reward=43.34 +/- 34.73 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 43.3 | +| time/ | | +| total_timesteps | 2300000 | +| train/ | | +| approx_kl | 0.0073855575 | +| clip_fraction | 0.0753 | +| clip_range | 0.2 | +| entropy_loss | -3.12 | +| explained_variance | 0.911 | +| learning_rate | 0.0003 | +| loss | -0.0377 | +| n_updates | 1400 | +| policy_gradient_loss | -0.0034 | +| std | 1.15 | +| value_loss | 0.00645 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1801 | +| iterations | 141 | +| time_elapsed | 1282 | +| total_timesteps | 2310144 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1806 | +| iterations | 142 | +| time_elapsed | 1287 | +| total_timesteps | 2326528 | +| train/ | | +| approx_kl | 0.007232903 | +| clip_fraction | 0.0845 | +| clip_range | 0.2 | +| entropy_loss | -3.13 | +| explained_variance | 0.956 | +| learning_rate | 0.0003 | +| loss | -0.0346 | +| n_updates | 1410 | +| policy_gradient_loss | -0.003 | +| std | 1.16 | +| value_loss | 0.00134 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1812 | +| iterations | 143 | +| time_elapsed | 1292 | +| total_timesteps | 2342912 | +| train/ | | +| approx_kl | 0.007283367 | +| clip_fraction | 0.0785 | +| clip_range | 0.2 | +| entropy_loss | -3.14 | +| explained_variance | 0.913 | +| learning_rate | 0.0003 | +| loss | -0.0306 | +| n_updates | 1420 | +| policy_gradient_loss | -0.00368 | +| std | 1.17 | +| value_loss | 0.00385 | +----------------------------------------- +Eval num_timesteps=2350000, episode_reward=33.49 +/- 34.79 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 33.5 | +| time/ | | +| total_timesteps | 2350000 | +| train/ | | +| approx_kl | 0.006632698 | +| clip_fraction | 0.0647 | +| clip_range | 0.2 | +| entropy_loss | -3.15 | +| explained_variance | 0.934 | +| learning_rate | 0.0003 | +| loss | -0.0469 | +| n_updates | 1430 | +| policy_gradient_loss | -0.00327 | +| std | 1.17 | +| value_loss | 0.00793 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1800 | +| iterations | 144 | +| time_elapsed | 1310 | +| total_timesteps | 2359296 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1805 | +| iterations | 145 | +| time_elapsed | 1315 | +| total_timesteps | 2375680 | +| train/ | | +| approx_kl | 0.008364577 | +| clip_fraction | 0.089 | +| clip_range | 0.2 | +| entropy_loss | -3.15 | +| explained_variance | 0.957 | +| learning_rate | 0.0003 | +| loss | -0.0464 | +| n_updates | 1440 | +| policy_gradient_loss | -0.00453 | +| std | 1.17 | +| value_loss | 0.00507 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1810 | +| iterations | 146 | +| time_elapsed | 1321 | +| total_timesteps | 2392064 | +| train/ | | +| approx_kl | 0.007854694 | +| clip_fraction | 0.0927 | +| clip_range | 0.2 | +| entropy_loss | -3.15 | +| explained_variance | 0.953 | +| learning_rate | 0.0003 | +| loss | -0.0436 | +| n_updates | 1450 | +| policy_gradient_loss | -0.00519 | +| std | 1.17 | +| value_loss | 0.00289 | +----------------------------------------- +Eval num_timesteps=2400000, episode_reward=34.64 +/- 37.27 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 34.6 | +| time/ | | +| total_timesteps | 2400000 | +| train/ | | +| approx_kl | 0.0076201856 | +| clip_fraction | 0.0844 | +| clip_range | 0.2 | +| entropy_loss | -3.15 | +| explained_variance | 0.945 | +| learning_rate | 0.0003 | +| loss | -0.0431 | +| n_updates | 1460 | +| policy_gradient_loss | -0.00554 | +| std | 1.17 | +| value_loss | 0.00196 | +------------------------------------------ + +[Diag @ 2,400,000 | n_sheep=3 | success=0%] + NEVER_COMPACT 15/20 + COMPACT_CANT_DRIVE 5/20 + action_mag mean=0.058 p10=0.006 p90=0.053 (0=stopped, 1=full speed) + min_flock_radius mean=6.68m best=0.96m (target <5m to compact) + min_dog_to_com mean=0.92m best=0.16m (FLEE_DIST=7m) + min_com_to_pen mean=12.18m best=5.62m + reward/step (mean): progress=+0.0034 alignment=+0.0352 pen_bonus=+0.0010 step_cost=-0.0200 complete=+0.0000 + +[Curriculum] leaving stage n_sheep=3 after 800,000 steps | training success rate (last 100 eps) = 0% +[Curriculum] → 4 sheep at step 2,400,000 + +-------------------------------- +| time/ | | +| fps | 1788 | +| iterations | 147 | +| time_elapsed | 1346 | +| total_timesteps | 2408448 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1794 | +| iterations | 148 | +| time_elapsed | 1351 | +| total_timesteps | 2424832 | +| train/ | | +| approx_kl | 0.006801254 | +| clip_fraction | 0.0797 | +| clip_range | 0.2 | +| entropy_loss | -3.16 | +| explained_variance | 0.922 | +| learning_rate | 0.0003 | +| loss | -0.0313 | +| n_updates | 1470 | +| policy_gradient_loss | -0.00418 | +| std | 1.18 | +| value_loss | 0.00724 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1798 | +| iterations | 149 | +| time_elapsed | 1357 | +| total_timesteps | 2441216 | +| train/ | | +| approx_kl | 0.007604986 | +| clip_fraction | 0.0758 | +| clip_range | 0.2 | +| entropy_loss | -3.18 | +| explained_variance | 0.937 | +| learning_rate | 0.0003 | +| loss | -0.0354 | +| n_updates | 1480 | +| policy_gradient_loss | -0.00189 | +| std | 1.19 | +| value_loss | 0.00591 | +----------------------------------------- +Eval num_timesteps=2450000, episode_reward=27.82 +/- 47.76 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 27.8 | +| time/ | | +| total_timesteps | 2450000 | +| train/ | | +| approx_kl | 0.0070674624 | +| clip_fraction | 0.0749 | +| clip_range | 0.2 | +| entropy_loss | -3.2 | +| explained_variance | 0.893 | +| learning_rate | 0.0003 | +| loss | -0.0327 | +| n_updates | 1490 | +| policy_gradient_loss | -0.00322 | +| std | 1.2 | +| value_loss | 0.0105 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1788 | +| iterations | 150 | +| time_elapsed | 1374 | +| total_timesteps | 2457600 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1792 | +| iterations | 151 | +| time_elapsed | 1380 | +| total_timesteps | 2473984 | +| train/ | | +| approx_kl | 0.008372683 | +| clip_fraction | 0.0874 | +| clip_range | 0.2 | +| entropy_loss | -3.21 | +| explained_variance | 0.932 | +| learning_rate | 0.0003 | +| loss | -0.0381 | +| n_updates | 1500 | +| policy_gradient_loss | -0.00471 | +| std | 1.21 | +| value_loss | 0.00563 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1796 | +| iterations | 152 | +| time_elapsed | 1385 | +| total_timesteps | 2490368 | +| train/ | | +| approx_kl | 0.007761459 | +| clip_fraction | 0.0794 | +| clip_range | 0.2 | +| entropy_loss | -3.23 | +| explained_variance | 0.929 | +| learning_rate | 0.0003 | +| loss | -0.0345 | +| n_updates | 1510 | +| policy_gradient_loss | -0.00402 | +| std | 1.22 | +| value_loss | 0.00736 | +----------------------------------------- +Eval num_timesteps=2500000, episode_reward=25.79 +/- 28.60 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 25.8 | +| time/ | | +| total_timesteps | 2500000 | +| train/ | | +| approx_kl | 0.0070840344 | +| clip_fraction | 0.0711 | +| clip_range | 0.2 | +| entropy_loss | -3.22 | +| explained_variance | 0.9 | +| learning_rate | 0.0003 | +| loss | -0.0322 | +| n_updates | 1520 | +| policy_gradient_loss | -0.00397 | +| std | 1.21 | +| value_loss | 0.00517 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1785 | +| iterations | 153 | +| time_elapsed | 1404 | +| total_timesteps | 2506752 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1788 | +| iterations | 154 | +| time_elapsed | 1410 | +| total_timesteps | 2523136 | +| train/ | | +| approx_kl | 0.0062630484 | +| clip_fraction | 0.069 | +| clip_range | 0.2 | +| entropy_loss | -3.22 | +| explained_variance | 0.93 | +| learning_rate | 0.0003 | +| loss | -0.0363 | +| n_updates | 1530 | +| policy_gradient_loss | -0.00382 | +| std | 1.21 | +| value_loss | 0.00546 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1792 | +| iterations | 155 | +| time_elapsed | 1416 | +| total_timesteps | 2539520 | +| train/ | | +| approx_kl | 0.007609036 | +| clip_fraction | 0.0815 | +| clip_range | 0.2 | +| entropy_loss | -3.23 | +| explained_variance | 0.832 | +| learning_rate | 0.0003 | +| loss | -0.0404 | +| n_updates | 1540 | +| policy_gradient_loss | -0.00347 | +| std | 1.22 | +| value_loss | 0.00902 | +----------------------------------------- +Eval num_timesteps=2550000, episode_reward=26.76 +/- 38.76 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 26.8 | +| time/ | | +| total_timesteps | 2550000 | +| train/ | | +| approx_kl | 0.0070117847 | +| clip_fraction | 0.0808 | +| clip_range | 0.2 | +| entropy_loss | -3.23 | +| explained_variance | 0.863 | +| learning_rate | 0.0003 | +| loss | -0.0357 | +| n_updates | 1550 | +| policy_gradient_loss | -0.00279 | +| std | 1.22 | +| value_loss | 0.0114 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1780 | +| iterations | 156 | +| time_elapsed | 1435 | +| total_timesteps | 2555904 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1786 | +| iterations | 157 | +| time_elapsed | 1440 | +| total_timesteps | 2572288 | +| train/ | | +| approx_kl | 0.0070258966 | +| clip_fraction | 0.0817 | +| clip_range | 0.2 | +| entropy_loss | -3.23 | +| explained_variance | 0.941 | +| learning_rate | 0.0003 | +| loss | -0.039 | +| n_updates | 1560 | +| policy_gradient_loss | -0.00488 | +| std | 1.22 | +| value_loss | 0.00696 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1791 | +| iterations | 158 | +| time_elapsed | 1445 | +| total_timesteps | 2588672 | +| train/ | | +| approx_kl | 0.007600763 | +| clip_fraction | 0.0842 | +| clip_range | 0.2 | +| entropy_loss | -3.23 | +| explained_variance | 0.912 | +| learning_rate | 0.0003 | +| loss | -0.0363 | +| n_updates | 1570 | +| policy_gradient_loss | -0.00544 | +| std | 1.22 | +| value_loss | 0.00556 | +----------------------------------------- +Eval num_timesteps=2600000, episode_reward=19.53 +/- 46.34 +Episode length: 2000.00 +/- 0.00 +---------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 19.5 | +| time/ | | +| total_timesteps | 2600000 | +| train/ | | +| approx_kl | 0.00714178 | +| clip_fraction | 0.0783 | +| clip_range | 0.2 | +| entropy_loss | -3.23 | +| explained_variance | 0.92 | +| learning_rate | 0.0003 | +| loss | -0.0352 | +| n_updates | 1580 | +| policy_gradient_loss | -0.00468 | +| std | 1.22 | +| value_loss | 0.00364 | +---------------------------------------- + +[Diag @ 2,600,000 | n_sheep=4 | success=0%] + NEVER_COMPACT 20/20 + action_mag mean=0.061 p10=0.006 p90=0.047 (0=stopped, 1=full speed) + min_flock_radius mean=7.84m best=5.75m (target <5m to compact) + min_dog_to_com mean=0.66m best=0.09m (FLEE_DIST=7m) + min_com_to_pen mean=12.60m best=6.52m + reward/step (mean): progress=-0.0028 alignment=+0.0337 pen_bonus=+0.0005 step_cost=-0.0200 complete=+0.0000 +-------------------------------- +| time/ | | +| fps | 1768 | +| iterations | 159 | +| time_elapsed | 1473 | +| total_timesteps | 2605056 | +-------------------------------- +---------------------------------------- +| time/ | | +| fps | 1771 | +| iterations | 160 | +| time_elapsed | 1479 | +| total_timesteps | 2621440 | +| train/ | | +| approx_kl | 0.00681924 | +| clip_fraction | 0.0779 | +| clip_range | 0.2 | +| entropy_loss | -3.23 | +| explained_variance | 0.946 | +| learning_rate | 0.0003 | +| loss | -0.0409 | +| n_updates | 1590 | +| policy_gradient_loss | -0.00346 | +| std | 1.22 | +| value_loss | 0.00377 | +---------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1775 | +| iterations | 161 | +| time_elapsed | 1485 | +| total_timesteps | 2637824 | +| train/ | | +| approx_kl | 0.008016385 | +| clip_fraction | 0.0888 | +| clip_range | 0.2 | +| entropy_loss | -3.24 | +| explained_variance | 0.931 | +| learning_rate | 0.0003 | +| loss | -0.0311 | +| n_updates | 1600 | +| policy_gradient_loss | -0.00526 | +| std | 1.22 | +| value_loss | 0.00681 | +----------------------------------------- +Eval num_timesteps=2650000, episode_reward=28.98 +/- 40.07 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 29 | +| time/ | | +| total_timesteps | 2650000 | +| train/ | | +| approx_kl | 0.006836592 | +| clip_fraction | 0.0778 | +| clip_range | 0.2 | +| entropy_loss | -3.24 | +| explained_variance | 0.9 | +| learning_rate | 0.0003 | +| loss | -0.0304 | +| n_updates | 1610 | +| policy_gradient_loss | -0.00255 | +| std | 1.23 | +| value_loss | 0.00574 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1765 | +| iterations | 162 | +| time_elapsed | 1503 | +| total_timesteps | 2654208 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1770 | +| iterations | 163 | +| time_elapsed | 1508 | +| total_timesteps | 2670592 | +| train/ | | +| approx_kl | 0.0072684484 | +| clip_fraction | 0.0764 | +| clip_range | 0.2 | +| entropy_loss | -3.23 | +| explained_variance | 0.948 | +| learning_rate | 0.0003 | +| loss | -0.0295 | +| n_updates | 1620 | +| policy_gradient_loss | -0.00325 | +| std | 1.22 | +| value_loss | 0.00254 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1775 | +| iterations | 164 | +| time_elapsed | 1513 | +| total_timesteps | 2686976 | +| train/ | | +| approx_kl | 0.007457966 | +| clip_fraction | 0.0845 | +| clip_range | 0.2 | +| entropy_loss | -3.23 | +| explained_variance | 0.919 | +| learning_rate | 0.0003 | +| loss | -0.0473 | +| n_updates | 1630 | +| policy_gradient_loss | -0.00505 | +| std | 1.22 | +| value_loss | 0.004 | +----------------------------------------- +Eval num_timesteps=2700000, episode_reward=33.96 +/- 32.11 +Episode length: 2000.00 +/- 0.00 +---------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 34 | +| time/ | | +| total_timesteps | 2700000 | +| train/ | | +| approx_kl | 0.00796853 | +| clip_fraction | 0.0782 | +| clip_range | 0.2 | +| entropy_loss | -3.22 | +| explained_variance | 0.959 | +| learning_rate | 0.0003 | +| loss | -0.0336 | +| n_updates | 1640 | +| policy_gradient_loss | -0.00288 | +| std | 1.21 | +| value_loss | 0.00235 | +---------------------------------------- +-------------------------------- +| time/ | | +| fps | 1761 | +| iterations | 165 | +| time_elapsed | 1534 | +| total_timesteps | 2703360 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1764 | +| iterations | 166 | +| time_elapsed | 1541 | +| total_timesteps | 2719744 | +| train/ | | +| approx_kl | 0.0073700505 | +| clip_fraction | 0.0857 | +| clip_range | 0.2 | +| entropy_loss | -3.21 | +| explained_variance | 0.875 | +| learning_rate | 0.0003 | +| loss | -0.0255 | +| n_updates | 1650 | +| policy_gradient_loss | -0.00495 | +| std | 1.21 | +| value_loss | 0.00846 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1768 | +| iterations | 167 | +| time_elapsed | 1546 | +| total_timesteps | 2736128 | +| train/ | | +| approx_kl | 0.007965144 | +| clip_fraction | 0.0858 | +| clip_range | 0.2 | +| entropy_loss | -3.22 | +| explained_variance | 0.898 | +| learning_rate | 0.0003 | +| loss | -0.0451 | +| n_updates | 1660 | +| policy_gradient_loss | -0.00518 | +| std | 1.22 | +| value_loss | 0.00395 | +----------------------------------------- +Eval num_timesteps=2750000, episode_reward=23.58 +/- 34.37 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 23.6 | +| time/ | | +| total_timesteps | 2750000 | +| train/ | | +| approx_kl | 0.0065765316 | +| clip_fraction | 0.0682 | +| clip_range | 0.2 | +| entropy_loss | -3.23 | +| explained_variance | 0.934 | +| learning_rate | 0.0003 | +| loss | -0.0429 | +| n_updates | 1670 | +| policy_gradient_loss | -0.00379 | +| std | 1.23 | +| value_loss | 0.00677 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1756 | +| iterations | 168 | +| time_elapsed | 1566 | +| total_timesteps | 2752512 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1761 | +| iterations | 169 | +| time_elapsed | 1571 | +| total_timesteps | 2768896 | +| train/ | | +| approx_kl | 0.0066236854 | +| clip_fraction | 0.0619 | +| clip_range | 0.2 | +| entropy_loss | -3.25 | +| explained_variance | 0.935 | +| learning_rate | 0.0003 | +| loss | -0.0365 | +| n_updates | 1680 | +| policy_gradient_loss | -0.00239 | +| std | 1.23 | +| value_loss | 0.00922 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1766 | +| iterations | 170 | +| time_elapsed | 1576 | +| total_timesteps | 2785280 | +| train/ | | +| approx_kl | 0.007887056 | +| clip_fraction | 0.0836 | +| clip_range | 0.2 | +| entropy_loss | -3.25 | +| explained_variance | 0.899 | +| learning_rate | 0.0003 | +| loss | -0.0353 | +| n_updates | 1690 | +| policy_gradient_loss | -0.0053 | +| std | 1.24 | +| value_loss | 0.00635 | +----------------------------------------- +Eval num_timesteps=2800000, episode_reward=33.57 +/- 35.56 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 33.6 | +| time/ | | +| total_timesteps | 2800000 | +| train/ | | +| approx_kl | 0.0067548407 | +| clip_fraction | 0.0804 | +| clip_range | 0.2 | +| entropy_loss | -3.25 | +| explained_variance | 0.887 | +| learning_rate | 0.0003 | +| loss | -0.0408 | +| n_updates | 1700 | +| policy_gradient_loss | -0.00444 | +| std | 1.24 | +| value_loss | 0.0101 | +------------------------------------------ + +[Diag @ 2,800,000 | n_sheep=4 | success=0%] + NEVER_COMPACT 19/20 + COMPACT_CANT_DRIVE 1/20 + action_mag mean=0.050 p10=0.003 p90=0.039 (0=stopped, 1=full speed) + min_flock_radius mean=8.42m best=4.84m (target <5m to compact) + min_dog_to_com mean=0.73m best=0.12m (FLEE_DIST=7m) + min_com_to_pen mean=14.29m best=7.66m + reward/step (mean): progress=-0.0027 alignment=+0.0365 pen_bonus=+0.0005 step_cost=-0.0200 complete=+0.0000 +-------------------------------- +| time/ | | +| fps | 1746 | +| iterations | 171 | +| time_elapsed | 1604 | +| total_timesteps | 2801664 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1750 | +| iterations | 172 | +| time_elapsed | 1609 | +| total_timesteps | 2818048 | +| train/ | | +| approx_kl | 0.0069283517 | +| clip_fraction | 0.0847 | +| clip_range | 0.2 | +| entropy_loss | -3.24 | +| explained_variance | 0.899 | +| learning_rate | 0.0003 | +| loss | -0.0476 | +| n_updates | 1710 | +| policy_gradient_loss | -0.00499 | +| std | 1.23 | +| value_loss | 0.00708 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1754 | +| iterations | 173 | +| time_elapsed | 1615 | +| total_timesteps | 2834432 | +| train/ | | +| approx_kl | 0.008303071 | +| clip_fraction | 0.082 | +| clip_range | 0.2 | +| entropy_loss | -3.25 | +| explained_variance | 0.911 | +| learning_rate | 0.0003 | +| loss | -0.0484 | +| n_updates | 1720 | +| policy_gradient_loss | -0.00388 | +| std | 1.23 | +| value_loss | 0.0061 | +----------------------------------------- +Eval num_timesteps=2850000, episode_reward=34.42 +/- 32.01 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 34.4 | +| time/ | | +| total_timesteps | 2850000 | +| train/ | | +| approx_kl | 0.0063731004 | +| clip_fraction | 0.069 | +| clip_range | 0.2 | +| entropy_loss | -3.26 | +| explained_variance | 0.951 | +| learning_rate | 0.0003 | +| loss | -0.029 | +| n_updates | 1730 | +| policy_gradient_loss | -0.00384 | +| std | 1.25 | +| value_loss | 0.00528 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1745 | +| iterations | 174 | +| time_elapsed | 1633 | +| total_timesteps | 2850816 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1749 | +| iterations | 175 | +| time_elapsed | 1638 | +| total_timesteps | 2867200 | +| train/ | | +| approx_kl | 0.008163793 | +| clip_fraction | 0.0812 | +| clip_range | 0.2 | +| entropy_loss | -3.28 | +| explained_variance | 0.935 | +| learning_rate | 0.0003 | +| loss | -0.0374 | +| n_updates | 1740 | +| policy_gradient_loss | -0.0032 | +| std | 1.26 | +| value_loss | 0.00432 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 1754 | +| iterations | 176 | +| time_elapsed | 1643 | +| total_timesteps | 2883584 | +| train/ | | +| approx_kl | 0.0063439216 | +| clip_fraction | 0.0743 | +| clip_range | 0.2 | +| entropy_loss | -3.29 | +| explained_variance | 0.89 | +| learning_rate | 0.0003 | +| loss | -0.0372 | +| n_updates | 1750 | +| policy_gradient_loss | -0.00403 | +| std | 1.26 | +| value_loss | 0.00654 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1759 | +| iterations | 177 | +| time_elapsed | 1648 | +| total_timesteps | 2899968 | +| train/ | | +| approx_kl | 0.006967159 | +| clip_fraction | 0.0761 | +| clip_range | 0.2 | +| entropy_loss | -3.29 | +| explained_variance | 0.929 | +| learning_rate | 0.0003 | +| loss | -0.0462 | +| n_updates | 1760 | +| policy_gradient_loss | -0.00382 | +| std | 1.26 | +| value_loss | 0.00381 | +----------------------------------------- +Eval num_timesteps=2900000, episode_reward=40.78 +/- 43.99 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 40.8 | +| time/ | | +| total_timesteps | 2900000 | +| train/ | | +| approx_kl | 0.0075211767 | +| clip_fraction | 0.0727 | +| clip_range | 0.2 | +| entropy_loss | -3.29 | +| explained_variance | 0.955 | +| learning_rate | 0.0003 | +| loss | -0.0178 | +| n_updates | 1770 | +| policy_gradient_loss | -0.00285 | +| std | 1.27 | +| value_loss | 0.00798 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1751 | +| iterations | 178 | +| time_elapsed | 1664 | +| total_timesteps | 2916352 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1756 | +| iterations | 179 | +| time_elapsed | 1669 | +| total_timesteps | 2932736 | +| train/ | | +| approx_kl | 0.006763531 | +| clip_fraction | 0.0678 | +| clip_range | 0.2 | +| entropy_loss | -3.3 | +| explained_variance | 0.91 | +| learning_rate | 0.0003 | +| loss | -0.0349 | +| n_updates | 1780 | +| policy_gradient_loss | -0.00361 | +| std | 1.27 | +| value_loss | 0.00528 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 1760 | +| iterations | 180 | +| time_elapsed | 1675 | +| total_timesteps | 2949120 | +| train/ | | +| approx_kl | 0.0067441636 | +| clip_fraction | 0.0732 | +| clip_range | 0.2 | +| entropy_loss | -3.3 | +| explained_variance | 0.888 | +| learning_rate | 0.0003 | +| loss | -0.0261 | +| n_updates | 1790 | +| policy_gradient_loss | -0.00291 | +| std | 1.27 | +| value_loss | 0.00582 | +------------------------------------------ +Eval num_timesteps=2950000, episode_reward=48.39 +/- 31.91 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 48.4 | +| time/ | | +| total_timesteps | 2950000 | +| train/ | | +| approx_kl | 0.0076025603 | +| clip_fraction | 0.0858 | +| clip_range | 0.2 | +| entropy_loss | -3.31 | +| explained_variance | 0.92 | +| learning_rate | 0.0003 | +| loss | -0.0394 | +| n_updates | 1800 | +| policy_gradient_loss | -0.00443 | +| std | 1.27 | +| value_loss | 0.00647 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1751 | +| iterations | 181 | +| time_elapsed | 1693 | +| total_timesteps | 2965504 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1754 | +| iterations | 182 | +| time_elapsed | 1699 | +| total_timesteps | 2981888 | +| train/ | | +| approx_kl | 0.008041672 | +| clip_fraction | 0.0795 | +| clip_range | 0.2 | +| entropy_loss | -3.3 | +| explained_variance | 0.939 | +| learning_rate | 0.0003 | +| loss | -0.0344 | +| n_updates | 1810 | +| policy_gradient_loss | -0.00456 | +| std | 1.27 | +| value_loss | 0.00404 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 1758 | +| iterations | 183 | +| time_elapsed | 1704 | +| total_timesteps | 2998272 | +| train/ | | +| approx_kl | 0.0066829836 | +| clip_fraction | 0.0712 | +| clip_range | 0.2 | +| entropy_loss | -3.32 | +| explained_variance | 0.921 | +| learning_rate | 0.0003 | +| loss | -0.0361 | +| n_updates | 1820 | +| policy_gradient_loss | -0.00379 | +| std | 1.28 | +| value_loss | 0.00818 | +------------------------------------------ +Eval num_timesteps=3000000, episode_reward=33.06 +/- 47.57 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 33.1 | +| time/ | | +| total_timesteps | 3000000 | +| train/ | | +| approx_kl | 0.006152373 | +| clip_fraction | 0.0633 | +| clip_range | 0.2 | +| entropy_loss | -3.33 | +| explained_variance | 0.912 | +| learning_rate | 0.0003 | +| loss | -0.0316 | +| n_updates | 1830 | +| policy_gradient_loss | -0.00335 | +| std | 1.29 | +| value_loss | 0.00404 | +----------------------------------------- + +[Diag @ 3,000,000 | n_sheep=4 | success=0%] + NEVER_COMPACT 20/20 + action_mag mean=0.049 p10=0.005 p90=0.046 (0=stopped, 1=full speed) + min_flock_radius mean=8.21m best=5.29m (target <5m to compact) + min_dog_to_com mean=0.76m best=0.22m (FLEE_DIST=7m) + min_com_to_pen mean=12.62m best=4.77m + reward/step (mean): progress=+0.0089 alignment=+0.0386 pen_bonus=+0.0008 step_cost=-0.0200 complete=+0.0000 +-------------------------------- +| time/ | | +| fps | 1740 | +| iterations | 184 | +| time_elapsed | 1731 | +| total_timesteps | 3014656 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1745 | +| iterations | 185 | +| time_elapsed | 1736 | +| total_timesteps | 3031040 | +| train/ | | +| approx_kl | 0.006385569 | +| clip_fraction | 0.0703 | +| clip_range | 0.2 | +| entropy_loss | -3.34 | +| explained_variance | 0.919 | +| learning_rate | 0.0003 | +| loss | -0.0313 | +| n_updates | 1840 | +| policy_gradient_loss | -0.00274 | +| std | 1.3 | +| value_loss | 0.00503 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1748 | +| iterations | 186 | +| time_elapsed | 1743 | +| total_timesteps | 3047424 | +| train/ | | +| approx_kl | 0.007695101 | +| clip_fraction | 0.0784 | +| clip_range | 0.2 | +| entropy_loss | -3.36 | +| explained_variance | 0.935 | +| learning_rate | 0.0003 | +| loss | -0.0244 | +| n_updates | 1850 | +| policy_gradient_loss | -0.00342 | +| std | 1.31 | +| value_loss | 0.0051 | +----------------------------------------- +Eval num_timesteps=3050000, episode_reward=45.25 +/- 31.57 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 45.2 | +| time/ | | +| total_timesteps | 3050000 | +| train/ | | +| approx_kl | 0.0067556566 | +| clip_fraction | 0.082 | +| clip_range | 0.2 | +| entropy_loss | -3.37 | +| explained_variance | 0.868 | +| learning_rate | 0.0003 | +| loss | -0.0349 | +| n_updates | 1860 | +| policy_gradient_loss | -0.00353 | +| std | 1.31 | +| value_loss | 0.00931 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1738 | +| iterations | 187 | +| time_elapsed | 1762 | +| total_timesteps | 3063808 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1741 | +| iterations | 188 | +| time_elapsed | 1768 | +| total_timesteps | 3080192 | +| train/ | | +| approx_kl | 0.008263266 | +| clip_fraction | 0.0792 | +| clip_range | 0.2 | +| entropy_loss | -3.36 | +| explained_variance | 0.924 | +| learning_rate | 0.0003 | +| loss | -0.0411 | +| n_updates | 1870 | +| policy_gradient_loss | -0.00382 | +| std | 1.31 | +| value_loss | 0.00429 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1746 | +| iterations | 189 | +| time_elapsed | 1773 | +| total_timesteps | 3096576 | +| train/ | | +| approx_kl | 0.008488305 | +| clip_fraction | 0.08 | +| clip_range | 0.2 | +| entropy_loss | -3.37 | +| explained_variance | 0.925 | +| learning_rate | 0.0003 | +| loss | -0.0292 | +| n_updates | 1880 | +| policy_gradient_loss | -0.00441 | +| std | 1.31 | +| value_loss | 0.00748 | +----------------------------------------- +Eval num_timesteps=3100000, episode_reward=30.63 +/- 33.70 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 30.6 | +| time/ | | +| total_timesteps | 3100000 | +| train/ | | +| approx_kl | 0.0065515246 | +| clip_fraction | 0.0736 | +| clip_range | 0.2 | +| entropy_loss | -3.35 | +| explained_variance | 0.932 | +| learning_rate | 0.0003 | +| loss | 0.00192 | +| n_updates | 1890 | +| policy_gradient_loss | -0.00334 | +| std | 1.3 | +| value_loss | 0.00902 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1737 | +| iterations | 190 | +| time_elapsed | 1791 | +| total_timesteps | 3112960 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1741 | +| iterations | 191 | +| time_elapsed | 1796 | +| total_timesteps | 3129344 | +| train/ | | +| approx_kl | 0.0068135276 | +| clip_fraction | 0.0721 | +| clip_range | 0.2 | +| entropy_loss | -3.35 | +| explained_variance | 0.933 | +| learning_rate | 0.0003 | +| loss | -0.036 | +| n_updates | 1900 | +| policy_gradient_loss | -0.00403 | +| std | 1.29 | +| value_loss | 0.00616 | +------------------------------------------ +------------------------------------------ +| time/ | | +| fps | 1731 | +| iterations | 192 | +| time_elapsed | 1817 | +| total_timesteps | 3145728 | +| train/ | | +| approx_kl | 0.0061126407 | +| clip_fraction | 0.0615 | +| clip_range | 0.2 | +| entropy_loss | -3.35 | +| explained_variance | 0.921 | +| learning_rate | 0.0003 | +| loss | -0.0355 | +| n_updates | 1910 | +| policy_gradient_loss | -0.00318 | +| std | 1.3 | +| value_loss | 0.0104 | +------------------------------------------ +Eval num_timesteps=3150000, episode_reward=33.88 +/- 34.31 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 33.9 | +| time/ | | +| total_timesteps | 3150000 | +| train/ | | +| approx_kl | 0.007734685 | +| clip_fraction | 0.0778 | +| clip_range | 0.2 | +| entropy_loss | -3.35 | +| explained_variance | 0.899 | +| learning_rate | 0.0003 | +| loss | -0.0323 | +| n_updates | 1920 | +| policy_gradient_loss | -0.00432 | +| std | 1.3 | +| value_loss | 0.0091 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1714 | +| iterations | 193 | +| time_elapsed | 1844 | +| total_timesteps | 3162112 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1717 | +| iterations | 194 | +| time_elapsed | 1850 | +| total_timesteps | 3178496 | +| train/ | | +| approx_kl | 0.007997783 | +| clip_fraction | 0.0782 | +| clip_range | 0.2 | +| entropy_loss | -3.35 | +| explained_variance | 0.91 | +| learning_rate | 0.0003 | +| loss | -0.0525 | +| n_updates | 1930 | +| policy_gradient_loss | -0.00523 | +| std | 1.3 | +| value_loss | 0.00283 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1720 | +| iterations | 195 | +| time_elapsed | 1857 | +| total_timesteps | 3194880 | +| train/ | | +| approx_kl | 0.007701534 | +| clip_fraction | 0.0712 | +| clip_range | 0.2 | +| entropy_loss | -3.34 | +| explained_variance | 0.927 | +| learning_rate | 0.0003 | +| loss | -0.0367 | +| n_updates | 1940 | +| policy_gradient_loss | -0.00288 | +| std | 1.3 | +| value_loss | 0.0126 | +----------------------------------------- +Eval num_timesteps=3200000, episode_reward=46.55 +/- 34.01 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 46.6 | +| time/ | | +| total_timesteps | 3200000 | +| train/ | | +| approx_kl | 0.006747664 | +| clip_fraction | 0.0766 | +| clip_range | 0.2 | +| entropy_loss | -3.35 | +| explained_variance | 0.93 | +| learning_rate | 0.0003 | +| loss | -0.0411 | +| n_updates | 1950 | +| policy_gradient_loss | -0.00404 | +| std | 1.3 | +| value_loss | 0.00409 | +----------------------------------------- + +[Diag @ 3,200,000 | n_sheep=4 | success=0%] + NEVER_COMPACT 20/20 + action_mag mean=0.078 p10=0.005 p90=0.057 (0=stopped, 1=full speed) + min_flock_radius mean=8.76m best=6.32m (target <5m to compact) + min_dog_to_com mean=0.81m best=0.36m (FLEE_DIST=7m) + min_com_to_pen mean=13.75m best=6.91m + reward/step (mean): progress=-0.0020 alignment=+0.0384 pen_bonus=+0.0003 step_cost=-0.0200 complete=+0.0000 + +[Curriculum] leaving stage n_sheep=4 after 800,000 steps | training success rate (last 100 eps) = 0% +[Curriculum] → 5 sheep at step 3,200,000 + +-------------------------------- +| time/ | | +| fps | 1704 | +| iterations | 196 | +| time_elapsed | 1884 | +| total_timesteps | 3211264 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1707 | +| iterations | 197 | +| time_elapsed | 1889 | +| total_timesteps | 3227648 | +| train/ | | +| approx_kl | 0.0068222135 | +| clip_fraction | 0.0816 | +| clip_range | 0.2 | +| entropy_loss | -3.36 | +| explained_variance | 0.922 | +| learning_rate | 0.0003 | +| loss | -0.0386 | +| n_updates | 1960 | +| policy_gradient_loss | -0.00374 | +| std | 1.31 | +| value_loss | 0.0112 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1711 | +| iterations | 198 | +| time_elapsed | 1895 | +| total_timesteps | 3244032 | +| train/ | | +| approx_kl | 0.006939999 | +| clip_fraction | 0.0829 | +| clip_range | 0.2 | +| entropy_loss | -3.36 | +| explained_variance | 0.955 | +| learning_rate | 0.0003 | +| loss | -0.0439 | +| n_updates | 1970 | +| policy_gradient_loss | -0.00433 | +| std | 1.31 | +| value_loss | 0.00895 | +----------------------------------------- +Eval num_timesteps=3250000, episode_reward=21.19 +/- 37.18 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 21.2 | +| time/ | | +| total_timesteps | 3250000 | +| train/ | | +| approx_kl | 0.007944042 | +| clip_fraction | 0.0812 | +| clip_range | 0.2 | +| entropy_loss | -3.37 | +| explained_variance | 0.925 | +| learning_rate | 0.0003 | +| loss | -0.0379 | +| n_updates | 1980 | +| policy_gradient_loss | -0.00306 | +| std | 1.31 | +| value_loss | 0.00578 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1702 | +| iterations | 199 | +| time_elapsed | 1914 | +| total_timesteps | 3260416 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1706 | +| iterations | 200 | +| time_elapsed | 1920 | +| total_timesteps | 3276800 | +| train/ | | +| approx_kl | 0.007009124 | +| clip_fraction | 0.0786 | +| clip_range | 0.2 | +| entropy_loss | -3.36 | +| explained_variance | 0.945 | +| learning_rate | 0.0003 | +| loss | -0.0398 | +| n_updates | 1990 | +| policy_gradient_loss | -0.00469 | +| std | 1.31 | +| value_loss | 0.00344 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1709 | +| iterations | 201 | +| time_elapsed | 1926 | +| total_timesteps | 3293184 | +| train/ | | +| approx_kl | 0.007446406 | +| clip_fraction | 0.0736 | +| clip_range | 0.2 | +| entropy_loss | -3.36 | +| explained_variance | 0.957 | +| learning_rate | 0.0003 | +| loss | -0.0493 | +| n_updates | 2000 | +| policy_gradient_loss | -0.00431 | +| std | 1.31 | +| value_loss | 0.00262 | +----------------------------------------- +Eval num_timesteps=3300000, episode_reward=18.42 +/- 36.17 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 18.4 | +| time/ | | +| total_timesteps | 3300000 | +| train/ | | +| approx_kl | 0.007855328 | +| clip_fraction | 0.0783 | +| clip_range | 0.2 | +| entropy_loss | -3.38 | +| explained_variance | 0.951 | +| learning_rate | 0.0003 | +| loss | -0.0381 | +| n_updates | 2010 | +| policy_gradient_loss | -0.00422 | +| std | 1.32 | +| value_loss | 0.00379 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1701 | +| iterations | 202 | +| time_elapsed | 1945 | +| total_timesteps | 3309568 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1704 | +| iterations | 203 | +| time_elapsed | 1951 | +| total_timesteps | 3325952 | +| train/ | | +| approx_kl | 0.0073990654 | +| clip_fraction | 0.0773 | +| clip_range | 0.2 | +| entropy_loss | -3.38 | +| explained_variance | 0.89 | +| learning_rate | 0.0003 | +| loss | -0.0319 | +| n_updates | 2020 | +| policy_gradient_loss | -0.00507 | +| std | 1.32 | +| value_loss | 0.0165 | +------------------------------------------ +------------------------------------------ +| time/ | | +| fps | 1707 | +| iterations | 204 | +| time_elapsed | 1956 | +| total_timesteps | 3342336 | +| train/ | | +| approx_kl | 0.0076738494 | +| clip_fraction | 0.0913 | +| clip_range | 0.2 | +| entropy_loss | -3.38 | +| explained_variance | 0.914 | +| learning_rate | 0.0003 | +| loss | -0.0326 | +| n_updates | 2030 | +| policy_gradient_loss | -0.00611 | +| std | 1.32 | +| value_loss | 0.00854 | +------------------------------------------ +Eval num_timesteps=3350000, episode_reward=39.75 +/- 38.09 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 39.8 | +| time/ | | +| total_timesteps | 3350000 | +| train/ | | +| approx_kl | 0.007704767 | +| clip_fraction | 0.0813 | +| clip_range | 0.2 | +| entropy_loss | -3.39 | +| explained_variance | 0.822 | +| learning_rate | 0.0003 | +| loss | -0.0351 | +| n_updates | 2040 | +| policy_gradient_loss | -0.0056 | +| std | 1.33 | +| value_loss | 0.0095 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1700 | +| iterations | 205 | +| time_elapsed | 1974 | +| total_timesteps | 3358720 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1703 | +| iterations | 206 | +| time_elapsed | 1980 | +| total_timesteps | 3375104 | +| train/ | | +| approx_kl | 0.006841295 | +| clip_fraction | 0.0682 | +| clip_range | 0.2 | +| entropy_loss | -3.39 | +| explained_variance | 0.973 | +| learning_rate | 0.0003 | +| loss | -0.04 | +| n_updates | 2050 | +| policy_gradient_loss | -0.00457 | +| std | 1.33 | +| value_loss | 0.00456 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 1707 | +| iterations | 207 | +| time_elapsed | 1986 | +| total_timesteps | 3391488 | +| train/ | | +| approx_kl | 0.0063885115 | +| clip_fraction | 0.0749 | +| clip_range | 0.2 | +| entropy_loss | -3.4 | +| explained_variance | 0.962 | +| learning_rate | 0.0003 | +| loss | -0.041 | +| n_updates | 2060 | +| policy_gradient_loss | -0.00455 | +| std | 1.34 | +| value_loss | 0.00373 | +------------------------------------------ +Eval num_timesteps=3400000, episode_reward=26.62 +/- 43.12 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 26.6 | +| time/ | | +| total_timesteps | 3400000 | +| train/ | | +| approx_kl | 0.006273965 | +| clip_fraction | 0.0709 | +| clip_range | 0.2 | +| entropy_loss | -3.4 | +| explained_variance | 0.956 | +| learning_rate | 0.0003 | +| loss | -0.0465 | +| n_updates | 2070 | +| policy_gradient_loss | -0.00249 | +| std | 1.33 | +| value_loss | 0.00679 | +----------------------------------------- + +[Diag @ 3,400,000 | n_sheep=5 | success=0%] + NEVER_COMPACT 20/20 + action_mag mean=0.089 p10=0.005 p90=0.074 (0=stopped, 1=full speed) + min_flock_radius mean=9.14m best=5.59m (target <5m to compact) + min_dog_to_com mean=0.69m best=0.10m (FLEE_DIST=7m) + min_com_to_pen mean=12.77m best=5.15m + reward/step (mean): progress=-0.0015 alignment=+0.0368 pen_bonus=+0.0020 step_cost=-0.0200 complete=+0.0000 +-------------------------------- +| time/ | | +| fps | 1691 | +| iterations | 208 | +| time_elapsed | 2014 | +| total_timesteps | 3407872 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1695 | +| iterations | 209 | +| time_elapsed | 2019 | +| total_timesteps | 3424256 | +| train/ | | +| approx_kl | 0.006433293 | +| clip_fraction | 0.0727 | +| clip_range | 0.2 | +| entropy_loss | -3.4 | +| explained_variance | 0.932 | +| learning_rate | 0.0003 | +| loss | -0.0268 | +| n_updates | 2080 | +| policy_gradient_loss | -0.00365 | +| std | 1.33 | +| value_loss | 0.00657 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1698 | +| iterations | 210 | +| time_elapsed | 2025 | +| total_timesteps | 3440640 | +| train/ | | +| approx_kl | 0.007235542 | +| clip_fraction | 0.0839 | +| clip_range | 0.2 | +| entropy_loss | -3.39 | +| explained_variance | 0.935 | +| learning_rate | 0.0003 | +| loss | -0.0344 | +| n_updates | 2090 | +| policy_gradient_loss | -0.00417 | +| std | 1.32 | +| value_loss | 0.0137 | +----------------------------------------- +Eval num_timesteps=3450000, episode_reward=35.54 +/- 43.01 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 35.5 | +| time/ | | +| total_timesteps | 3450000 | +| train/ | | +| approx_kl | 0.007782845 | +| clip_fraction | 0.0859 | +| clip_range | 0.2 | +| entropy_loss | -3.4 | +| explained_variance | 0.924 | +| learning_rate | 0.0003 | +| loss | -0.044 | +| n_updates | 2100 | +| policy_gradient_loss | -0.00561 | +| std | 1.34 | +| value_loss | 0.0043 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1690 | +| iterations | 211 | +| time_elapsed | 2044 | +| total_timesteps | 3457024 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1693 | +| iterations | 212 | +| time_elapsed | 2050 | +| total_timesteps | 3473408 | +| train/ | | +| approx_kl | 0.0075765867 | +| clip_fraction | 0.0746 | +| clip_range | 0.2 | +| entropy_loss | -3.41 | +| explained_variance | 0.896 | +| learning_rate | 0.0003 | +| loss | -0.0293 | +| n_updates | 2110 | +| policy_gradient_loss | -0.00406 | +| std | 1.33 | +| value_loss | 0.011 | +------------------------------------------ +------------------------------------------ +| time/ | | +| fps | 1696 | +| iterations | 213 | +| time_elapsed | 2056 | +| total_timesteps | 3489792 | +| train/ | | +| approx_kl | 0.0072322125 | +| clip_fraction | 0.071 | +| clip_range | 0.2 | +| entropy_loss | -3.41 | +| explained_variance | 0.949 | +| learning_rate | 0.0003 | +| loss | -0.0498 | +| n_updates | 2120 | +| policy_gradient_loss | -0.00421 | +| std | 1.34 | +| value_loss | 0.006 | +------------------------------------------ +Eval num_timesteps=3500000, episode_reward=54.69 +/- 47.39 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 54.7 | +| time/ | | +| total_timesteps | 3500000 | +| train/ | | +| approx_kl | 0.0073479656 | +| clip_fraction | 0.0778 | +| clip_range | 0.2 | +| entropy_loss | -3.4 | +| explained_variance | 0.824 | +| learning_rate | 0.0003 | +| loss | -0.0408 | +| n_updates | 2130 | +| policy_gradient_loss | -0.00465 | +| std | 1.32 | +| value_loss | 0.00657 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1688 | +| iterations | 214 | +| time_elapsed | 2076 | +| total_timesteps | 3506176 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1692 | +| iterations | 215 | +| time_elapsed | 2081 | +| total_timesteps | 3522560 | +| train/ | | +| approx_kl | 0.007274649 | +| clip_fraction | 0.0798 | +| clip_range | 0.2 | +| entropy_loss | -3.39 | +| explained_variance | 0.951 | +| learning_rate | 0.0003 | +| loss | -0.0356 | +| n_updates | 2140 | +| policy_gradient_loss | -0.00383 | +| std | 1.33 | +| value_loss | 0.00355 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 1695 | +| iterations | 216 | +| time_elapsed | 2087 | +| total_timesteps | 3538944 | +| train/ | | +| approx_kl | 0.0068056686 | +| clip_fraction | 0.0726 | +| clip_range | 0.2 | +| entropy_loss | -3.38 | +| explained_variance | 0.955 | +| learning_rate | 0.0003 | +| loss | -0.0428 | +| n_updates | 2150 | +| policy_gradient_loss | -0.00356 | +| std | 1.32 | +| value_loss | 0.00378 | +------------------------------------------ +Eval num_timesteps=3550000, episode_reward=8.69 +/- 39.03 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 8.69 | +| time/ | | +| total_timesteps | 3550000 | +| train/ | | +| approx_kl | 0.008211401 | +| clip_fraction | 0.0801 | +| clip_range | 0.2 | +| entropy_loss | -3.38 | +| explained_variance | 0.972 | +| learning_rate | 0.0003 | +| loss | -0.0366 | +| n_updates | 2160 | +| policy_gradient_loss | -0.00453 | +| std | 1.32 | +| value_loss | 0.00445 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1687 | +| iterations | 217 | +| time_elapsed | 2106 | +| total_timesteps | 3555328 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1690 | +| iterations | 218 | +| time_elapsed | 2112 | +| total_timesteps | 3571712 | +| train/ | | +| approx_kl | 0.008278061 | +| clip_fraction | 0.0871 | +| clip_range | 0.2 | +| entropy_loss | -3.38 | +| explained_variance | 0.931 | +| learning_rate | 0.0003 | +| loss | -0.0324 | +| n_updates | 2170 | +| policy_gradient_loss | -0.00486 | +| std | 1.32 | +| value_loss | 0.00377 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1693 | +| iterations | 219 | +| time_elapsed | 2119 | +| total_timesteps | 3588096 | +| train/ | | +| approx_kl | 0.007908824 | +| clip_fraction | 0.0777 | +| clip_range | 0.2 | +| entropy_loss | -3.39 | +| explained_variance | 0.951 | +| learning_rate | 0.0003 | +| loss | -0.0353 | +| n_updates | 2180 | +| policy_gradient_loss | -0.00318 | +| std | 1.32 | +| value_loss | 0.00768 | +----------------------------------------- +Eval num_timesteps=3600000, episode_reward=26.00 +/- 35.20 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 26 | +| time/ | | +| total_timesteps | 3600000 | +| train/ | | +| approx_kl | 0.0068260087 | +| clip_fraction | 0.0761 | +| clip_range | 0.2 | +| entropy_loss | -3.38 | +| explained_variance | 0.946 | +| learning_rate | 0.0003 | +| loss | -0.0257 | +| n_updates | 2190 | +| policy_gradient_loss | -0.00375 | +| std | 1.32 | +| value_loss | 0.00745 | +------------------------------------------ + +[Diag @ 3,600,000 | n_sheep=5 | success=0%] + NEVER_COMPACT 20/20 + action_mag mean=0.114 p10=0.006 p90=0.281 (0=stopped, 1=full speed) + min_flock_radius mean=9.62m best=5.04m (target <5m to compact) + min_dog_to_com mean=0.77m best=0.40m (FLEE_DIST=7m) + min_com_to_pen mean=13.31m best=6.37m + reward/step (mean): progress=+0.0071 alignment=+0.0385 pen_bonus=+0.0008 step_cost=-0.0200 complete=+0.0000 +-------------------------------- +| time/ | | +| fps | 1677 | +| iterations | 220 | +| time_elapsed | 2148 | +| total_timesteps | 3604480 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1680 | +| iterations | 221 | +| time_elapsed | 2154 | +| total_timesteps | 3620864 | +| train/ | | +| approx_kl | 0.0084966235 | +| clip_fraction | 0.0849 | +| clip_range | 0.2 | +| entropy_loss | -3.38 | +| explained_variance | 0.936 | +| learning_rate | 0.0003 | +| loss | -0.0498 | +| n_updates | 2200 | +| policy_gradient_loss | -0.00478 | +| std | 1.32 | +| value_loss | 0.00856 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1683 | +| iterations | 222 | +| time_elapsed | 2160 | +| total_timesteps | 3637248 | +| train/ | | +| approx_kl | 0.007236682 | +| clip_fraction | 0.072 | +| clip_range | 0.2 | +| entropy_loss | -3.37 | +| explained_variance | 0.956 | +| learning_rate | 0.0003 | +| loss | -0.0436 | +| n_updates | 2210 | +| policy_gradient_loss | -0.0054 | +| std | 1.31 | +| value_loss | 0.00748 | +----------------------------------------- +Eval num_timesteps=3650000, episode_reward=48.26 +/- 45.24 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 48.3 | +| time/ | | +| total_timesteps | 3650000 | +| train/ | | +| approx_kl | 0.0076099336 | +| clip_fraction | 0.0694 | +| clip_range | 0.2 | +| entropy_loss | -3.37 | +| explained_variance | 0.942 | +| learning_rate | 0.0003 | +| loss | -0.037 | +| n_updates | 2220 | +| policy_gradient_loss | -0.00369 | +| std | 1.31 | +| value_loss | 0.00888 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1676 | +| iterations | 223 | +| time_elapsed | 2179 | +| total_timesteps | 3653632 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1679 | +| iterations | 224 | +| time_elapsed | 2185 | +| total_timesteps | 3670016 | +| train/ | | +| approx_kl | 0.007888832 | +| clip_fraction | 0.0783 | +| clip_range | 0.2 | +| entropy_loss | -3.37 | +| explained_variance | 0.914 | +| learning_rate | 0.0003 | +| loss | -0.0298 | +| n_updates | 2230 | +| policy_gradient_loss | -0.00449 | +| std | 1.32 | +| value_loss | 0.00867 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 1682 | +| iterations | 225 | +| time_elapsed | 2190 | +| total_timesteps | 3686400 | +| train/ | | +| approx_kl | 0.0069514583 | +| clip_fraction | 0.0791 | +| clip_range | 0.2 | +| entropy_loss | -3.38 | +| explained_variance | 0.946 | +| learning_rate | 0.0003 | +| loss | -0.0283 | +| n_updates | 2240 | +| policy_gradient_loss | -0.00427 | +| std | 1.32 | +| value_loss | 0.00382 | +------------------------------------------ +Eval num_timesteps=3700000, episode_reward=19.29 +/- 50.45 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 19.3 | +| time/ | | +| total_timesteps | 3700000 | +| train/ | | +| approx_kl | 0.008142319 | +| clip_fraction | 0.0865 | +| clip_range | 0.2 | +| entropy_loss | -3.37 | +| explained_variance | 0.92 | +| learning_rate | 0.0003 | +| loss | -0.0467 | +| n_updates | 2250 | +| policy_gradient_loss | -0.00506 | +| std | 1.31 | +| value_loss | 0.00547 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1674 | +| iterations | 226 | +| time_elapsed | 2210 | +| total_timesteps | 3702784 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1677 | +| iterations | 227 | +| time_elapsed | 2216 | +| total_timesteps | 3719168 | +| train/ | | +| approx_kl | 0.0077144434 | +| clip_fraction | 0.0783 | +| clip_range | 0.2 | +| entropy_loss | -3.36 | +| explained_variance | 0.931 | +| learning_rate | 0.0003 | +| loss | -0.0331 | +| n_updates | 2260 | +| policy_gradient_loss | -0.00529 | +| std | 1.31 | +| value_loss | 0.00486 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1667 | +| iterations | 228 | +| time_elapsed | 2239 | +| total_timesteps | 3735552 | +| train/ | | +| approx_kl | 0.007820845 | +| clip_fraction | 0.087 | +| clip_range | 0.2 | +| entropy_loss | -3.37 | +| explained_variance | 0.95 | +| learning_rate | 0.0003 | +| loss | -0.0321 | +| n_updates | 2270 | +| policy_gradient_loss | -0.00493 | +| std | 1.31 | +| value_loss | 0.00531 | +----------------------------------------- +Eval num_timesteps=3750000, episode_reward=35.91 +/- 47.57 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 35.9 | +| time/ | | +| total_timesteps | 3750000 | +| train/ | | +| approx_kl | 0.008380983 | +| clip_fraction | 0.0868 | +| clip_range | 0.2 | +| entropy_loss | -3.37 | +| explained_variance | 0.927 | +| learning_rate | 0.0003 | +| loss | -0.0318 | +| n_updates | 2280 | +| policy_gradient_loss | -0.0046 | +| std | 1.32 | +| value_loss | 0.00684 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1639 | +| iterations | 229 | +| time_elapsed | 2289 | +| total_timesteps | 3751936 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1642 | +| iterations | 230 | +| time_elapsed | 2294 | +| total_timesteps | 3768320 | +| train/ | | +| approx_kl | 0.007415652 | +| clip_fraction | 0.0758 | +| clip_range | 0.2 | +| entropy_loss | -3.37 | +| explained_variance | 0.953 | +| learning_rate | 0.0003 | +| loss | -0.0354 | +| n_updates | 2290 | +| policy_gradient_loss | -0.00557 | +| std | 1.31 | +| value_loss | 0.0122 | +----------------------------------------- +------------------------------------------ +| time/ | | +| fps | 1646 | +| iterations | 231 | +| time_elapsed | 2299 | +| total_timesteps | 3784704 | +| train/ | | +| approx_kl | 0.0071868873 | +| clip_fraction | 0.0736 | +| clip_range | 0.2 | +| entropy_loss | -3.38 | +| explained_variance | 0.954 | +| learning_rate | 0.0003 | +| loss | -0.0457 | +| n_updates | 2300 | +| policy_gradient_loss | -0.00442 | +| std | 1.33 | +| value_loss | 0.0201 | +------------------------------------------ +Eval num_timesteps=3800000, episode_reward=31.58 +/- 50.62 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 31.6 | +| time/ | | +| total_timesteps | 3800000 | +| train/ | | +| approx_kl | 0.0074889637 | +| clip_fraction | 0.0805 | +| clip_range | 0.2 | +| entropy_loss | -3.4 | +| explained_variance | 0.95 | +| learning_rate | 0.0003 | +| loss | -0.0355 | +| n_updates | 2310 | +| policy_gradient_loss | -0.00474 | +| std | 1.33 | +| value_loss | 0.00892 | +------------------------------------------ + +[Diag @ 3,800,000 | n_sheep=5 | success=0%] + NEVER_COMPACT 19/20 + COMPACT_CANT_DRIVE 1/20 + action_mag mean=0.128 p10=0.005 p90=0.475 (0=stopped, 1=full speed) + min_flock_radius mean=8.35m best=4.80m (target <5m to compact) + min_dog_to_com mean=0.71m best=0.23m (FLEE_DIST=7m) + min_com_to_pen mean=13.72m best=8.54m + reward/step (mean): progress=+0.0063 alignment=+0.0388 pen_bonus=+0.0010 step_cost=-0.0200 complete=+0.0000 +-------------------------------- +| time/ | | +| fps | 1633 | +| iterations | 232 | +| time_elapsed | 2326 | +| total_timesteps | 3801088 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1636 | +| iterations | 233 | +| time_elapsed | 2332 | +| total_timesteps | 3817472 | +| train/ | | +| approx_kl | 0.0070604184 | +| clip_fraction | 0.0765 | +| clip_range | 0.2 | +| entropy_loss | -3.4 | +| explained_variance | 0.953 | +| learning_rate | 0.0003 | +| loss | -0.0398 | +| n_updates | 2320 | +| policy_gradient_loss | -0.00453 | +| std | 1.33 | +| value_loss | 0.00675 | +------------------------------------------ +----------------------------------------- +| time/ | | +| fps | 1640 | +| iterations | 234 | +| time_elapsed | 2336 | +| total_timesteps | 3833856 | +| train/ | | +| approx_kl | 0.007709453 | +| clip_fraction | 0.0816 | +| clip_range | 0.2 | +| entropy_loss | -3.4 | +| explained_variance | 0.943 | +| learning_rate | 0.0003 | +| loss | -0.0359 | +| n_updates | 2330 | +| policy_gradient_loss | -0.00423 | +| std | 1.34 | +| value_loss | 0.00754 | +----------------------------------------- +Eval num_timesteps=3850000, episode_reward=42.98 +/- 33.36 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 43 | +| time/ | | +| total_timesteps | 3850000 | +| train/ | | +| approx_kl | 0.007679659 | +| clip_fraction | 0.0858 | +| clip_range | 0.2 | +| entropy_loss | -3.4 | +| explained_variance | 0.961 | +| learning_rate | 0.0003 | +| loss | -0.032 | +| n_updates | 2340 | +| policy_gradient_loss | -0.00716 | +| std | 1.33 | +| value_loss | 0.00907 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1635 | +| iterations | 235 | +| time_elapsed | 2354 | +| total_timesteps | 3850240 | +-------------------------------- +------------------------------------------ +| time/ | | +| fps | 1638 | +| iterations | 236 | +| time_elapsed | 2360 | +| total_timesteps | 3866624 | +| train/ | | +| approx_kl | 0.0077598644 | +| clip_fraction | 0.0848 | +| clip_range | 0.2 | +| entropy_loss | -3.38 | +| explained_variance | 0.96 | +| learning_rate | 0.0003 | +| loss | -0.0468 | +| n_updates | 2350 | +| policy_gradient_loss | -0.005 | +| std | 1.33 | +| value_loss | 0.0101 | +------------------------------------------ +------------------------------------------ +| time/ | | +| fps | 1641 | +| iterations | 237 | +| time_elapsed | 2366 | +| total_timesteps | 3883008 | +| train/ | | +| approx_kl | 0.0068941545 | +| clip_fraction | 0.0673 | +| clip_range | 0.2 | +| entropy_loss | -3.39 | +| explained_variance | 0.96 | +| learning_rate | 0.0003 | +| loss | -0.0398 | +| n_updates | 2360 | +| policy_gradient_loss | -0.0047 | +| std | 1.33 | +| value_loss | 0.0113 | +------------------------------------------ +------------------------------------------ +| time/ | | +| fps | 1643 | +| iterations | 238 | +| time_elapsed | 2372 | +| total_timesteps | 3899392 | +| train/ | | +| approx_kl | 0.0073663425 | +| clip_fraction | 0.0785 | +| clip_range | 0.2 | +| entropy_loss | -3.41 | +| explained_variance | 0.963 | +| learning_rate | 0.0003 | +| loss | -0.0319 | +| n_updates | 2370 | +| policy_gradient_loss | -0.00458 | +| std | 1.35 | +| value_loss | 0.0036 | +------------------------------------------ +Eval num_timesteps=3900000, episode_reward=33.74 +/- 40.96 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 33.7 | +| time/ | | +| total_timesteps | 3900000 | +| train/ | | +| approx_kl | 0.007122398 | +| clip_fraction | 0.0759 | +| clip_range | 0.2 | +| entropy_loss | -3.41 | +| explained_variance | 0.972 | +| learning_rate | 0.0003 | +| loss | -0.0383 | +| n_updates | 2380 | +| policy_gradient_loss | -0.00446 | +| std | 1.35 | +| value_loss | 0.00445 | +----------------------------------------- +-------------------------------- +| time/ | | +| fps | 1637 | +| iterations | 239 | +| time_elapsed | 2391 | +| total_timesteps | 3915776 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1640 | +| iterations | 240 | +| time_elapsed | 2396 | +| total_timesteps | 3932160 | +| train/ | | +| approx_kl | 0.008265208 | +| clip_fraction | 0.0845 | +| clip_range | 0.2 | +| entropy_loss | -3.41 | +| explained_variance | 0.926 | +| learning_rate | 0.0003 | +| loss | -0.0361 | +| n_updates | 2390 | +| policy_gradient_loss | -0.00536 | +| std | 1.34 | +| value_loss | 0.00846 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1643 | +| iterations | 241 | +| time_elapsed | 2402 | +| total_timesteps | 3948544 | +| train/ | | +| approx_kl | 0.008583728 | +| clip_fraction | 0.0893 | +| clip_range | 0.2 | +| entropy_loss | -3.42 | +| explained_variance | 0.915 | +| learning_rate | 0.0003 | +| loss | -0.0297 | +| n_updates | 2400 | +| policy_gradient_loss | -0.00592 | +| std | 1.35 | +| value_loss | 0.0068 | +----------------------------------------- +Eval num_timesteps=3950000, episode_reward=46.06 +/- 34.67 +Episode length: 2000.00 +/- 0.00 +------------------------------------------ +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 46.1 | +| time/ | | +| total_timesteps | 3950000 | +| train/ | | +| approx_kl | 0.0060660206 | +| clip_fraction | 0.0654 | +| clip_range | 0.2 | +| entropy_loss | -3.42 | +| explained_variance | 0.942 | +| learning_rate | 0.0003 | +| loss | -0.0359 | +| n_updates | 2410 | +| policy_gradient_loss | -0.0038 | +| std | 1.35 | +| value_loss | 0.00296 | +------------------------------------------ +-------------------------------- +| time/ | | +| fps | 1637 | +| iterations | 242 | +| time_elapsed | 2421 | +| total_timesteps | 3964928 | +-------------------------------- +----------------------------------------- +| time/ | | +| fps | 1639 | +| iterations | 243 | +| time_elapsed | 2427 | +| total_timesteps | 3981312 | +| train/ | | +| approx_kl | 0.007591601 | +| clip_fraction | 0.0808 | +| clip_range | 0.2 | +| entropy_loss | -3.4 | +| explained_variance | 0.964 | +| learning_rate | 0.0003 | +| loss | -0.0386 | +| n_updates | 2420 | +| policy_gradient_loss | -0.00575 | +| std | 1.34 | +| value_loss | 0.00714 | +----------------------------------------- +----------------------------------------- +| time/ | | +| fps | 1642 | +| iterations | 244 | +| time_elapsed | 2433 | +| total_timesteps | 3997696 | +| train/ | | +| approx_kl | 0.006255053 | +| clip_fraction | 0.0663 | +| clip_range | 0.2 | +| entropy_loss | -3.41 | +| explained_variance | 0.939 | +| learning_rate | 0.0003 | +| loss | -0.0304 | +| n_updates | 2430 | +| policy_gradient_loss | -0.00497 | +| std | 1.35 | +| value_loss | 0.00585 | +----------------------------------------- +Eval num_timesteps=4000000, episode_reward=19.52 +/- 38.43 +Episode length: 2000.00 +/- 0.00 +----------------------------------------- +| eval/ | | +| mean_ep_length | 2e+03 | +| mean_reward | 19.5 | +| time/ | | +| total_timesteps | 4000000 | +| train/ | | +| approx_kl | 0.008279499 | +| clip_fraction | 0.0814 | +| clip_range | 0.2 | +| entropy_loss | -3.4 | +| explained_variance | 0.958 | +| learning_rate | 0.0003 | +| loss | -0.0379 | +| n_updates | 2440 | +| policy_gradient_loss | -0.00568 | +| std | 1.34 | +| value_loss | 0.00469 | +----------------------------------------- + +[Diag @ 4,000,000 | n_sheep=5 | success=0%] + NEVER_COMPACT 20/20 + action_mag mean=0.158 p10=0.006 p90=0.744 (0=stopped, 1=full speed) + min_flock_radius mean=8.94m best=6.34m (target <5m to compact) + min_dog_to_com mean=0.82m best=0.49m (FLEE_DIST=7m) + min_com_to_pen mean=13.86m best=7.80m + reward/step (mean): progress=+0.0029 alignment=+0.0397 pen_bonus=+0.0003 step_cost=-0.0200 complete=+0.0000 +-------------------------------- +| time/ | | +| fps | 1630 | +| iterations | 245 | +| time_elapsed | 2461 | +| total_timesteps | 4014080 | +-------------------------------- + +Training complete. Artefacts saved to runs/ppo_debug/ diff --git a/training/runs/ppo_debug/best_model/best_model.zip b/training/runs/ppo_debug/best_model/best_model.zip new file mode 100644 index 0000000..2618c2c Binary files /dev/null and b/training/runs/ppo_debug/best_model/best_model.zip differ diff --git a/training/runs/ppo_debug/evaluations.npz b/training/runs/ppo_debug/evaluations.npz new file mode 100644 index 0000000..84fd19d Binary files /dev/null and b/training/runs/ppo_debug/evaluations.npz differ diff --git a/training/runs/ppo_debug/final_model.zip b/training/runs/ppo_debug/final_model.zip new file mode 100644 index 0000000..e3be97e Binary files /dev/null and b/training/runs/ppo_debug/final_model.zip differ diff --git a/training/runs/ppo_debug/vecnorm.pkl b/training/runs/ppo_debug/vecnorm.pkl new file mode 100644 index 0000000..c17b706 Binary files /dev/null and b/training/runs/ppo_debug/vecnorm.pkl differ diff --git a/training/runs/ppo_v3/best_model/best_model.zip b/training/runs/ppo_v3/best_model/best_model.zip new file mode 100644 index 0000000..82d0259 Binary files /dev/null and b/training/runs/ppo_v3/best_model/best_model.zip differ diff --git a/training/runs/ppo_v3/evaluations.npz b/training/runs/ppo_v3/evaluations.npz new file mode 100644 index 0000000..1d5ee82 Binary files /dev/null and b/training/runs/ppo_v3/evaluations.npz differ diff --git a/training/runs/ppo_v3/final_model.zip b/training/runs/ppo_v3/final_model.zip new file mode 100644 index 0000000..ce84843 Binary files /dev/null and b/training/runs/ppo_v3/final_model.zip differ diff --git a/training/runs/ppo_v3/vecnorm.pkl b/training/runs/ppo_v3/vecnorm.pkl new file mode 100644 index 0000000..4729c11 Binary files /dev/null and b/training/runs/ppo_v3/vecnorm.pkl differ