diff --git a/training/runs/ppo_debug.log b/training/runs/ppo_debug.log
new file mode 100644
index 0000000..81b308e
--- /dev/null
+++ b/training/runs/ppo_debug.log
@@ -0,0 +1,5569 @@
+Using cpu device
+Logging to runs/ppo_debug/ppo_1
+------------------------------
+| time/              |       |
+|    fps             | 5496  |
+|    iterations      | 1     |
+|    time_elapsed    | 2     |
+|    total_timesteps | 16384 |
+------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 4317         |
+|    iterations           | 2            |
+|    time_elapsed         | 7            |
+|    total_timesteps      | 32768        |
+| train/                  |              |
+|    approx_kl            | 0.0036917897 |
+|    clip_fraction        | 0.0212       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.83        |
+|    explained_variance   | 0.352        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0118      |
+|    n_updates            | 10           |
+|    policy_gradient_loss | -0.000544    |
+|    std                  | 0.999        |
+|    value_loss           | 0.0658       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 3946         |
+|    iterations           | 3            |
+|    time_elapsed         | 12           |
+|    total_timesteps      | 49152        |
+| train/                  |              |
+|    approx_kl            | 0.0033213054 |
+|    clip_fraction        | 0.0266       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.83        |
+|    explained_variance   | 0.502        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0255      |
+|    n_updates            | 20           |
+|    policy_gradient_loss | -0.00158     |
+|    std                  | 0.997        |
+|    value_loss           | 0.08         |
+------------------------------------------
+/home/jalf/miniconda3/envs/tir/lib/python3.12/site-packages/stable_baselines3/common/evaluation.py:71: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.
+  warnings.warn(
+Eval num_timesteps=50000, episode_reward=-32.92 +/- 15.12
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -32.9       |
+| time/                   |             |
+|    total_timesteps      | 50000       |
+| train/                  |             |
+|    approx_kl            | 0.005147726 |
+|    clip_fraction        | 0.0478      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.84       |
+|    explained_variance   | 0.893       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0145     |
+|    n_updates            | 30          |
+|    policy_gradient_loss | -0.00318    |
+|    std                  | 1           |
+|    value_loss           | 0.0194      |
+-----------------------------------------
+New best mean reward!
+------------------------------
+| time/              |       |
+|    fps             | 2231  |
+|    iterations      | 4     |
+|    time_elapsed    | 29    |
+|    total_timesteps | 65536 |
+------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2444         |
+|    iterations           | 5            |
+|    time_elapsed         | 33           |
+|    total_timesteps      | 81920        |
+| train/                  |              |
+|    approx_kl            | 0.0054671075 |
+|    clip_fraction        | 0.0529       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.84        |
+|    explained_variance   | 0.914        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.021       |
+|    n_updates            | 40           |
+|    policy_gradient_loss | -0.00416     |
+|    std                  | 1            |
+|    value_loss           | 0.0247       |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2616        |
+|    iterations           | 6           |
+|    time_elapsed         | 37          |
+|    total_timesteps      | 98304       |
+| train/                  |             |
+|    approx_kl            | 0.004603466 |
+|    clip_fraction        | 0.0379      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.83       |
+|    explained_variance   | 0.931       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0193     |
+|    n_updates            | 50          |
+|    policy_gradient_loss | -0.00284    |
+|    std                  | 0.995       |
+|    value_loss           | 0.0171      |
+-----------------------------------------
+/home/jalf/miniconda3/envs/tir/lib/python3.12/site-packages/stable_baselines3/common/evaluation.py:71: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.
+  warnings.warn(
+Eval num_timesteps=100000, episode_reward=-27.45 +/- 49.10
+Episode length: 1973.15 +/- 86.14
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.97e+03     |
+|    mean_reward          | -27.4        |
+| time/                   |              |
+|    total_timesteps      | 100000       |
+| train/                  |              |
+|    approx_kl            | 0.0053039393 |
+|    clip_fraction        | 0.0564       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.83        |
+|    explained_variance   | 0.878        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0325      |
+|    n_updates            | 60           |
+|    policy_gradient_loss | -0.00404     |
+|    std                  | 0.998        |
+|    value_loss           | 0.0118       |
+------------------------------------------
+New best mean reward!
+-------------------------------
+| time/              |        |
+|    fps             | 2212   |
+|    iterations      | 7      |
+|    time_elapsed    | 51     |
+|    total_timesteps | 114688 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2332         |
+|    iterations           | 8            |
+|    time_elapsed         | 56           |
+|    total_timesteps      | 131072       |
+| train/                  |              |
+|    approx_kl            | 0.0048020086 |
+|    clip_fraction        | 0.0449       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.84        |
+|    explained_variance   | 0.839        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0375      |
+|    n_updates            | 70           |
+|    policy_gradient_loss | -0.00359     |
+|    std                  | 1            |
+|    value_loss           | 0.0102       |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2429        |
+|    iterations           | 9           |
+|    time_elapsed         | 60          |
+|    total_timesteps      | 147456      |
+| train/                  |             |
+|    approx_kl            | 0.004460754 |
+|    clip_fraction        | 0.0349      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.85       |
+|    explained_variance   | 0.874       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0293     |
+|    n_updates            | 80          |
+|    policy_gradient_loss | -0.00294    |
+|    std                  | 1.01        |
+|    value_loss           | 0.0132      |
+-----------------------------------------
+Eval num_timesteps=150000, episode_reward=-33.46 +/- 39.53
+Episode length: 1990.60 +/- 40.97
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 1.99e+03    |
+|    mean_reward          | -33.5       |
+| time/                   |             |
+|    total_timesteps      | 150000      |
+| train/                  |             |
+|    approx_kl            | 0.003831089 |
+|    clip_fraction        | 0.0196      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.82       |
+|    explained_variance   | 0.381       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0191     |
+|    n_updates            | 90          |
+|    policy_gradient_loss | -0.00202    |
+|    std                  | 0.984       |
+|    value_loss           | 0.104       |
+-----------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 2147   |
+|    iterations      | 10     |
+|    time_elapsed    | 76     |
+|    total_timesteps | 163840 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2218         |
+|    iterations           | 11           |
+|    time_elapsed         | 81           |
+|    total_timesteps      | 180224       |
+| train/                  |              |
+|    approx_kl            | 0.0032510734 |
+|    clip_fraction        | 0.0246       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.82        |
+|    explained_variance   | 0.887        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0279      |
+|    n_updates            | 100          |
+|    policy_gradient_loss | -0.00207     |
+|    std                  | 0.993        |
+|    value_loss           | 0.045        |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2289         |
+|    iterations           | 12           |
+|    time_elapsed         | 85           |
+|    total_timesteps      | 196608       |
+| train/                  |              |
+|    approx_kl            | 0.0047060847 |
+|    clip_fraction        | 0.0387       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.83        |
+|    explained_variance   | 0.896        |
+|    learning_rate        | 0.0003       |
+|    loss                 | 0.00931      |
+|    n_updates            | 110          |
+|    policy_gradient_loss | -0.00305     |
+|    std                  | 0.994        |
+|    value_loss           | 0.0489       |
+------------------------------------------
+Eval num_timesteps=200000, episode_reward=-18.47 +/- 55.53
+Episode length: 1938.95 +/- 147.97
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.94e+03     |
+|    mean_reward          | -18.5        |
+| time/                   |              |
+|    total_timesteps      | 200000       |
+| train/                  |              |
+|    approx_kl            | 0.0047602034 |
+|    clip_fraction        | 0.0421       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.86        |
+|    explained_variance   | 0.968        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0301      |
+|    n_updates            | 120          |
+|    policy_gradient_loss | -0.00281     |
+|    std                  | 1.01         |
+|    value_loss           | 0.0094       |
+------------------------------------------
+New best mean reward!
+
+[Diag @ 200,000 | n_sheep=1 | success=5%]
+  COMPACT_CANT_DRIVE         18/20
+  DROVE_NO_SHEEP             1/20
+  SUCCESS                    1/20
+  action_mag mean=0.269 p10=0.129 p90=0.447 (0=stopped, 1=full speed)
+  min_flock_radius mean=0.00m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=3.86m best=1.91m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=11.22m best=2.44m
+  reward/step (mean): progress=-0.0022  alignment=+0.0006  pen_bonus=+0.0003  step_cost=-0.0200  complete=+0.0026
+-------------------------------
+| time/              |        |
+|    fps             | 1964   |
+|    iterations      | 13     |
+|    time_elapsed    | 108    |
+|    total_timesteps | 212992 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2034         |
+|    iterations           | 14           |
+|    time_elapsed         | 112          |
+|    total_timesteps      | 229376       |
+| train/                  |              |
+|    approx_kl            | 0.0041663316 |
+|    clip_fraction        | 0.0373       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.88        |
+|    explained_variance   | 0.901        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0251      |
+|    n_updates            | 130          |
+|    policy_gradient_loss | -0.00223     |
+|    std                  | 1.03         |
+|    value_loss           | 0.00752      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2102         |
+|    iterations           | 15           |
+|    time_elapsed         | 116          |
+|    total_timesteps      | 245760       |
+| train/                  |              |
+|    approx_kl            | 0.0042076977 |
+|    clip_fraction        | 0.032        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.91        |
+|    explained_variance   | 0.939        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0333      |
+|    n_updates            | 140          |
+|    policy_gradient_loss | -0.00281     |
+|    std                  | 1.04         |
+|    value_loss           | 0.00934      |
+------------------------------------------
+Eval num_timesteps=250000, episode_reward=-37.07 +/- 35.02
+Episode length: 1938.20 +/- 269.38
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.94e+03     |
+|    mean_reward          | -37.1        |
+| time/                   |              |
+|    total_timesteps      | 250000       |
+| train/                  |              |
+|    approx_kl            | 0.0028561926 |
+|    clip_fraction        | 0.0171       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.92        |
+|    explained_variance   | 0.822        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0292      |
+|    n_updates            | 150          |
+|    policy_gradient_loss | -0.00113     |
+|    std                  | 1.04         |
+|    value_loss           | 0.0473       |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1990   |
+|    iterations      | 16     |
+|    time_elapsed    | 131    |
+|    total_timesteps | 262144 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2042         |
+|    iterations           | 17           |
+|    time_elapsed         | 136          |
+|    total_timesteps      | 278528       |
+| train/                  |              |
+|    approx_kl            | 0.0054259067 |
+|    clip_fraction        | 0.0468       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.91        |
+|    explained_variance   | 0.891        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.032       |
+|    n_updates            | 160          |
+|    policy_gradient_loss | -0.00597     |
+|    std                  | 1.03         |
+|    value_loss           | 0.0128       |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2085        |
+|    iterations           | 18          |
+|    time_elapsed         | 141         |
+|    total_timesteps      | 294912      |
+| train/                  |             |
+|    approx_kl            | 0.004205579 |
+|    clip_fraction        | 0.0291      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.91       |
+|    explained_variance   | 0.834       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0364     |
+|    n_updates            | 170         |
+|    policy_gradient_loss | -0.00307    |
+|    std                  | 1.03        |
+|    value_loss           | 0.0107      |
+-----------------------------------------
+Eval num_timesteps=300000, episode_reward=-25.41 +/- 48.70
+Episode length: 1886.45 +/- 435.99
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.89e+03     |
+|    mean_reward          | -25.4        |
+| time/                   |              |
+|    total_timesteps      | 300000       |
+| train/                  |              |
+|    approx_kl            | 0.0045948992 |
+|    clip_fraction        | 0.0354       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.9         |
+|    explained_variance   | 0.806        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0242      |
+|    n_updates            | 180          |
+|    policy_gradient_loss | -0.00236     |
+|    std                  | 1.03         |
+|    value_loss           | 0.0371       |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1981   |
+|    iterations      | 19     |
+|    time_elapsed    | 157    |
+|    total_timesteps | 311296 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2024        |
+|    iterations           | 20          |
+|    time_elapsed         | 161         |
+|    total_timesteps      | 327680      |
+| train/                  |             |
+|    approx_kl            | 0.005344864 |
+|    clip_fraction        | 0.0442      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.91       |
+|    explained_variance   | 0.877       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0369     |
+|    n_updates            | 190         |
+|    policy_gradient_loss | -0.00344    |
+|    std                  | 1.04        |
+|    value_loss           | 0.0104      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2066        |
+|    iterations           | 21          |
+|    time_elapsed         | 166         |
+|    total_timesteps      | 344064      |
+| train/                  |             |
+|    approx_kl            | 0.007574372 |
+|    clip_fraction        | 0.0753      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.92       |
+|    explained_variance   | 0.903       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0272     |
+|    n_updates            | 200         |
+|    policy_gradient_loss | -0.00726    |
+|    std                  | 1.04        |
+|    value_loss           | 0.0113      |
+-----------------------------------------
+Eval num_timesteps=350000, episode_reward=-21.14 +/- 37.01
+Episode length: 1959.80 +/- 175.23
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.96e+03     |
+|    mean_reward          | -21.1        |
+| time/                   |              |
+|    total_timesteps      | 350000       |
+| train/                  |              |
+|    approx_kl            | 0.0061714016 |
+|    clip_fraction        | 0.0569       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.91        |
+|    explained_variance   | 0.917        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.022       |
+|    n_updates            | 210          |
+|    policy_gradient_loss | -0.00598     |
+|    std                  | 1.04         |
+|    value_loss           | 0.0231       |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1984   |
+|    iterations      | 22     |
+|    time_elapsed    | 181    |
+|    total_timesteps | 360448 |
+-------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 2026       |
+|    iterations           | 23         |
+|    time_elapsed         | 185        |
+|    total_timesteps      | 376832     |
+| train/                  |            |
+|    approx_kl            | 0.00587913 |
+|    clip_fraction        | 0.0501     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.92      |
+|    explained_variance   | 0.932      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0415    |
+|    n_updates            | 220        |
+|    policy_gradient_loss | -0.00484   |
+|    std                  | 1.04       |
+|    value_loss           | 0.0242     |
+----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2064        |
+|    iterations           | 24          |
+|    time_elapsed         | 190         |
+|    total_timesteps      | 393216      |
+| train/                  |             |
+|    approx_kl            | 0.006933649 |
+|    clip_fraction        | 0.081       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.91       |
+|    explained_variance   | 0.918       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.032      |
+|    n_updates            | 230         |
+|    policy_gradient_loss | -0.00773    |
+|    std                  | 1.03        |
+|    value_loss           | 0.0233      |
+-----------------------------------------
+Eval num_timesteps=400000, episode_reward=-2.75 +/- 37.08
+Episode length: 1998.55 +/- 6.32
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -2.75        |
+| time/                   |              |
+|    total_timesteps      | 400000       |
+| train/                  |              |
+|    approx_kl            | 0.0064436095 |
+|    clip_fraction        | 0.0647       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.9         |
+|    explained_variance   | 0.853        |
+|    learning_rate        | 0.0003       |
+|    loss                 | 0.0633       |
+|    n_updates            | 240          |
+|    policy_gradient_loss | -0.00551     |
+|    std                  | 1.03         |
+|    value_loss           | 0.128        |
+------------------------------------------
+New best mean reward!
+
+[Diag @ 400,000 | n_sheep=1 | success=0%]
+  DROVE_NO_SHEEP             13/20
+  COMPACT_CANT_DRIVE         7/20
+  action_mag mean=0.316 p10=0.057 p90=0.512 (0=stopped, 1=full speed)
+  min_flock_radius mean=0.00m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=1.86m best=0.95m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=3.19m best=1.50m
+  reward/step (mean): progress=+0.0093  alignment=+0.0040  pen_bonus=+0.0000  step_cost=-0.0200  complete=+0.0000
+-------------------------------
+| time/              |        |
+|    fps             | 1925   |
+|    iterations      | 25     |
+|    time_elapsed    | 212    |
+|    total_timesteps | 409600 |
+-------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 1961       |
+|    iterations           | 26         |
+|    time_elapsed         | 217        |
+|    total_timesteps      | 425984     |
+| train/                  |            |
+|    approx_kl            | 0.00806847 |
+|    clip_fraction        | 0.1        |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.88      |
+|    explained_variance   | 0.933      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0254    |
+|    n_updates            | 250        |
+|    policy_gradient_loss | -0.00871   |
+|    std                  | 1.02       |
+|    value_loss           | 0.0264     |
+----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1997        |
+|    iterations           | 27          |
+|    time_elapsed         | 221         |
+|    total_timesteps      | 442368      |
+| train/                  |             |
+|    approx_kl            | 0.005784355 |
+|    clip_fraction        | 0.0531      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.86       |
+|    explained_variance   | 0.878       |
+|    learning_rate        | 0.0003      |
+|    loss                 | 0.00996     |
+|    n_updates            | 260         |
+|    policy_gradient_loss | -0.00485    |
+|    std                  | 1           |
+|    value_loss           | 0.0868      |
+-----------------------------------------
+Eval num_timesteps=450000, episode_reward=51.79 +/- 20.61
+Episode length: 1912.30 +/- 382.28
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 1.91e+03    |
+|    mean_reward          | 51.8        |
+| time/                   |             |
+|    total_timesteps      | 450000      |
+| train/                  |             |
+|    approx_kl            | 0.005881632 |
+|    clip_fraction        | 0.0639      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.83       |
+|    explained_variance   | 0.952       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0187     |
+|    n_updates            | 270         |
+|    policy_gradient_loss | -0.00655    |
+|    std                  | 0.991       |
+|    value_loss           | 0.0226      |
+-----------------------------------------
+New best mean reward!
+-------------------------------
+| time/              |        |
+|    fps             | 1936   |
+|    iterations      | 28     |
+|    time_elapsed    | 236    |
+|    total_timesteps | 458752 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1965        |
+|    iterations           | 29          |
+|    time_elapsed         | 241         |
+|    total_timesteps      | 475136      |
+| train/                  |             |
+|    approx_kl            | 0.009020726 |
+|    clip_fraction        | 0.0982      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.81       |
+|    explained_variance   | 0.87        |
+|    learning_rate        | 0.0003      |
+|    loss                 | 0.0218      |
+|    n_updates            | 280         |
+|    policy_gradient_loss | -0.0061     |
+|    std                  | 0.984       |
+|    value_loss           | 0.209       |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1999        |
+|    iterations           | 30          |
+|    time_elapsed         | 245         |
+|    total_timesteps      | 491520      |
+| train/                  |             |
+|    approx_kl            | 0.011525536 |
+|    clip_fraction        | 0.136       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.79       |
+|    explained_variance   | 0.92        |
+|    learning_rate        | 0.0003      |
+|    loss                 | 0.0306      |
+|    n_updates            | 290         |
+|    policy_gradient_loss | -0.00896    |
+|    std                  | 0.97        |
+|    value_loss           | 0.0903      |
+-----------------------------------------
+Eval num_timesteps=500000, episode_reward=87.01 +/- 42.12
+Episode length: 1359.85 +/- 815.95
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 1.36e+03    |
+|    mean_reward          | 87          |
+| time/                   |             |
+|    total_timesteps      | 500000      |
+| train/                  |             |
+|    approx_kl            | 0.012545023 |
+|    clip_fraction        | 0.171       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.78       |
+|    explained_variance   | 0.956       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0369     |
+|    n_updates            | 300         |
+|    policy_gradient_loss | -0.0069     |
+|    std                  | 0.972       |
+|    value_loss           | 0.034       |
+-----------------------------------------
+New best mean reward!
+-------------------------------
+| time/              |        |
+|    fps             | 1968   |
+|    iterations      | 31     |
+|    time_elapsed    | 258    |
+|    total_timesteps | 507904 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1996        |
+|    iterations           | 32          |
+|    time_elapsed         | 262         |
+|    total_timesteps      | 524288      |
+| train/                  |             |
+|    approx_kl            | 0.008305798 |
+|    clip_fraction        | 0.102       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.78       |
+|    explained_variance   | 0.975       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0285     |
+|    n_updates            | 310         |
+|    policy_gradient_loss | -0.00343    |
+|    std                  | 0.972       |
+|    value_loss           | 0.0162      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2021         |
+|    iterations           | 33           |
+|    time_elapsed         | 267          |
+|    total_timesteps      | 540672       |
+| train/                  |              |
+|    approx_kl            | 0.0074599315 |
+|    clip_fraction        | 0.0925       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.81        |
+|    explained_variance   | 0.976        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0282      |
+|    n_updates            | 320          |
+|    policy_gradient_loss | -0.0028      |
+|    std                  | 0.989        |
+|    value_loss           | 0.0136       |
+------------------------------------------
+Eval num_timesteps=550000, episode_reward=113.42 +/- 48.33
+Episode length: 926.05 +/- 792.99
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 926         |
+|    mean_reward          | 113         |
+| time/                   |             |
+|    total_timesteps      | 550000      |
+| train/                  |             |
+|    approx_kl            | 0.010888291 |
+|    clip_fraction        | 0.136       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.83       |
+|    explained_variance   | 0.981       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0226     |
+|    n_updates            | 330         |
+|    policy_gradient_loss | -0.00266    |
+|    std                  | 1           |
+|    value_loss           | 0.00643     |
+-----------------------------------------
+New best mean reward!
+-------------------------------
+| time/              |        |
+|    fps             | 2005   |
+|    iterations      | 34     |
+|    time_elapsed    | 277    |
+|    total_timesteps | 557056 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2030        |
+|    iterations           | 35          |
+|    time_elapsed         | 282         |
+|    total_timesteps      | 573440      |
+| train/                  |             |
+|    approx_kl            | 0.009418717 |
+|    clip_fraction        | 0.121       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.84       |
+|    explained_variance   | 0.975       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0234     |
+|    n_updates            | 340         |
+|    policy_gradient_loss | -0.00417    |
+|    std                  | 1           |
+|    value_loss           | 0.0219      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2054        |
+|    iterations           | 36          |
+|    time_elapsed         | 287         |
+|    total_timesteps      | 589824      |
+| train/                  |             |
+|    approx_kl            | 0.009153167 |
+|    clip_fraction        | 0.132       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.86       |
+|    explained_variance   | 0.972       |
+|    learning_rate        | 0.0003      |
+|    loss                 | 0.00458     |
+|    n_updates            | 350         |
+|    policy_gradient_loss | -0.00925    |
+|    std                  | 1.01        |
+|    value_loss           | 0.0644      |
+-----------------------------------------
+Eval num_timesteps=600000, episode_reward=142.43 +/- 15.10
+Episode length: 292.00 +/- 114.85
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 292          |
+|    mean_reward          | 142          |
+| time/                   |              |
+|    total_timesteps      | 600000       |
+| train/                  |              |
+|    approx_kl            | 0.0073751104 |
+|    clip_fraction        | 0.0817       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.85        |
+|    explained_variance   | 0.967        |
+|    learning_rate        | 0.0003       |
+|    loss                 | 0.0205       |
+|    n_updates            | 360          |
+|    policy_gradient_loss | -0.0078      |
+|    std                  | 1.01         |
+|    value_loss           | 0.0854       |
+------------------------------------------
+New best mean reward!
+
+[Diag @ 600,000 | n_sheep=1 | success=100%]
+  SUCCESS                    20/20
+  action_mag mean=0.339 p10=0.246 p90=0.609 (0=stopped, 1=full speed)
+  min_flock_radius mean=0.00m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=1.68m best=0.23m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=3.54m best=2.70m
+  reward/step (mean): progress=+0.0996  alignment=+0.0271  pen_bonus=+0.0302  step_cost=-0.0200  complete=+0.3022
+-------------------------------
+| time/              |        |
+|    fps             | 2059   |
+|    iterations      | 37     |
+|    time_elapsed    | 294    |
+|    total_timesteps | 606208 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2069        |
+|    iterations           | 38          |
+|    time_elapsed         | 300         |
+|    total_timesteps      | 622592      |
+| train/                  |             |
+|    approx_kl            | 0.006348365 |
+|    clip_fraction        | 0.0685      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.85       |
+|    explained_variance   | 0.954       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0107     |
+|    n_updates            | 370         |
+|    policy_gradient_loss | -0.00403    |
+|    std                  | 1           |
+|    value_loss           | 0.0629      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2085         |
+|    iterations           | 39           |
+|    time_elapsed         | 306          |
+|    total_timesteps      | 638976       |
+| train/                  |              |
+|    approx_kl            | 0.0073653567 |
+|    clip_fraction        | 0.089        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.83        |
+|    explained_variance   | 0.976        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0379      |
+|    n_updates            | 380          |
+|    policy_gradient_loss | -0.00635     |
+|    std                  | 0.993        |
+|    value_loss           | 0.0213       |
+------------------------------------------
+Eval num_timesteps=650000, episode_reward=148.63 +/- 11.08
+Episode length: 312.15 +/- 83.52
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 312          |
+|    mean_reward          | 149          |
+| time/                   |              |
+|    total_timesteps      | 650000       |
+| train/                  |              |
+|    approx_kl            | 0.0064217458 |
+|    clip_fraction        | 0.0662       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.81        |
+|    explained_variance   | 0.977        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0177      |
+|    n_updates            | 390          |
+|    policy_gradient_loss | -0.00451     |
+|    std                  | 0.983        |
+|    value_loss           | 0.0325       |
+------------------------------------------
+New best mean reward!
+-------------------------------
+| time/              |        |
+|    fps             | 2092   |
+|    iterations      | 40     |
+|    time_elapsed    | 313    |
+|    total_timesteps | 655360 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2107        |
+|    iterations           | 41          |
+|    time_elapsed         | 318         |
+|    total_timesteps      | 671744      |
+| train/                  |             |
+|    approx_kl            | 0.007330196 |
+|    clip_fraction        | 0.0823      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.79       |
+|    explained_variance   | 0.985       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0257     |
+|    n_updates            | 400         |
+|    policy_gradient_loss | -0.00559    |
+|    std                  | 0.971       |
+|    value_loss           | 0.0108      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2123         |
+|    iterations           | 42           |
+|    time_elapsed         | 323          |
+|    total_timesteps      | 688128       |
+| train/                  |              |
+|    approx_kl            | 0.0076610697 |
+|    clip_fraction        | 0.0876       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.77        |
+|    explained_variance   | 0.99         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.037       |
+|    n_updates            | 410          |
+|    policy_gradient_loss | -0.00581     |
+|    std                  | 0.966        |
+|    value_loss           | 0.00623      |
+------------------------------------------
+Eval num_timesteps=700000, episode_reward=137.38 +/- 18.54
+Episode length: 255.10 +/- 119.47
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 255          |
+|    mean_reward          | 137          |
+| time/                   |              |
+|    total_timesteps      | 700000       |
+| train/                  |              |
+|    approx_kl            | 0.0072219693 |
+|    clip_fraction        | 0.0734       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.76        |
+|    explained_variance   | 0.989        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0383      |
+|    n_updates            | 420          |
+|    policy_gradient_loss | -0.00416     |
+|    std                  | 0.961        |
+|    value_loss           | 0.00951      |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 2128   |
+|    iterations      | 43     |
+|    time_elapsed    | 331    |
+|    total_timesteps | 704512 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2144         |
+|    iterations           | 44           |
+|    time_elapsed         | 336          |
+|    total_timesteps      | 720896       |
+| train/                  |              |
+|    approx_kl            | 0.0075956425 |
+|    clip_fraction        | 0.0895       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.75        |
+|    explained_variance   | 0.993        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0433      |
+|    n_updates            | 430          |
+|    policy_gradient_loss | -0.00475     |
+|    std                  | 0.953        |
+|    value_loss           | 0.00343      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2160         |
+|    iterations           | 45           |
+|    time_elapsed         | 341          |
+|    total_timesteps      | 737280       |
+| train/                  |              |
+|    approx_kl            | 0.0062526334 |
+|    clip_fraction        | 0.0699       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.72        |
+|    explained_variance   | 0.99         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0329      |
+|    n_updates            | 440          |
+|    policy_gradient_loss | -0.00355     |
+|    std                  | 0.942        |
+|    value_loss           | 0.0113       |
+------------------------------------------
+Eval num_timesteps=750000, episode_reward=145.04 +/- 16.56
+Episode length: 291.10 +/- 132.25
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 291          |
+|    mean_reward          | 145          |
+| time/                   |              |
+|    total_timesteps      | 750000       |
+| train/                  |              |
+|    approx_kl            | 0.0058749127 |
+|    clip_fraction        | 0.0607       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.71        |
+|    explained_variance   | 0.993        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0281      |
+|    n_updates            | 450          |
+|    policy_gradient_loss | -0.00324     |
+|    std                  | 0.934        |
+|    value_loss           | 0.00811      |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 2161   |
+|    iterations      | 46     |
+|    time_elapsed    | 348    |
+|    total_timesteps | 753664 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2176         |
+|    iterations           | 47           |
+|    time_elapsed         | 353          |
+|    total_timesteps      | 770048       |
+| train/                  |              |
+|    approx_kl            | 0.0070656985 |
+|    clip_fraction        | 0.0763       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.68        |
+|    explained_variance   | 0.996        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0322      |
+|    n_updates            | 460          |
+|    policy_gradient_loss | -0.00485     |
+|    std                  | 0.92         |
+|    value_loss           | 0.00234      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2193        |
+|    iterations           | 48          |
+|    time_elapsed         | 358         |
+|    total_timesteps      | 786432      |
+| train/                  |             |
+|    approx_kl            | 0.008987564 |
+|    clip_fraction        | 0.112       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.66       |
+|    explained_variance   | 0.997       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0471     |
+|    n_updates            | 470         |
+|    policy_gradient_loss | -0.00864    |
+|    std                  | 0.909       |
+|    value_loss           | 0.00178     |
+-----------------------------------------
+Eval num_timesteps=800000, episode_reward=141.03 +/- 13.75
+Episode length: 256.90 +/- 100.39
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 257         |
+|    mean_reward          | 141         |
+| time/                   |             |
+|    total_timesteps      | 800000      |
+| train/                  |             |
+|    approx_kl            | 0.008297143 |
+|    clip_fraction        | 0.0945      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.67       |
+|    explained_variance   | 0.989       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0173     |
+|    n_updates            | 480         |
+|    policy_gradient_loss | -0.00352    |
+|    std                  | 0.921       |
+|    value_loss           | 0.00934     |
+-----------------------------------------
+
+[Diag @ 800,000 | n_sheep=1 | success=100%]
+  SUCCESS                    20/20
+  action_mag mean=0.333 p10=0.244 p90=0.332 (0=stopped, 1=full speed)
+  min_flock_radius mean=0.00m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=1.40m best=0.75m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=3.47m best=1.58m
+  reward/step (mean): progress=+0.1108  alignment=+0.0328  pen_bonus=+0.0366  step_cost=-0.0200  complete=+0.3664
+
+[Curriculum] leaving stage n_sheep=1 after 800,000 steps | training success rate (last 100 eps) = 100%
+[Curriculum] → 2 sheep at step 800,000
+
+-------------------------------
+| time/              |        |
+|    fps             | 2187   |
+|    iterations      | 49     |
+|    time_elapsed    | 367    |
+|    total_timesteps | 802816 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2201        |
+|    iterations           | 50          |
+|    time_elapsed         | 372         |
+|    total_timesteps      | 819200      |
+| train/                  |             |
+|    approx_kl            | 0.006534174 |
+|    clip_fraction        | 0.0754      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.7        |
+|    explained_variance   | 0.968       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0252     |
+|    n_updates            | 490         |
+|    policy_gradient_loss | 0.00248     |
+|    std                  | 0.942       |
+|    value_loss           | 0.021       |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2213        |
+|    iterations           | 51          |
+|    time_elapsed         | 377         |
+|    total_timesteps      | 835584      |
+| train/                  |             |
+|    approx_kl            | 0.012509884 |
+|    clip_fraction        | 0.182       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.73       |
+|    explained_variance   | 0.51        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0127     |
+|    n_updates            | 500         |
+|    policy_gradient_loss | 0.00321     |
+|    std                  | 0.953       |
+|    value_loss           | 0.0093      |
+-----------------------------------------
+Eval num_timesteps=850000, episode_reward=-30.43 +/- 29.94
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -30.4       |
+| time/                   |             |
+|    total_timesteps      | 850000      |
+| train/                  |             |
+|    approx_kl            | 0.009752454 |
+|    clip_fraction        | 0.146       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.74       |
+|    explained_variance   | 0.865       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0289     |
+|    n_updates            | 510         |
+|    policy_gradient_loss | 0.00274     |
+|    std                  | 0.95        |
+|    value_loss           | 0.0117      |
+-----------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 2153   |
+|    iterations      | 52     |
+|    time_elapsed    | 395    |
+|    total_timesteps | 851968 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2166        |
+|    iterations           | 53          |
+|    time_elapsed         | 400         |
+|    total_timesteps      | 868352      |
+| train/                  |             |
+|    approx_kl            | 0.011746319 |
+|    clip_fraction        | 0.133       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.75       |
+|    explained_variance   | 0.953       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0316     |
+|    n_updates            | 520         |
+|    policy_gradient_loss | 0.00116     |
+|    std                  | 0.958       |
+|    value_loss           | 0.00603     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2179        |
+|    iterations           | 54          |
+|    time_elapsed         | 405         |
+|    total_timesteps      | 884736      |
+| train/                  |             |
+|    approx_kl            | 0.008340008 |
+|    clip_fraction        | 0.111       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.75       |
+|    explained_variance   | 0.959       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0317     |
+|    n_updates            | 530         |
+|    policy_gradient_loss | 0.000628    |
+|    std                  | 0.955       |
+|    value_loss           | 0.00663     |
+-----------------------------------------
+Eval num_timesteps=900000, episode_reward=-21.80 +/- 34.98
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -21.8       |
+| time/                   |             |
+|    total_timesteps      | 900000      |
+| train/                  |             |
+|    approx_kl            | 0.010461532 |
+|    clip_fraction        | 0.13        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.74       |
+|    explained_variance   | 0.88        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.00905    |
+|    n_updates            | 540         |
+|    policy_gradient_loss | -0.000256   |
+|    std                  | 0.951       |
+|    value_loss           | 0.00567     |
+-----------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 2128   |
+|    iterations      | 55     |
+|    time_elapsed    | 423    |
+|    total_timesteps | 901120 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2139         |
+|    iterations           | 56           |
+|    time_elapsed         | 428          |
+|    total_timesteps      | 917504       |
+| train/                  |              |
+|    approx_kl            | 0.0071650296 |
+|    clip_fraction        | 0.0988       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.75        |
+|    explained_variance   | 0.931        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0294      |
+|    n_updates            | 550          |
+|    policy_gradient_loss | -0.000672    |
+|    std                  | 0.957        |
+|    value_loss           | 0.00545      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2152        |
+|    iterations           | 57          |
+|    time_elapsed         | 433         |
+|    total_timesteps      | 933888      |
+| train/                  |             |
+|    approx_kl            | 0.009678386 |
+|    clip_fraction        | 0.112       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.74       |
+|    explained_variance   | 0.927       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0308     |
+|    n_updates            | 560         |
+|    policy_gradient_loss | -0.000959   |
+|    std                  | 0.953       |
+|    value_loss           | 0.00409     |
+-----------------------------------------
+Eval num_timesteps=950000, episode_reward=-34.37 +/- 35.50
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -34.4       |
+| time/                   |             |
+|    total_timesteps      | 950000      |
+| train/                  |             |
+|    approx_kl            | 0.008903094 |
+|    clip_fraction        | 0.111       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.74       |
+|    explained_variance   | 0.939       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0259     |
+|    n_updates            | 570         |
+|    policy_gradient_loss | -0.000299   |
+|    std                  | 0.955       |
+|    value_loss           | 0.00432     |
+-----------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 2108   |
+|    iterations      | 58     |
+|    time_elapsed    | 450    |
+|    total_timesteps | 950272 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2117        |
+|    iterations           | 59          |
+|    time_elapsed         | 456         |
+|    total_timesteps      | 966656      |
+| train/                  |             |
+|    approx_kl            | 0.008592881 |
+|    clip_fraction        | 0.0954      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.74       |
+|    explained_variance   | 0.929       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0173     |
+|    n_updates            | 580         |
+|    policy_gradient_loss | 0.00103     |
+|    std                  | 0.95        |
+|    value_loss           | 0.00265     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2129        |
+|    iterations           | 60          |
+|    time_elapsed         | 461         |
+|    total_timesteps      | 983040      |
+| train/                  |             |
+|    approx_kl            | 0.010225108 |
+|    clip_fraction        | 0.108       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.74       |
+|    explained_variance   | 0.972       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0135     |
+|    n_updates            | 590         |
+|    policy_gradient_loss | -0.000738   |
+|    std                  | 0.954       |
+|    value_loss           | 0.0029      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2137        |
+|    iterations           | 61          |
+|    time_elapsed         | 467         |
+|    total_timesteps      | 999424      |
+| train/                  |             |
+|    approx_kl            | 0.008312117 |
+|    clip_fraction        | 0.0887      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.75       |
+|    explained_variance   | 0.898       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0262     |
+|    n_updates            | 600         |
+|    policy_gradient_loss | -0.000497   |
+|    std                  | 0.958       |
+|    value_loss           | 0.00511     |
+-----------------------------------------
+Eval num_timesteps=1000000, episode_reward=-32.64 +/- 38.38
+Episode length: 2000.00 +/- 0.00
+----------------------------------------
+| eval/                   |            |
+|    mean_ep_length       | 2e+03      |
+|    mean_reward          | -32.6      |
+| time/                   |            |
+|    total_timesteps      | 1000000    |
+| train/                  |            |
+|    approx_kl            | 0.00942917 |
+|    clip_fraction        | 0.105      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.76      |
+|    explained_variance   | 0.961      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0331    |
+|    n_updates            | 610        |
+|    policy_gradient_loss | -0.0023    |
+|    std                  | 0.966      |
+|    value_loss           | 0.00282    |
+----------------------------------------
+
+[Diag @ 1,000,000 | n_sheep=2 | success=0%]
+  COMPACT_CANT_DRIVE         14/20
+  NEVER_COMPACT              6/20
+  action_mag mean=0.216 p10=0.000 p90=0.805 (0=stopped, 1=full speed)
+  min_flock_radius mean=3.39m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=1.18m best=0.11m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.11m best=7.44m
+  reward/step (mean): progress=-0.0011  alignment=+0.0106  pen_bonus=+0.0003  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 2057    |
+|    iterations      | 62      |
+|    time_elapsed    | 493     |
+|    total_timesteps | 1015808 |
+--------------------------------
+---------------------------------------
+| time/                   |           |
+|    fps                  | 2067      |
+|    iterations           | 63        |
+|    time_elapsed         | 499       |
+|    total_timesteps      | 1032192   |
+| train/                  |           |
+|    approx_kl            | 0.008683  |
+|    clip_fraction        | 0.0967    |
+|    clip_range           | 0.2       |
+|    entropy_loss         | -2.77     |
+|    explained_variance   | 0.93      |
+|    learning_rate        | 0.0003    |
+|    loss                 | -0.029    |
+|    n_updates            | 620       |
+|    policy_gradient_loss | -0.000765 |
+|    std                  | 0.965     |
+|    value_loss           | 0.00446   |
+---------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2077        |
+|    iterations           | 64          |
+|    time_elapsed         | 504         |
+|    total_timesteps      | 1048576     |
+| train/                  |             |
+|    approx_kl            | 0.009014329 |
+|    clip_fraction        | 0.113       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.76       |
+|    explained_variance   | 0.984       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0279     |
+|    n_updates            | 630         |
+|    policy_gradient_loss | -0.00211    |
+|    std                  | 0.962       |
+|    value_loss           | 0.00312     |
+-----------------------------------------
+Eval num_timesteps=1050000, episode_reward=-31.51 +/- 42.52
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -31.5       |
+| time/                   |             |
+|    total_timesteps      | 1050000     |
+| train/                  |             |
+|    approx_kl            | 0.008500135 |
+|    clip_fraction        | 0.105       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.75       |
+|    explained_variance   | 0.968       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0306     |
+|    n_updates            | 640         |
+|    policy_gradient_loss | -0.00312    |
+|    std                  | 0.955       |
+|    value_loss           | 0.00288     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 2042    |
+|    iterations      | 65      |
+|    time_elapsed    | 521     |
+|    total_timesteps | 1064960 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2056         |
+|    iterations           | 66           |
+|    time_elapsed         | 525          |
+|    total_timesteps      | 1081344      |
+| train/                  |              |
+|    approx_kl            | 0.0069593494 |
+|    clip_fraction        | 0.0923       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.74        |
+|    explained_variance   | 0.835        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0291      |
+|    n_updates            | 650          |
+|    policy_gradient_loss | -0.000469    |
+|    std                  | 0.952        |
+|    value_loss           | 0.00186      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2064        |
+|    iterations           | 67          |
+|    time_elapsed         | 531         |
+|    total_timesteps      | 1097728     |
+| train/                  |             |
+|    approx_kl            | 0.007817726 |
+|    clip_fraction        | 0.0933      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.74       |
+|    explained_variance   | 0.922       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0206     |
+|    n_updates            | 660         |
+|    policy_gradient_loss | -0.00208    |
+|    std                  | 0.953       |
+|    value_loss           | 0.00234     |
+-----------------------------------------
+Eval num_timesteps=1100000, episode_reward=-22.82 +/- 33.61
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -22.8       |
+| time/                   |             |
+|    total_timesteps      | 1100000     |
+| train/                  |             |
+|    approx_kl            | 0.006177975 |
+|    clip_fraction        | 0.0806      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.74       |
+|    explained_variance   | 0.951       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.026      |
+|    n_updates            | 670         |
+|    policy_gradient_loss | -5.8e-05    |
+|    std                  | 0.951       |
+|    value_loss           | 0.00184     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 2035    |
+|    iterations      | 68      |
+|    time_elapsed    | 547     |
+|    total_timesteps | 1114112 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2048        |
+|    iterations           | 69          |
+|    time_elapsed         | 551         |
+|    total_timesteps      | 1130496     |
+| train/                  |             |
+|    approx_kl            | 0.009605391 |
+|    clip_fraction        | 0.102       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.74       |
+|    explained_variance   | 0.951       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0344     |
+|    n_updates            | 680         |
+|    policy_gradient_loss | -0.0022     |
+|    std                  | 0.957       |
+|    value_loss           | 0.00221     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2060         |
+|    iterations           | 70           |
+|    time_elapsed         | 556          |
+|    total_timesteps      | 1146880      |
+| train/                  |              |
+|    approx_kl            | 0.0064521013 |
+|    clip_fraction        | 0.0953       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.75        |
+|    explained_variance   | 0.898        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0348      |
+|    n_updates            | 690          |
+|    policy_gradient_loss | -0.00112     |
+|    std                  | 0.96         |
+|    value_loss           | 0.00221      |
+------------------------------------------
+Eval num_timesteps=1150000, episode_reward=-26.36 +/- 35.49
+Episode length: 2000.00 +/- 0.00
+----------------------------------------
+| eval/                   |            |
+|    mean_ep_length       | 2e+03      |
+|    mean_reward          | -26.4      |
+| time/                   |            |
+|    total_timesteps      | 1150000    |
+| train/                  |            |
+|    approx_kl            | 0.00777065 |
+|    clip_fraction        | 0.0837     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.76      |
+|    explained_variance   | 0.907      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0198    |
+|    n_updates            | 700        |
+|    policy_gradient_loss | -0.000371  |
+|    std                  | 0.963      |
+|    value_loss           | 0.00182    |
+----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 2031    |
+|    iterations      | 71      |
+|    time_elapsed    | 572     |
+|    total_timesteps | 1163264 |
+--------------------------------
+---------------------------------------
+| time/                   |           |
+|    fps                  | 2044      |
+|    iterations           | 72        |
+|    time_elapsed         | 577       |
+|    total_timesteps      | 1179648   |
+| train/                  |           |
+|    approx_kl            | 0.006344  |
+|    clip_fraction        | 0.0719    |
+|    clip_range           | 0.2       |
+|    entropy_loss         | -2.76     |
+|    explained_variance   | 0.908     |
+|    learning_rate        | 0.0003    |
+|    loss                 | -0.0347   |
+|    n_updates            | 710       |
+|    policy_gradient_loss | -0.000455 |
+|    std                  | 0.961     |
+|    value_loss           | 0.00145   |
+---------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2054         |
+|    iterations           | 73           |
+|    time_elapsed         | 582          |
+|    total_timesteps      | 1196032      |
+| train/                  |              |
+|    approx_kl            | 0.0060829036 |
+|    clip_fraction        | 0.0854       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.75        |
+|    explained_variance   | 0.896        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0232      |
+|    n_updates            | 720          |
+|    policy_gradient_loss | -0.00108     |
+|    std                  | 0.957        |
+|    value_loss           | 0.00152      |
+------------------------------------------
+Eval num_timesteps=1200000, episode_reward=-14.33 +/- 30.83
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -14.3        |
+| time/                   |              |
+|    total_timesteps      | 1200000      |
+| train/                  |              |
+|    approx_kl            | 0.0073732347 |
+|    clip_fraction        | 0.0783       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.76        |
+|    explained_variance   | 0.948        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0267      |
+|    n_updates            | 730          |
+|    policy_gradient_loss | -0.00212     |
+|    std                  | 0.968        |
+|    value_loss           | 0.00253      |
+------------------------------------------
+
+[Diag @ 1,200,000 | n_sheep=2 | success=0%]
+  COMPACT_CANT_DRIVE         15/20
+  NEVER_COMPACT              5/20
+  action_mag mean=0.273 p10=0.004 p90=1.008 (0=stopped, 1=full speed)
+  min_flock_radius mean=3.94m best=0.97m  (target <5m to compact)
+  min_dog_to_com   mean=1.16m best=0.35m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.54m best=4.20m
+  reward/step (mean): progress=+0.0001  alignment=+0.0121  pen_bonus=+0.0000  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1998    |
+|    iterations      | 74      |
+|    time_elapsed    | 606     |
+|    total_timesteps | 1212416 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2008        |
+|    iterations           | 75          |
+|    time_elapsed         | 611         |
+|    total_timesteps      | 1228800     |
+| train/                  |             |
+|    approx_kl            | 0.006109112 |
+|    clip_fraction        | 0.0814      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.78       |
+|    explained_variance   | 0.86        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0205     |
+|    n_updates            | 740         |
+|    policy_gradient_loss | -0.000541   |
+|    std                  | 0.973       |
+|    value_loss           | 0.00171     |
+-----------------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 2016       |
+|    iterations           | 76         |
+|    time_elapsed         | 617        |
+|    total_timesteps      | 1245184    |
+| train/                  |            |
+|    approx_kl            | 0.00703271 |
+|    clip_fraction        | 0.0781     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.78      |
+|    explained_variance   | 0.934      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0394    |
+|    n_updates            | 750        |
+|    policy_gradient_loss | -0.00105   |
+|    std                  | 0.975      |
+|    value_loss           | 0.00168    |
+----------------------------------------
+Eval num_timesteps=1250000, episode_reward=-18.12 +/- 39.82
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -18.1        |
+| time/                   |              |
+|    total_timesteps      | 1250000      |
+| train/                  |              |
+|    approx_kl            | 0.0064994176 |
+|    clip_fraction        | 0.0698       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.8         |
+|    explained_variance   | 0.919        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0166      |
+|    n_updates            | 760          |
+|    policy_gradient_loss | -0.000919    |
+|    std                  | 0.985        |
+|    value_loss           | 0.000832     |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1989    |
+|    iterations      | 77      |
+|    time_elapsed    | 634     |
+|    total_timesteps | 1261568 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2001        |
+|    iterations           | 78          |
+|    time_elapsed         | 638         |
+|    total_timesteps      | 1277952     |
+| train/                  |             |
+|    approx_kl            | 0.008321709 |
+|    clip_fraction        | 0.0902      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.81       |
+|    explained_variance   | 0.874       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0295     |
+|    n_updates            | 770         |
+|    policy_gradient_loss | -0.00219    |
+|    std                  | 0.991       |
+|    value_loss           | 0.00127     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2010        |
+|    iterations           | 79          |
+|    time_elapsed         | 643         |
+|    total_timesteps      | 1294336     |
+| train/                  |             |
+|    approx_kl            | 0.009220061 |
+|    clip_fraction        | 0.112       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.82       |
+|    explained_variance   | 0.952       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0379     |
+|    n_updates            | 780         |
+|    policy_gradient_loss | -0.00411    |
+|    std                  | 0.994       |
+|    value_loss           | 0.00295     |
+-----------------------------------------
+Eval num_timesteps=1300000, episode_reward=-22.41 +/- 35.57
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -22.4        |
+| time/                   |              |
+|    total_timesteps      | 1300000      |
+| train/                  |              |
+|    approx_kl            | 0.0071307076 |
+|    clip_fraction        | 0.0826       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.83        |
+|    explained_variance   | 0.948        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0281      |
+|    n_updates            | 790          |
+|    policy_gradient_loss | -0.00178     |
+|    std                  | 0.995        |
+|    value_loss           | 0.00169      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1986    |
+|    iterations      | 80      |
+|    time_elapsed    | 659     |
+|    total_timesteps | 1310720 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1996        |
+|    iterations           | 81          |
+|    time_elapsed         | 664         |
+|    total_timesteps      | 1327104     |
+| train/                  |             |
+|    approx_kl            | 0.008566003 |
+|    clip_fraction        | 0.0857      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.84       |
+|    explained_variance   | 0.904       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0369     |
+|    n_updates            | 800         |
+|    policy_gradient_loss | -0.00199    |
+|    std                  | 1.01        |
+|    value_loss           | 0.00203     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2006         |
+|    iterations           | 82           |
+|    time_elapsed         | 669          |
+|    total_timesteps      | 1343488      |
+| train/                  |              |
+|    approx_kl            | 0.0082352655 |
+|    clip_fraction        | 0.0989       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.86        |
+|    explained_variance   | 0.918        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0297      |
+|    n_updates            | 810          |
+|    policy_gradient_loss | -0.0023      |
+|    std                  | 1.01         |
+|    value_loss           | 0.00203      |
+------------------------------------------
+Eval num_timesteps=1350000, episode_reward=-14.21 +/- 38.53
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -14.2        |
+| time/                   |              |
+|    total_timesteps      | 1350000      |
+| train/                  |              |
+|    approx_kl            | 0.0066830693 |
+|    clip_fraction        | 0.0831       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.86        |
+|    explained_variance   | 0.923        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0331      |
+|    n_updates            | 820          |
+|    policy_gradient_loss | -0.00226     |
+|    std                  | 1.01         |
+|    value_loss           | 0.00125      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1983    |
+|    iterations      | 83      |
+|    time_elapsed    | 685     |
+|    total_timesteps | 1359872 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1991        |
+|    iterations           | 84          |
+|    time_elapsed         | 691         |
+|    total_timesteps      | 1376256     |
+| train/                  |             |
+|    approx_kl            | 0.008341949 |
+|    clip_fraction        | 0.101       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.85       |
+|    explained_variance   | 0.928       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0156     |
+|    n_updates            | 830         |
+|    policy_gradient_loss | -0.00132    |
+|    std                  | 1.01        |
+|    value_loss           | 0.00407     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1999        |
+|    iterations           | 85          |
+|    time_elapsed         | 696         |
+|    total_timesteps      | 1392640     |
+| train/                  |             |
+|    approx_kl            | 0.010089031 |
+|    clip_fraction        | 0.109       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.84       |
+|    explained_variance   | 0.914       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0249     |
+|    n_updates            | 840         |
+|    policy_gradient_loss | -0.00202    |
+|    std                  | 0.999       |
+|    value_loss           | 0.00555     |
+-----------------------------------------
+Eval num_timesteps=1400000, episode_reward=-5.74 +/- 37.76
+Episode length: 2000.00 +/- 0.00
+----------------------------------------
+| eval/                   |            |
+|    mean_ep_length       | 2e+03      |
+|    mean_reward          | -5.74      |
+| time/                   |            |
+|    total_timesteps      | 1400000    |
+| train/                  |            |
+|    approx_kl            | 0.00840036 |
+|    clip_fraction        | 0.112      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.84      |
+|    explained_variance   | 0.915      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0267    |
+|    n_updates            | 850        |
+|    policy_gradient_loss | -0.00422   |
+|    std                  | 1          |
+|    value_loss           | 0.0017     |
+----------------------------------------
+
+[Diag @ 1,400,000 | n_sheep=2 | success=0%]
+  COMPACT_CANT_DRIVE         16/20
+  NEVER_COMPACT              4/20
+  action_mag mean=0.258 p10=0.000 p90=1.004 (0=stopped, 1=full speed)
+  min_flock_radius mean=3.30m best=0.61m  (target <5m to compact)
+  min_dog_to_com   mean=0.76m best=0.22m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.16m best=4.08m
+  reward/step (mean): progress=+0.0035  alignment=+0.0165  pen_bonus=+0.0000  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1954    |
+|    iterations      | 86      |
+|    time_elapsed    | 720     |
+|    total_timesteps | 1409024 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1964        |
+|    iterations           | 87          |
+|    time_elapsed         | 725         |
+|    total_timesteps      | 1425408     |
+| train/                  |             |
+|    approx_kl            | 0.007908808 |
+|    clip_fraction        | 0.0839      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.85       |
+|    explained_variance   | 0.755       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.018      |
+|    n_updates            | 860         |
+|    policy_gradient_loss | -0.00223    |
+|    std                  | 1.01        |
+|    value_loss           | 0.00248     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1972        |
+|    iterations           | 88          |
+|    time_elapsed         | 730         |
+|    total_timesteps      | 1441792     |
+| train/                  |             |
+|    approx_kl            | 0.007957449 |
+|    clip_fraction        | 0.0864      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.86       |
+|    explained_variance   | 0.868       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0315     |
+|    n_updates            | 870         |
+|    policy_gradient_loss | -0.00288    |
+|    std                  | 1.01        |
+|    value_loss           | 0.00145     |
+-----------------------------------------
+Eval num_timesteps=1450000, episode_reward=-13.10 +/- 29.51
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -13.1       |
+| time/                   |             |
+|    total_timesteps      | 1450000     |
+| train/                  |             |
+|    approx_kl            | 0.007803983 |
+|    clip_fraction        | 0.083       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.86       |
+|    explained_variance   | 0.83        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0212     |
+|    n_updates            | 880         |
+|    policy_gradient_loss | -0.00119    |
+|    std                  | 1.01        |
+|    value_loss           | 0.00191     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1952    |
+|    iterations      | 89      |
+|    time_elapsed    | 746     |
+|    total_timesteps | 1458176 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1961        |
+|    iterations           | 90          |
+|    time_elapsed         | 751         |
+|    total_timesteps      | 1474560     |
+| train/                  |             |
+|    approx_kl            | 0.010021031 |
+|    clip_fraction        | 0.097       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.88       |
+|    explained_variance   | 0.902       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0221     |
+|    n_updates            | 890         |
+|    policy_gradient_loss | -0.00294    |
+|    std                  | 1.02        |
+|    value_loss           | 0.00136     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1970         |
+|    iterations           | 91           |
+|    time_elapsed         | 756          |
+|    total_timesteps      | 1490944      |
+| train/                  |              |
+|    approx_kl            | 0.0076614916 |
+|    clip_fraction        | 0.0963       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.89        |
+|    explained_variance   | 0.945        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0273      |
+|    n_updates            | 900          |
+|    policy_gradient_loss | -0.00355     |
+|    std                  | 1.03         |
+|    value_loss           | 0.00181      |
+------------------------------------------
+Eval num_timesteps=1500000, episode_reward=5.01 +/- 34.23
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 5.01        |
+| time/                   |             |
+|    total_timesteps      | 1500000     |
+| train/                  |             |
+|    approx_kl            | 0.005815446 |
+|    clip_fraction        | 0.0675      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.9        |
+|    explained_variance   | 0.934       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0316     |
+|    n_updates            | 910         |
+|    policy_gradient_loss | -0.00215    |
+|    std                  | 1.03        |
+|    value_loss           | 0.00162     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1950    |
+|    iterations      | 92      |
+|    time_elapsed    | 772     |
+|    total_timesteps | 1507328 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1959         |
+|    iterations           | 93           |
+|    time_elapsed         | 777          |
+|    total_timesteps      | 1523712      |
+| train/                  |              |
+|    approx_kl            | 0.0071218535 |
+|    clip_fraction        | 0.0897       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.9         |
+|    explained_variance   | 0.937        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0219      |
+|    n_updates            | 920          |
+|    policy_gradient_loss | -0.00225     |
+|    std                  | 1.03         |
+|    value_loss           | 0.00463      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1967        |
+|    iterations           | 94          |
+|    time_elapsed         | 782         |
+|    total_timesteps      | 1540096     |
+| train/                  |             |
+|    approx_kl            | 0.006857206 |
+|    clip_fraction        | 0.0809      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.89       |
+|    explained_variance   | 0.933       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0252     |
+|    n_updates            | 930         |
+|    policy_gradient_loss | -0.00219    |
+|    std                  | 1.02        |
+|    value_loss           | 0.00436     |
+-----------------------------------------
+Eval num_timesteps=1550000, episode_reward=-4.04 +/- 33.69
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -4.04       |
+| time/                   |             |
+|    total_timesteps      | 1550000     |
+| train/                  |             |
+|    approx_kl            | 0.006146897 |
+|    clip_fraction        | 0.0821      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.87       |
+|    explained_variance   | 0.913       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0352     |
+|    n_updates            | 940         |
+|    policy_gradient_loss | -0.00258    |
+|    std                  | 1.02        |
+|    value_loss           | 0.00325     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1948    |
+|    iterations      | 95      |
+|    time_elapsed    | 798     |
+|    total_timesteps | 1556480 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1958         |
+|    iterations           | 96           |
+|    time_elapsed         | 803          |
+|    total_timesteps      | 1572864      |
+| train/                  |              |
+|    approx_kl            | 0.0069321445 |
+|    clip_fraction        | 0.0778       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.86        |
+|    explained_variance   | 0.94         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.013       |
+|    n_updates            | 950          |
+|    policy_gradient_loss | -0.00214     |
+|    std                  | 1.01         |
+|    value_loss           | 0.00162      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1965         |
+|    iterations           | 97           |
+|    time_elapsed         | 808          |
+|    total_timesteps      | 1589248      |
+| train/                  |              |
+|    approx_kl            | 0.0066491435 |
+|    clip_fraction        | 0.0714       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.88        |
+|    explained_variance   | 0.941        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0304      |
+|    n_updates            | 960          |
+|    policy_gradient_loss | -0.00212     |
+|    std                  | 1.03         |
+|    value_loss           | 0.0011       |
+------------------------------------------
+Eval num_timesteps=1600000, episode_reward=12.65 +/- 31.73
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 12.6         |
+| time/                   |              |
+|    total_timesteps      | 1600000      |
+| train/                  |              |
+|    approx_kl            | 0.0050257677 |
+|    clip_fraction        | 0.0588       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.9         |
+|    explained_variance   | 0.939        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0359      |
+|    n_updates            | 970          |
+|    policy_gradient_loss | -0.0013      |
+|    std                  | 1.04         |
+|    value_loss           | 0.00201      |
+------------------------------------------
+
+[Diag @ 1,600,000 | n_sheep=2 | success=0%]
+  COMPACT_CANT_DRIVE         13/20
+  NEVER_COMPACT              7/20
+  action_mag mean=0.252 p10=0.004 p90=0.980 (0=stopped, 1=full speed)
+  min_flock_radius mean=4.30m best=0.92m  (target <5m to compact)
+  min_dog_to_com   mean=0.74m best=0.38m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.76m best=5.49m
+  reward/step (mean): progress=-0.0006  alignment=+0.0287  pen_bonus=+0.0000  step_cost=-0.0200  complete=+0.0000
+
+[Curriculum] leaving stage n_sheep=2 after 800,000 steps | training success rate (last 100 eps) = 0%
+[Curriculum] → 3 sheep at step 1,600,000
+
+--------------------------------
+| time/              |         |
+|    fps             | 1930    |
+|    iterations      | 98      |
+|    time_elapsed    | 831     |
+|    total_timesteps | 1605632 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1937         |
+|    iterations           | 99           |
+|    time_elapsed         | 837          |
+|    total_timesteps      | 1622016      |
+| train/                  |              |
+|    approx_kl            | 0.0085028205 |
+|    clip_fraction        | 0.0905       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.89        |
+|    explained_variance   | 0.909        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0346      |
+|    n_updates            | 980          |
+|    policy_gradient_loss | -0.00245     |
+|    std                  | 1.02         |
+|    value_loss           | 0.00492      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1945        |
+|    iterations           | 100         |
+|    time_elapsed         | 842         |
+|    total_timesteps      | 1638400     |
+| train/                  |             |
+|    approx_kl            | 0.009084044 |
+|    clip_fraction        | 0.118       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.91       |
+|    explained_variance   | 0.964       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0416     |
+|    n_updates            | 990         |
+|    policy_gradient_loss | 0.0025      |
+|    std                  | 1.04        |
+|    value_loss           | 0.00194     |
+-----------------------------------------
+Eval num_timesteps=1650000, episode_reward=3.05 +/- 36.42
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 3.05        |
+| time/                   |             |
+|    total_timesteps      | 1650000     |
+| train/                  |             |
+|    approx_kl            | 0.009275759 |
+|    clip_fraction        | 0.108       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.92       |
+|    explained_variance   | 0.965       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0336     |
+|    n_updates            | 1000        |
+|    policy_gradient_loss | 0.000149    |
+|    std                  | 1.04        |
+|    value_loss           | 0.00185     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1926    |
+|    iterations      | 101     |
+|    time_elapsed    | 859     |
+|    total_timesteps | 1654784 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1934        |
+|    iterations           | 102         |
+|    time_elapsed         | 864         |
+|    total_timesteps      | 1671168     |
+| train/                  |             |
+|    approx_kl            | 0.008650862 |
+|    clip_fraction        | 0.117       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.92       |
+|    explained_variance   | 0.938       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0279     |
+|    n_updates            | 1010        |
+|    policy_gradient_loss | -0.000545   |
+|    std                  | 1.04        |
+|    value_loss           | 0.00611     |
+-----------------------------------------
+---------------------------------------
+| time/                   |           |
+|    fps                  | 1939      |
+|    iterations           | 103       |
+|    time_elapsed         | 869       |
+|    total_timesteps      | 1687552   |
+| train/                  |           |
+|    approx_kl            | 0.0080826 |
+|    clip_fraction        | 0.0992    |
+|    clip_range           | 0.2       |
+|    entropy_loss         | -2.93     |
+|    explained_variance   | 0.952     |
+|    learning_rate        | 0.0003    |
+|    loss                 | -0.0415   |
+|    n_updates            | 1020      |
+|    policy_gradient_loss | -0.00201  |
+|    std                  | 1.05      |
+|    value_loss           | 0.00251   |
+---------------------------------------
+Eval num_timesteps=1700000, episode_reward=-4.66 +/- 36.05
+Episode length: 2000.00 +/- 0.00
+----------------------------------------
+| eval/                   |            |
+|    mean_ep_length       | 2e+03      |
+|    mean_reward          | -4.66      |
+| time/                   |            |
+|    total_timesteps      | 1700000    |
+| train/                  |            |
+|    approx_kl            | 0.00786162 |
+|    clip_fraction        | 0.0921     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.95      |
+|    explained_variance   | 0.893      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0301    |
+|    n_updates            | 1030       |
+|    policy_gradient_loss | -0.000631  |
+|    std                  | 1.06       |
+|    value_loss           | 0.00158    |
+----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1922    |
+|    iterations      | 104     |
+|    time_elapsed    | 886     |
+|    total_timesteps | 1703936 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1930        |
+|    iterations           | 105         |
+|    time_elapsed         | 891         |
+|    total_timesteps      | 1720320     |
+| train/                  |             |
+|    approx_kl            | 0.008055547 |
+|    clip_fraction        | 0.0842      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.96       |
+|    explained_variance   | 0.918       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.027      |
+|    n_updates            | 1040        |
+|    policy_gradient_loss | -6.56e-05   |
+|    std                  | 1.07        |
+|    value_loss           | 0.00193     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1937        |
+|    iterations           | 106         |
+|    time_elapsed         | 896         |
+|    total_timesteps      | 1736704     |
+| train/                  |             |
+|    approx_kl            | 0.008067045 |
+|    clip_fraction        | 0.087       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.97       |
+|    explained_variance   | 0.878       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0281     |
+|    n_updates            | 1050        |
+|    policy_gradient_loss | -0.00194    |
+|    std                  | 1.07        |
+|    value_loss           | 0.0082      |
+-----------------------------------------
+Eval num_timesteps=1750000, episode_reward=-0.31 +/- 42.66
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -0.309       |
+| time/                   |              |
+|    total_timesteps      | 1750000      |
+| train/                  |              |
+|    approx_kl            | 0.0066514863 |
+|    clip_fraction        | 0.0808       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.99        |
+|    explained_variance   | 0.888        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0335      |
+|    n_updates            | 1060         |
+|    policy_gradient_loss | -0.00108     |
+|    std                  | 1.08         |
+|    value_loss           | 0.00303      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1921    |
+|    iterations      | 107     |
+|    time_elapsed    | 912     |
+|    total_timesteps | 1753088 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1927        |
+|    iterations           | 108         |
+|    time_elapsed         | 917         |
+|    total_timesteps      | 1769472     |
+| train/                  |             |
+|    approx_kl            | 0.008252729 |
+|    clip_fraction        | 0.093       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3          |
+|    explained_variance   | 0.959       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0413     |
+|    n_updates            | 1070        |
+|    policy_gradient_loss | -0.00241    |
+|    std                  | 1.09        |
+|    value_loss           | 0.00122     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1935         |
+|    iterations           | 109          |
+|    time_elapsed         | 922          |
+|    total_timesteps      | 1785856      |
+| train/                  |              |
+|    approx_kl            | 0.0073527684 |
+|    clip_fraction        | 0.0822       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.01        |
+|    explained_variance   | 0.883        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.018       |
+|    n_updates            | 1080         |
+|    policy_gradient_loss | -0.00172     |
+|    std                  | 1.1          |
+|    value_loss           | 0.00172      |
+------------------------------------------
+Eval num_timesteps=1800000, episode_reward=8.99 +/- 39.35
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 8.99        |
+| time/                   |             |
+|    total_timesteps      | 1800000     |
+| train/                  |             |
+|    approx_kl            | 0.006149094 |
+|    clip_fraction        | 0.0771      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.03       |
+|    explained_variance   | 0.911       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0315     |
+|    n_updates            | 1090        |
+|    policy_gradient_loss | -0.000744   |
+|    std                  | 1.1         |
+|    value_loss           | 0.00456     |
+-----------------------------------------
+
+[Diag @ 1,800,000 | n_sheep=3 | success=0%]
+  NEVER_COMPACT              19/20
+  COMPACT_CANT_DRIVE         1/20
+  action_mag mean=0.049 p10=0.007 p90=0.049 (0=stopped, 1=full speed)
+  min_flock_radius mean=7.79m best=4.73m  (target <5m to compact)
+  min_dog_to_com   mean=0.92m best=0.25m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=14.27m best=7.54m
+  reward/step (mean): progress=-0.0043  alignment=+0.0208  pen_bonus=+0.0000  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1899    |
+|    iterations      | 110     |
+|    time_elapsed    | 948     |
+|    total_timesteps | 1802240 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1906        |
+|    iterations           | 111         |
+|    time_elapsed         | 953         |
+|    total_timesteps      | 1818624     |
+| train/                  |             |
+|    approx_kl            | 0.007161974 |
+|    clip_fraction        | 0.0871      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.03       |
+|    explained_variance   | 0.914       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0359     |
+|    n_updates            | 1100        |
+|    policy_gradient_loss | -0.00186    |
+|    std                  | 1.1         |
+|    value_loss           | 0.00214     |
+-----------------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 1914       |
+|    iterations           | 112        |
+|    time_elapsed         | 958        |
+|    total_timesteps      | 1835008    |
+| train/                  |            |
+|    approx_kl            | 0.00886854 |
+|    clip_fraction        | 0.103      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -3.04      |
+|    explained_variance   | 0.94       |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.04      |
+|    n_updates            | 1110       |
+|    policy_gradient_loss | -0.00333   |
+|    std                  | 1.11       |
+|    value_loss           | 0.00456    |
+----------------------------------------
+Eval num_timesteps=1850000, episode_reward=14.49 +/- 36.35
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 14.5         |
+| time/                   |              |
+|    total_timesteps      | 1850000      |
+| train/                  |              |
+|    approx_kl            | 0.0058414284 |
+|    clip_fraction        | 0.0642       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.05        |
+|    explained_variance   | 0.871        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.033       |
+|    n_updates            | 1120         |
+|    policy_gradient_loss | -0.000891    |
+|    std                  | 1.11         |
+|    value_loss           | 0.00394      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1898    |
+|    iterations      | 113     |
+|    time_elapsed    | 975     |
+|    total_timesteps | 1851392 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1906        |
+|    iterations           | 114         |
+|    time_elapsed         | 979         |
+|    total_timesteps      | 1867776     |
+| train/                  |             |
+|    approx_kl            | 0.008916938 |
+|    clip_fraction        | 0.0916      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.05       |
+|    explained_variance   | 0.937       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0334     |
+|    n_updates            | 1130        |
+|    policy_gradient_loss | -0.00257    |
+|    std                  | 1.12        |
+|    value_loss           | 0.00285     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1913        |
+|    iterations           | 115         |
+|    time_elapsed         | 984         |
+|    total_timesteps      | 1884160     |
+| train/                  |             |
+|    approx_kl            | 0.008523149 |
+|    clip_fraction        | 0.0907      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.06       |
+|    explained_variance   | 0.954       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0339     |
+|    n_updates            | 1140        |
+|    policy_gradient_loss | -0.0034     |
+|    std                  | 1.12        |
+|    value_loss           | 0.00209     |
+-----------------------------------------
+Eval num_timesteps=1900000, episode_reward=9.85 +/- 42.18
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 9.85         |
+| time/                   |              |
+|    total_timesteps      | 1900000      |
+| train/                  |              |
+|    approx_kl            | 0.0075978916 |
+|    clip_fraction        | 0.0819       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.06        |
+|    explained_variance   | 0.96         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0313      |
+|    n_updates            | 1150         |
+|    policy_gradient_loss | -0.00272     |
+|    std                  | 1.12         |
+|    value_loss           | 0.00332      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1896    |
+|    iterations      | 116     |
+|    time_elapsed    | 1002    |
+|    total_timesteps | 1900544 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1902        |
+|    iterations           | 117         |
+|    time_elapsed         | 1007        |
+|    total_timesteps      | 1916928     |
+| train/                  |             |
+|    approx_kl            | 0.008376695 |
+|    clip_fraction        | 0.0935      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.07       |
+|    explained_variance   | 0.964       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0392     |
+|    n_updates            | 1160        |
+|    policy_gradient_loss | -0.00354    |
+|    std                  | 1.12        |
+|    value_loss           | 0.00203     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1909         |
+|    iterations           | 118          |
+|    time_elapsed         | 1012         |
+|    total_timesteps      | 1933312      |
+| train/                  |              |
+|    approx_kl            | 0.0077100536 |
+|    clip_fraction        | 0.0854       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.07        |
+|    explained_variance   | 0.933        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0467      |
+|    n_updates            | 1170         |
+|    policy_gradient_loss | -0.00421     |
+|    std                  | 1.12         |
+|    value_loss           | 0.00132      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1915        |
+|    iterations           | 119         |
+|    time_elapsed         | 1018        |
+|    total_timesteps      | 1949696     |
+| train/                  |             |
+|    approx_kl            | 0.006848542 |
+|    clip_fraction        | 0.0674      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.07       |
+|    explained_variance   | 0.959       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0335     |
+|    n_updates            | 1180        |
+|    policy_gradient_loss | -0.00229    |
+|    std                  | 1.13        |
+|    value_loss           | 0.00138     |
+-----------------------------------------
+Eval num_timesteps=1950000, episode_reward=29.72 +/- 38.42
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 29.7        |
+| time/                   |             |
+|    total_timesteps      | 1950000     |
+| train/                  |             |
+|    approx_kl            | 0.007300608 |
+|    clip_fraction        | 0.0824      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.07       |
+|    explained_variance   | 0.977       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0358     |
+|    n_updates            | 1190        |
+|    policy_gradient_loss | -0.00364    |
+|    std                  | 1.12        |
+|    value_loss           | 0.00159     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1899    |
+|    iterations      | 120     |
+|    time_elapsed    | 1034    |
+|    total_timesteps | 1966080 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1906         |
+|    iterations           | 121          |
+|    time_elapsed         | 1040         |
+|    total_timesteps      | 1982464      |
+| train/                  |              |
+|    approx_kl            | 0.0072772675 |
+|    clip_fraction        | 0.0703       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.07        |
+|    explained_variance   | 0.882        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0357      |
+|    n_updates            | 1200         |
+|    policy_gradient_loss | -0.00163     |
+|    std                  | 1.13         |
+|    value_loss           | 0.00471      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1912        |
+|    iterations           | 122         |
+|    time_elapsed         | 1045        |
+|    total_timesteps      | 1998848     |
+| train/                  |             |
+|    approx_kl            | 0.007866079 |
+|    clip_fraction        | 0.0898      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.07       |
+|    explained_variance   | 0.962       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0304     |
+|    n_updates            | 1210        |
+|    policy_gradient_loss | -0.0052     |
+|    std                  | 1.13        |
+|    value_loss           | 0.0014      |
+-----------------------------------------
+Eval num_timesteps=2000000, episode_reward=14.20 +/- 34.02
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 14.2         |
+| time/                   |              |
+|    total_timesteps      | 2000000      |
+| train/                  |              |
+|    approx_kl            | 0.0073383995 |
+|    clip_fraction        | 0.083        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.07        |
+|    explained_variance   | 0.95         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0369      |
+|    n_updates            | 1220         |
+|    policy_gradient_loss | -0.00296     |
+|    std                  | 1.12         |
+|    value_loss           | 0.00336      |
+------------------------------------------
+
+[Diag @ 2,000,000 | n_sheep=3 | success=0%]
+  NEVER_COMPACT              12/20
+  COMPACT_CANT_DRIVE         8/20
+  action_mag mean=0.076 p10=0.007 p90=0.097 (0=stopped, 1=full speed)
+  min_flock_radius mean=5.33m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=1.01m best=0.16m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.40m best=6.50m
+  reward/step (mean): progress=+0.0041  alignment=+0.0263  pen_bonus=+0.0013  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1881    |
+|    iterations      | 123     |
+|    time_elapsed    | 1071    |
+|    total_timesteps | 2015232 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1887         |
+|    iterations           | 124          |
+|    time_elapsed         | 1076         |
+|    total_timesteps      | 2031616      |
+| train/                  |              |
+|    approx_kl            | 0.0060287267 |
+|    clip_fraction        | 0.0716       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.07        |
+|    explained_variance   | 0.902        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0402      |
+|    n_updates            | 1230         |
+|    policy_gradient_loss | -0.00308     |
+|    std                  | 1.13         |
+|    value_loss           | 0.00475      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1894         |
+|    iterations           | 125          |
+|    time_elapsed         | 1081         |
+|    total_timesteps      | 2048000      |
+| train/                  |              |
+|    approx_kl            | 0.0073304214 |
+|    clip_fraction        | 0.08         |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.08        |
+|    explained_variance   | 0.95         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0436      |
+|    n_updates            | 1240         |
+|    policy_gradient_loss | -0.00373     |
+|    std                  | 1.13         |
+|    value_loss           | 0.00138      |
+------------------------------------------
+Eval num_timesteps=2050000, episode_reward=18.68 +/- 36.20
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 18.7         |
+| time/                   |              |
+|    total_timesteps      | 2050000      |
+| train/                  |              |
+|    approx_kl            | 0.0068036346 |
+|    clip_fraction        | 0.0768       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.07        |
+|    explained_variance   | 0.897        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0461      |
+|    n_updates            | 1250         |
+|    policy_gradient_loss | -0.00392     |
+|    std                  | 1.13         |
+|    value_loss           | 0.0013       |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1880    |
+|    iterations      | 126     |
+|    time_elapsed    | 1097    |
+|    total_timesteps | 2064384 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1886        |
+|    iterations           | 127         |
+|    time_elapsed         | 1102        |
+|    total_timesteps      | 2080768     |
+| train/                  |             |
+|    approx_kl            | 0.006960577 |
+|    clip_fraction        | 0.0689      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.07       |
+|    explained_variance   | 0.917       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0302     |
+|    n_updates            | 1260        |
+|    policy_gradient_loss | -0.00248    |
+|    std                  | 1.12        |
+|    value_loss           | 0.00841     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1892        |
+|    iterations           | 128         |
+|    time_elapsed         | 1108        |
+|    total_timesteps      | 2097152     |
+| train/                  |             |
+|    approx_kl            | 0.007300884 |
+|    clip_fraction        | 0.0705      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.09       |
+|    explained_variance   | 0.915       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0338     |
+|    n_updates            | 1270        |
+|    policy_gradient_loss | -0.00351    |
+|    std                  | 1.14        |
+|    value_loss           | 0.00336     |
+-----------------------------------------
+Eval num_timesteps=2100000, episode_reward=37.33 +/- 41.91
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 37.3        |
+| time/                   |             |
+|    total_timesteps      | 2100000     |
+| train/                  |             |
+|    approx_kl            | 0.007571588 |
+|    clip_fraction        | 0.076       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.1        |
+|    explained_variance   | 0.907       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0278     |
+|    n_updates            | 1280        |
+|    policy_gradient_loss | -0.00336    |
+|    std                  | 1.14        |
+|    value_loss           | 0.00228     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1878    |
+|    iterations      | 129     |
+|    time_elapsed    | 1124    |
+|    total_timesteps | 2113536 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1884        |
+|    iterations           | 130         |
+|    time_elapsed         | 1130        |
+|    total_timesteps      | 2129920     |
+| train/                  |             |
+|    approx_kl            | 0.007885255 |
+|    clip_fraction        | 0.088       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.11       |
+|    explained_variance   | 0.939       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0388     |
+|    n_updates            | 1290        |
+|    policy_gradient_loss | -0.00498    |
+|    std                  | 1.15        |
+|    value_loss           | 0.00231     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1890         |
+|    iterations           | 131          |
+|    time_elapsed         | 1135         |
+|    total_timesteps      | 2146304      |
+| train/                  |              |
+|    approx_kl            | 0.0073760273 |
+|    clip_fraction        | 0.0769       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.11        |
+|    explained_variance   | 0.955        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0277      |
+|    n_updates            | 1300         |
+|    policy_gradient_loss | -0.00306     |
+|    std                  | 1.15         |
+|    value_loss           | 0.00294      |
+------------------------------------------
+Eval num_timesteps=2150000, episode_reward=31.84 +/- 38.92
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 31.8        |
+| time/                   |             |
+|    total_timesteps      | 2150000     |
+| train/                  |             |
+|    approx_kl            | 0.006736047 |
+|    clip_fraction        | 0.0685      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.12       |
+|    explained_variance   | 0.913       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0302     |
+|    n_updates            | 1310        |
+|    policy_gradient_loss | -0.0021     |
+|    std                  | 1.16        |
+|    value_loss           | 0.00422     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1872    |
+|    iterations      | 132     |
+|    time_elapsed    | 1155    |
+|    total_timesteps | 2162688 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1878        |
+|    iterations           | 133         |
+|    time_elapsed         | 1160        |
+|    total_timesteps      | 2179072     |
+| train/                  |             |
+|    approx_kl            | 0.006166819 |
+|    clip_fraction        | 0.0668      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.13       |
+|    explained_variance   | 0.956       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0473     |
+|    n_updates            | 1320        |
+|    policy_gradient_loss | -0.00364    |
+|    std                  | 1.16        |
+|    value_loss           | 0.00158     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1884         |
+|    iterations           | 134          |
+|    time_elapsed         | 1165         |
+|    total_timesteps      | 2195456      |
+| train/                  |              |
+|    approx_kl            | 0.0075986157 |
+|    clip_fraction        | 0.0769       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.14        |
+|    explained_variance   | 0.966        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0317      |
+|    n_updates            | 1330         |
+|    policy_gradient_loss | -0.00398     |
+|    std                  | 1.17         |
+|    value_loss           | 0.00307      |
+------------------------------------------
+Eval num_timesteps=2200000, episode_reward=26.98 +/- 37.84
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 27          |
+| time/                   |             |
+|    total_timesteps      | 2200000     |
+| train/                  |             |
+|    approx_kl            | 0.008170303 |
+|    clip_fraction        | 0.0981      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.14       |
+|    explained_variance   | 0.964       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0326     |
+|    n_updates            | 1340        |
+|    policy_gradient_loss | -0.00415    |
+|    std                  | 1.16        |
+|    value_loss           | 0.00349     |
+-----------------------------------------
+
+[Diag @ 2,200,000 | n_sheep=3 | success=0%]
+  NEVER_COMPACT              16/20
+  COMPACT_CANT_DRIVE         4/20
+  action_mag mean=0.067 p10=0.003 p90=0.067 (0=stopped, 1=full speed)
+  min_flock_radius mean=7.25m best=1.61m  (target <5m to compact)
+  min_dog_to_com   mean=0.97m best=0.20m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.28m best=5.53m
+  reward/step (mean): progress=+0.0007  alignment=+0.0353  pen_bonus=+0.0008  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1832    |
+|    iterations      | 135     |
+|    time_elapsed    | 1206    |
+|    total_timesteps | 2211840 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1805        |
+|    iterations           | 136         |
+|    time_elapsed         | 1234        |
+|    total_timesteps      | 2228224     |
+| train/                  |             |
+|    approx_kl            | 0.006131858 |
+|    clip_fraction        | 0.067       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.13       |
+|    explained_variance   | 0.927       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0328     |
+|    n_updates            | 1350        |
+|    policy_gradient_loss | -0.0022     |
+|    std                  | 1.16        |
+|    value_loss           | 0.000981    |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1811         |
+|    iterations           | 137          |
+|    time_elapsed         | 1239         |
+|    total_timesteps      | 2244608      |
+| train/                  |              |
+|    approx_kl            | 0.0071705403 |
+|    clip_fraction        | 0.0699       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.12        |
+|    explained_variance   | 0.913        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0391      |
+|    n_updates            | 1360         |
+|    policy_gradient_loss | -0.0032      |
+|    std                  | 1.15         |
+|    value_loss           | 0.00639      |
+------------------------------------------
+Eval num_timesteps=2250000, episode_reward=28.55 +/- 29.67
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 28.5        |
+| time/                   |             |
+|    total_timesteps      | 2250000     |
+| train/                  |             |
+|    approx_kl            | 0.007929602 |
+|    clip_fraction        | 0.0812      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.14       |
+|    explained_variance   | 0.933       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0592     |
+|    n_updates            | 1370        |
+|    policy_gradient_loss | -0.00434    |
+|    std                  | 1.17        |
+|    value_loss           | 0.00337     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1800    |
+|    iterations      | 138     |
+|    time_elapsed    | 1255    |
+|    total_timesteps | 2260992 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1806         |
+|    iterations           | 139          |
+|    time_elapsed         | 1260         |
+|    total_timesteps      | 2277376      |
+| train/                  |              |
+|    approx_kl            | 0.0062256474 |
+|    clip_fraction        | 0.0592       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.15        |
+|    explained_variance   | 0.935        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0368      |
+|    n_updates            | 1380         |
+|    policy_gradient_loss | -0.00242     |
+|    std                  | 1.17         |
+|    value_loss           | 0.00787      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1812         |
+|    iterations           | 140          |
+|    time_elapsed         | 1265         |
+|    total_timesteps      | 2293760      |
+| train/                  |              |
+|    approx_kl            | 0.0075241774 |
+|    clip_fraction        | 0.0885       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.14        |
+|    explained_variance   | 0.948        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0385      |
+|    n_updates            | 1390         |
+|    policy_gradient_loss | -0.00346     |
+|    std                  | 1.16         |
+|    value_loss           | 0.00172      |
+------------------------------------------
+Eval num_timesteps=2300000, episode_reward=43.34 +/- 34.73
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 43.3         |
+| time/                   |              |
+|    total_timesteps      | 2300000      |
+| train/                  |              |
+|    approx_kl            | 0.0073855575 |
+|    clip_fraction        | 0.0753       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.12        |
+|    explained_variance   | 0.911        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0377      |
+|    n_updates            | 1400         |
+|    policy_gradient_loss | -0.0034      |
+|    std                  | 1.15         |
+|    value_loss           | 0.00645      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1801    |
+|    iterations      | 141     |
+|    time_elapsed    | 1282    |
+|    total_timesteps | 2310144 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1806        |
+|    iterations           | 142         |
+|    time_elapsed         | 1287        |
+|    total_timesteps      | 2326528     |
+| train/                  |             |
+|    approx_kl            | 0.007232903 |
+|    clip_fraction        | 0.0845      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.13       |
+|    explained_variance   | 0.956       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0346     |
+|    n_updates            | 1410        |
+|    policy_gradient_loss | -0.003      |
+|    std                  | 1.16        |
+|    value_loss           | 0.00134     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1812        |
+|    iterations           | 143         |
+|    time_elapsed         | 1292        |
+|    total_timesteps      | 2342912     |
+| train/                  |             |
+|    approx_kl            | 0.007283367 |
+|    clip_fraction        | 0.0785      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.14       |
+|    explained_variance   | 0.913       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0306     |
+|    n_updates            | 1420        |
+|    policy_gradient_loss | -0.00368    |
+|    std                  | 1.17        |
+|    value_loss           | 0.00385     |
+-----------------------------------------
+Eval num_timesteps=2350000, episode_reward=33.49 +/- 34.79
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 33.5        |
+| time/                   |             |
+|    total_timesteps      | 2350000     |
+| train/                  |             |
+|    approx_kl            | 0.006632698 |
+|    clip_fraction        | 0.0647      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.15       |
+|    explained_variance   | 0.934       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0469     |
+|    n_updates            | 1430        |
+|    policy_gradient_loss | -0.00327    |
+|    std                  | 1.17        |
+|    value_loss           | 0.00793     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1800    |
+|    iterations      | 144     |
+|    time_elapsed    | 1310    |
+|    total_timesteps | 2359296 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1805        |
+|    iterations           | 145         |
+|    time_elapsed         | 1315        |
+|    total_timesteps      | 2375680     |
+| train/                  |             |
+|    approx_kl            | 0.008364577 |
+|    clip_fraction        | 0.089       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.15       |
+|    explained_variance   | 0.957       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0464     |
+|    n_updates            | 1440        |
+|    policy_gradient_loss | -0.00453    |
+|    std                  | 1.17        |
+|    value_loss           | 0.00507     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1810        |
+|    iterations           | 146         |
+|    time_elapsed         | 1321        |
+|    total_timesteps      | 2392064     |
+| train/                  |             |
+|    approx_kl            | 0.007854694 |
+|    clip_fraction        | 0.0927      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.15       |
+|    explained_variance   | 0.953       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0436     |
+|    n_updates            | 1450        |
+|    policy_gradient_loss | -0.00519    |
+|    std                  | 1.17        |
+|    value_loss           | 0.00289     |
+-----------------------------------------
+Eval num_timesteps=2400000, episode_reward=34.64 +/- 37.27
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 34.6         |
+| time/                   |              |
+|    total_timesteps      | 2400000      |
+| train/                  |              |
+|    approx_kl            | 0.0076201856 |
+|    clip_fraction        | 0.0844       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.15        |
+|    explained_variance   | 0.945        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0431      |
+|    n_updates            | 1460         |
+|    policy_gradient_loss | -0.00554     |
+|    std                  | 1.17         |
+|    value_loss           | 0.00196      |
+------------------------------------------
+
+[Diag @ 2,400,000 | n_sheep=3 | success=0%]
+  NEVER_COMPACT              15/20
+  COMPACT_CANT_DRIVE         5/20
+  action_mag mean=0.058 p10=0.006 p90=0.053 (0=stopped, 1=full speed)
+  min_flock_radius mean=6.68m best=0.96m  (target <5m to compact)
+  min_dog_to_com   mean=0.92m best=0.16m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.18m best=5.62m
+  reward/step (mean): progress=+0.0034  alignment=+0.0352  pen_bonus=+0.0010  step_cost=-0.0200  complete=+0.0000
+
+[Curriculum] leaving stage n_sheep=3 after 800,000 steps | training success rate (last 100 eps) = 0%
+[Curriculum] → 4 sheep at step 2,400,000
+
+--------------------------------
+| time/              |         |
+|    fps             | 1788    |
+|    iterations      | 147     |
+|    time_elapsed    | 1346    |
+|    total_timesteps | 2408448 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1794        |
+|    iterations           | 148         |
+|    time_elapsed         | 1351        |
+|    total_timesteps      | 2424832     |
+| train/                  |             |
+|    approx_kl            | 0.006801254 |
+|    clip_fraction        | 0.0797      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.16       |
+|    explained_variance   | 0.922       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0313     |
+|    n_updates            | 1470        |
+|    policy_gradient_loss | -0.00418    |
+|    std                  | 1.18        |
+|    value_loss           | 0.00724     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1798        |
+|    iterations           | 149         |
+|    time_elapsed         | 1357        |
+|    total_timesteps      | 2441216     |
+| train/                  |             |
+|    approx_kl            | 0.007604986 |
+|    clip_fraction        | 0.0758      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.18       |
+|    explained_variance   | 0.937       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0354     |
+|    n_updates            | 1480        |
+|    policy_gradient_loss | -0.00189    |
+|    std                  | 1.19        |
+|    value_loss           | 0.00591     |
+-----------------------------------------
+Eval num_timesteps=2450000, episode_reward=27.82 +/- 47.76
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 27.8         |
+| time/                   |              |
+|    total_timesteps      | 2450000      |
+| train/                  |              |
+|    approx_kl            | 0.0070674624 |
+|    clip_fraction        | 0.0749       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.2         |
+|    explained_variance   | 0.893        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0327      |
+|    n_updates            | 1490         |
+|    policy_gradient_loss | -0.00322     |
+|    std                  | 1.2          |
+|    value_loss           | 0.0105       |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1788    |
+|    iterations      | 150     |
+|    time_elapsed    | 1374    |
+|    total_timesteps | 2457600 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1792        |
+|    iterations           | 151         |
+|    time_elapsed         | 1380        |
+|    total_timesteps      | 2473984     |
+| train/                  |             |
+|    approx_kl            | 0.008372683 |
+|    clip_fraction        | 0.0874      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.21       |
+|    explained_variance   | 0.932       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0381     |
+|    n_updates            | 1500        |
+|    policy_gradient_loss | -0.00471    |
+|    std                  | 1.21        |
+|    value_loss           | 0.00563     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1796        |
+|    iterations           | 152         |
+|    time_elapsed         | 1385        |
+|    total_timesteps      | 2490368     |
+| train/                  |             |
+|    approx_kl            | 0.007761459 |
+|    clip_fraction        | 0.0794      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.23       |
+|    explained_variance   | 0.929       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0345     |
+|    n_updates            | 1510        |
+|    policy_gradient_loss | -0.00402    |
+|    std                  | 1.22        |
+|    value_loss           | 0.00736     |
+-----------------------------------------
+Eval num_timesteps=2500000, episode_reward=25.79 +/- 28.60
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 25.8         |
+| time/                   |              |
+|    total_timesteps      | 2500000      |
+| train/                  |              |
+|    approx_kl            | 0.0070840344 |
+|    clip_fraction        | 0.0711       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.22        |
+|    explained_variance   | 0.9          |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0322      |
+|    n_updates            | 1520         |
+|    policy_gradient_loss | -0.00397     |
+|    std                  | 1.21         |
+|    value_loss           | 0.00517      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1785    |
+|    iterations      | 153     |
+|    time_elapsed    | 1404    |
+|    total_timesteps | 2506752 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1788         |
+|    iterations           | 154          |
+|    time_elapsed         | 1410         |
+|    total_timesteps      | 2523136      |
+| train/                  |              |
+|    approx_kl            | 0.0062630484 |
+|    clip_fraction        | 0.069        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.22        |
+|    explained_variance   | 0.93         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0363      |
+|    n_updates            | 1530         |
+|    policy_gradient_loss | -0.00382     |
+|    std                  | 1.21         |
+|    value_loss           | 0.00546      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1792        |
+|    iterations           | 155         |
+|    time_elapsed         | 1416        |
+|    total_timesteps      | 2539520     |
+| train/                  |             |
+|    approx_kl            | 0.007609036 |
+|    clip_fraction        | 0.0815      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.23       |
+|    explained_variance   | 0.832       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0404     |
+|    n_updates            | 1540        |
+|    policy_gradient_loss | -0.00347    |
+|    std                  | 1.22        |
+|    value_loss           | 0.00902     |
+-----------------------------------------
+Eval num_timesteps=2550000, episode_reward=26.76 +/- 38.76
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 26.8         |
+| time/                   |              |
+|    total_timesteps      | 2550000      |
+| train/                  |              |
+|    approx_kl            | 0.0070117847 |
+|    clip_fraction        | 0.0808       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.23        |
+|    explained_variance   | 0.863        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0357      |
+|    n_updates            | 1550         |
+|    policy_gradient_loss | -0.00279     |
+|    std                  | 1.22         |
+|    value_loss           | 0.0114       |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1780    |
+|    iterations      | 156     |
+|    time_elapsed    | 1435    |
+|    total_timesteps | 2555904 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1786         |
+|    iterations           | 157          |
+|    time_elapsed         | 1440         |
+|    total_timesteps      | 2572288      |
+| train/                  |              |
+|    approx_kl            | 0.0070258966 |
+|    clip_fraction        | 0.0817       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.23        |
+|    explained_variance   | 0.941        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.039       |
+|    n_updates            | 1560         |
+|    policy_gradient_loss | -0.00488     |
+|    std                  | 1.22         |
+|    value_loss           | 0.00696      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1791        |
+|    iterations           | 158         |
+|    time_elapsed         | 1445        |
+|    total_timesteps      | 2588672     |
+| train/                  |             |
+|    approx_kl            | 0.007600763 |
+|    clip_fraction        | 0.0842      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.23       |
+|    explained_variance   | 0.912       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0363     |
+|    n_updates            | 1570        |
+|    policy_gradient_loss | -0.00544    |
+|    std                  | 1.22        |
+|    value_loss           | 0.00556     |
+-----------------------------------------
+Eval num_timesteps=2600000, episode_reward=19.53 +/- 46.34
+Episode length: 2000.00 +/- 0.00
+----------------------------------------
+| eval/                   |            |
+|    mean_ep_length       | 2e+03      |
+|    mean_reward          | 19.5       |
+| time/                   |            |
+|    total_timesteps      | 2600000    |
+| train/                  |            |
+|    approx_kl            | 0.00714178 |
+|    clip_fraction        | 0.0783     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -3.23      |
+|    explained_variance   | 0.92       |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0352    |
+|    n_updates            | 1580       |
+|    policy_gradient_loss | -0.00468   |
+|    std                  | 1.22       |
+|    value_loss           | 0.00364    |
+----------------------------------------
+
+[Diag @ 2,600,000 | n_sheep=4 | success=0%]
+  NEVER_COMPACT              20/20
+  action_mag mean=0.061 p10=0.006 p90=0.047 (0=stopped, 1=full speed)
+  min_flock_radius mean=7.84m best=5.75m  (target <5m to compact)
+  min_dog_to_com   mean=0.66m best=0.09m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.60m best=6.52m
+  reward/step (mean): progress=-0.0028  alignment=+0.0337  pen_bonus=+0.0005  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1768    |
+|    iterations      | 159     |
+|    time_elapsed    | 1473    |
+|    total_timesteps | 2605056 |
+--------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 1771       |
+|    iterations           | 160        |
+|    time_elapsed         | 1479       |
+|    total_timesteps      | 2621440    |
+| train/                  |            |
+|    approx_kl            | 0.00681924 |
+|    clip_fraction        | 0.0779     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -3.23      |
+|    explained_variance   | 0.946      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0409    |
+|    n_updates            | 1590       |
+|    policy_gradient_loss | -0.00346   |
+|    std                  | 1.22       |
+|    value_loss           | 0.00377    |
+----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1775        |
+|    iterations           | 161         |
+|    time_elapsed         | 1485        |
+|    total_timesteps      | 2637824     |
+| train/                  |             |
+|    approx_kl            | 0.008016385 |
+|    clip_fraction        | 0.0888      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.24       |
+|    explained_variance   | 0.931       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0311     |
+|    n_updates            | 1600        |
+|    policy_gradient_loss | -0.00526    |
+|    std                  | 1.22        |
+|    value_loss           | 0.00681     |
+-----------------------------------------
+Eval num_timesteps=2650000, episode_reward=28.98 +/- 40.07
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 29          |
+| time/                   |             |
+|    total_timesteps      | 2650000     |
+| train/                  |             |
+|    approx_kl            | 0.006836592 |
+|    clip_fraction        | 0.0778      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.24       |
+|    explained_variance   | 0.9         |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0304     |
+|    n_updates            | 1610        |
+|    policy_gradient_loss | -0.00255    |
+|    std                  | 1.23        |
+|    value_loss           | 0.00574     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1765    |
+|    iterations      | 162     |
+|    time_elapsed    | 1503    |
+|    total_timesteps | 2654208 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1770         |
+|    iterations           | 163          |
+|    time_elapsed         | 1508         |
+|    total_timesteps      | 2670592      |
+| train/                  |              |
+|    approx_kl            | 0.0072684484 |
+|    clip_fraction        | 0.0764       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.23        |
+|    explained_variance   | 0.948        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0295      |
+|    n_updates            | 1620         |
+|    policy_gradient_loss | -0.00325     |
+|    std                  | 1.22         |
+|    value_loss           | 0.00254      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1775        |
+|    iterations           | 164         |
+|    time_elapsed         | 1513        |
+|    total_timesteps      | 2686976     |
+| train/                  |             |
+|    approx_kl            | 0.007457966 |
+|    clip_fraction        | 0.0845      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.23       |
+|    explained_variance   | 0.919       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0473     |
+|    n_updates            | 1630        |
+|    policy_gradient_loss | -0.00505    |
+|    std                  | 1.22        |
+|    value_loss           | 0.004       |
+-----------------------------------------
+Eval num_timesteps=2700000, episode_reward=33.96 +/- 32.11
+Episode length: 2000.00 +/- 0.00
+----------------------------------------
+| eval/                   |            |
+|    mean_ep_length       | 2e+03      |
+|    mean_reward          | 34         |
+| time/                   |            |
+|    total_timesteps      | 2700000    |
+| train/                  |            |
+|    approx_kl            | 0.00796853 |
+|    clip_fraction        | 0.0782     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -3.22      |
+|    explained_variance   | 0.959      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0336    |
+|    n_updates            | 1640       |
+|    policy_gradient_loss | -0.00288   |
+|    std                  | 1.21       |
+|    value_loss           | 0.00235    |
+----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1761    |
+|    iterations      | 165     |
+|    time_elapsed    | 1534    |
+|    total_timesteps | 2703360 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1764         |
+|    iterations           | 166          |
+|    time_elapsed         | 1541         |
+|    total_timesteps      | 2719744      |
+| train/                  |              |
+|    approx_kl            | 0.0073700505 |
+|    clip_fraction        | 0.0857       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.21        |
+|    explained_variance   | 0.875        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0255      |
+|    n_updates            | 1650         |
+|    policy_gradient_loss | -0.00495     |
+|    std                  | 1.21         |
+|    value_loss           | 0.00846      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1768        |
+|    iterations           | 167         |
+|    time_elapsed         | 1546        |
+|    total_timesteps      | 2736128     |
+| train/                  |             |
+|    approx_kl            | 0.007965144 |
+|    clip_fraction        | 0.0858      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.22       |
+|    explained_variance   | 0.898       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0451     |
+|    n_updates            | 1660        |
+|    policy_gradient_loss | -0.00518    |
+|    std                  | 1.22        |
+|    value_loss           | 0.00395     |
+-----------------------------------------
+Eval num_timesteps=2750000, episode_reward=23.58 +/- 34.37
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 23.6         |
+| time/                   |              |
+|    total_timesteps      | 2750000      |
+| train/                  |              |
+|    approx_kl            | 0.0065765316 |
+|    clip_fraction        | 0.0682       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.23        |
+|    explained_variance   | 0.934        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0429      |
+|    n_updates            | 1670         |
+|    policy_gradient_loss | -0.00379     |
+|    std                  | 1.23         |
+|    value_loss           | 0.00677      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1756    |
+|    iterations      | 168     |
+|    time_elapsed    | 1566    |
+|    total_timesteps | 2752512 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1761         |
+|    iterations           | 169          |
+|    time_elapsed         | 1571         |
+|    total_timesteps      | 2768896      |
+| train/                  |              |
+|    approx_kl            | 0.0066236854 |
+|    clip_fraction        | 0.0619       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.25        |
+|    explained_variance   | 0.935        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0365      |
+|    n_updates            | 1680         |
+|    policy_gradient_loss | -0.00239     |
+|    std                  | 1.23         |
+|    value_loss           | 0.00922      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1766        |
+|    iterations           | 170         |
+|    time_elapsed         | 1576        |
+|    total_timesteps      | 2785280     |
+| train/                  |             |
+|    approx_kl            | 0.007887056 |
+|    clip_fraction        | 0.0836      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.25       |
+|    explained_variance   | 0.899       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0353     |
+|    n_updates            | 1690        |
+|    policy_gradient_loss | -0.0053     |
+|    std                  | 1.24        |
+|    value_loss           | 0.00635     |
+-----------------------------------------
+Eval num_timesteps=2800000, episode_reward=33.57 +/- 35.56
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 33.6         |
+| time/                   |              |
+|    total_timesteps      | 2800000      |
+| train/                  |              |
+|    approx_kl            | 0.0067548407 |
+|    clip_fraction        | 0.0804       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.25        |
+|    explained_variance   | 0.887        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0408      |
+|    n_updates            | 1700         |
+|    policy_gradient_loss | -0.00444     |
+|    std                  | 1.24         |
+|    value_loss           | 0.0101       |
+------------------------------------------
+
+[Diag @ 2,800,000 | n_sheep=4 | success=0%]
+  NEVER_COMPACT              19/20
+  COMPACT_CANT_DRIVE         1/20
+  action_mag mean=0.050 p10=0.003 p90=0.039 (0=stopped, 1=full speed)
+  min_flock_radius mean=8.42m best=4.84m  (target <5m to compact)
+  min_dog_to_com   mean=0.73m best=0.12m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=14.29m best=7.66m
+  reward/step (mean): progress=-0.0027  alignment=+0.0365  pen_bonus=+0.0005  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1746    |
+|    iterations      | 171     |
+|    time_elapsed    | 1604    |
+|    total_timesteps | 2801664 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1750         |
+|    iterations           | 172          |
+|    time_elapsed         | 1609         |
+|    total_timesteps      | 2818048      |
+| train/                  |              |
+|    approx_kl            | 0.0069283517 |
+|    clip_fraction        | 0.0847       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.24        |
+|    explained_variance   | 0.899        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0476      |
+|    n_updates            | 1710         |
+|    policy_gradient_loss | -0.00499     |
+|    std                  | 1.23         |
+|    value_loss           | 0.00708      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1754        |
+|    iterations           | 173         |
+|    time_elapsed         | 1615        |
+|    total_timesteps      | 2834432     |
+| train/                  |             |
+|    approx_kl            | 0.008303071 |
+|    clip_fraction        | 0.082       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.25       |
+|    explained_variance   | 0.911       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0484     |
+|    n_updates            | 1720        |
+|    policy_gradient_loss | -0.00388    |
+|    std                  | 1.23        |
+|    value_loss           | 0.0061      |
+-----------------------------------------
+Eval num_timesteps=2850000, episode_reward=34.42 +/- 32.01
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 34.4         |
+| time/                   |              |
+|    total_timesteps      | 2850000      |
+| train/                  |              |
+|    approx_kl            | 0.0063731004 |
+|    clip_fraction        | 0.069        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.26        |
+|    explained_variance   | 0.951        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.029       |
+|    n_updates            | 1730         |
+|    policy_gradient_loss | -0.00384     |
+|    std                  | 1.25         |
+|    value_loss           | 0.00528      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1745    |
+|    iterations      | 174     |
+|    time_elapsed    | 1633    |
+|    total_timesteps | 2850816 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1749        |
+|    iterations           | 175         |
+|    time_elapsed         | 1638        |
+|    total_timesteps      | 2867200     |
+| train/                  |             |
+|    approx_kl            | 0.008163793 |
+|    clip_fraction        | 0.0812      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.28       |
+|    explained_variance   | 0.935       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0374     |
+|    n_updates            | 1740        |
+|    policy_gradient_loss | -0.0032     |
+|    std                  | 1.26        |
+|    value_loss           | 0.00432     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1754         |
+|    iterations           | 176          |
+|    time_elapsed         | 1643         |
+|    total_timesteps      | 2883584      |
+| train/                  |              |
+|    approx_kl            | 0.0063439216 |
+|    clip_fraction        | 0.0743       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.29        |
+|    explained_variance   | 0.89         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0372      |
+|    n_updates            | 1750         |
+|    policy_gradient_loss | -0.00403     |
+|    std                  | 1.26         |
+|    value_loss           | 0.00654      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1759        |
+|    iterations           | 177         |
+|    time_elapsed         | 1648        |
+|    total_timesteps      | 2899968     |
+| train/                  |             |
+|    approx_kl            | 0.006967159 |
+|    clip_fraction        | 0.0761      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.29       |
+|    explained_variance   | 0.929       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0462     |
+|    n_updates            | 1760        |
+|    policy_gradient_loss | -0.00382    |
+|    std                  | 1.26        |
+|    value_loss           | 0.00381     |
+-----------------------------------------
+Eval num_timesteps=2900000, episode_reward=40.78 +/- 43.99
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 40.8         |
+| time/                   |              |
+|    total_timesteps      | 2900000      |
+| train/                  |              |
+|    approx_kl            | 0.0075211767 |
+|    clip_fraction        | 0.0727       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.29        |
+|    explained_variance   | 0.955        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0178      |
+|    n_updates            | 1770         |
+|    policy_gradient_loss | -0.00285     |
+|    std                  | 1.27         |
+|    value_loss           | 0.00798      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1751    |
+|    iterations      | 178     |
+|    time_elapsed    | 1664    |
+|    total_timesteps | 2916352 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1756        |
+|    iterations           | 179         |
+|    time_elapsed         | 1669        |
+|    total_timesteps      | 2932736     |
+| train/                  |             |
+|    approx_kl            | 0.006763531 |
+|    clip_fraction        | 0.0678      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.3        |
+|    explained_variance   | 0.91        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0349     |
+|    n_updates            | 1780        |
+|    policy_gradient_loss | -0.00361    |
+|    std                  | 1.27        |
+|    value_loss           | 0.00528     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1760         |
+|    iterations           | 180          |
+|    time_elapsed         | 1675         |
+|    total_timesteps      | 2949120      |
+| train/                  |              |
+|    approx_kl            | 0.0067441636 |
+|    clip_fraction        | 0.0732       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.3         |
+|    explained_variance   | 0.888        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0261      |
+|    n_updates            | 1790         |
+|    policy_gradient_loss | -0.00291     |
+|    std                  | 1.27         |
+|    value_loss           | 0.00582      |
+------------------------------------------
+Eval num_timesteps=2950000, episode_reward=48.39 +/- 31.91
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 48.4         |
+| time/                   |              |
+|    total_timesteps      | 2950000      |
+| train/                  |              |
+|    approx_kl            | 0.0076025603 |
+|    clip_fraction        | 0.0858       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.31        |
+|    explained_variance   | 0.92         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0394      |
+|    n_updates            | 1800         |
+|    policy_gradient_loss | -0.00443     |
+|    std                  | 1.27         |
+|    value_loss           | 0.00647      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1751    |
+|    iterations      | 181     |
+|    time_elapsed    | 1693    |
+|    total_timesteps | 2965504 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1754        |
+|    iterations           | 182         |
+|    time_elapsed         | 1699        |
+|    total_timesteps      | 2981888     |
+| train/                  |             |
+|    approx_kl            | 0.008041672 |
+|    clip_fraction        | 0.0795      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.3        |
+|    explained_variance   | 0.939       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0344     |
+|    n_updates            | 1810        |
+|    policy_gradient_loss | -0.00456    |
+|    std                  | 1.27        |
+|    value_loss           | 0.00404     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1758         |
+|    iterations           | 183          |
+|    time_elapsed         | 1704         |
+|    total_timesteps      | 2998272      |
+| train/                  |              |
+|    approx_kl            | 0.0066829836 |
+|    clip_fraction        | 0.0712       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.32        |
+|    explained_variance   | 0.921        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0361      |
+|    n_updates            | 1820         |
+|    policy_gradient_loss | -0.00379     |
+|    std                  | 1.28         |
+|    value_loss           | 0.00818      |
+------------------------------------------
+Eval num_timesteps=3000000, episode_reward=33.06 +/- 47.57
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 33.1        |
+| time/                   |             |
+|    total_timesteps      | 3000000     |
+| train/                  |             |
+|    approx_kl            | 0.006152373 |
+|    clip_fraction        | 0.0633      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.33       |
+|    explained_variance   | 0.912       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0316     |
+|    n_updates            | 1830        |
+|    policy_gradient_loss | -0.00335    |
+|    std                  | 1.29        |
+|    value_loss           | 0.00404     |
+-----------------------------------------
+
+[Diag @ 3,000,000 | n_sheep=4 | success=0%]
+  NEVER_COMPACT              20/20
+  action_mag mean=0.049 p10=0.005 p90=0.046 (0=stopped, 1=full speed)
+  min_flock_radius mean=8.21m best=5.29m  (target <5m to compact)
+  min_dog_to_com   mean=0.76m best=0.22m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.62m best=4.77m
+  reward/step (mean): progress=+0.0089  alignment=+0.0386  pen_bonus=+0.0008  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1740    |
+|    iterations      | 184     |
+|    time_elapsed    | 1731    |
+|    total_timesteps | 3014656 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1745        |
+|    iterations           | 185         |
+|    time_elapsed         | 1736        |
+|    total_timesteps      | 3031040     |
+| train/                  |             |
+|    approx_kl            | 0.006385569 |
+|    clip_fraction        | 0.0703      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.34       |
+|    explained_variance   | 0.919       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0313     |
+|    n_updates            | 1840        |
+|    policy_gradient_loss | -0.00274    |
+|    std                  | 1.3         |
+|    value_loss           | 0.00503     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1748        |
+|    iterations           | 186         |
+|    time_elapsed         | 1743        |
+|    total_timesteps      | 3047424     |
+| train/                  |             |
+|    approx_kl            | 0.007695101 |
+|    clip_fraction        | 0.0784      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.36       |
+|    explained_variance   | 0.935       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0244     |
+|    n_updates            | 1850        |
+|    policy_gradient_loss | -0.00342    |
+|    std                  | 1.31        |
+|    value_loss           | 0.0051      |
+-----------------------------------------
+Eval num_timesteps=3050000, episode_reward=45.25 +/- 31.57
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 45.2         |
+| time/                   |              |
+|    total_timesteps      | 3050000      |
+| train/                  |              |
+|    approx_kl            | 0.0067556566 |
+|    clip_fraction        | 0.082        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.37        |
+|    explained_variance   | 0.868        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0349      |
+|    n_updates            | 1860         |
+|    policy_gradient_loss | -0.00353     |
+|    std                  | 1.31         |
+|    value_loss           | 0.00931      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1738    |
+|    iterations      | 187     |
+|    time_elapsed    | 1762    |
+|    total_timesteps | 3063808 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1741        |
+|    iterations           | 188         |
+|    time_elapsed         | 1768        |
+|    total_timesteps      | 3080192     |
+| train/                  |             |
+|    approx_kl            | 0.008263266 |
+|    clip_fraction        | 0.0792      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.36       |
+|    explained_variance   | 0.924       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0411     |
+|    n_updates            | 1870        |
+|    policy_gradient_loss | -0.00382    |
+|    std                  | 1.31        |
+|    value_loss           | 0.00429     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1746        |
+|    iterations           | 189         |
+|    time_elapsed         | 1773        |
+|    total_timesteps      | 3096576     |
+| train/                  |             |
+|    approx_kl            | 0.008488305 |
+|    clip_fraction        | 0.08        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.37       |
+|    explained_variance   | 0.925       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0292     |
+|    n_updates            | 1880        |
+|    policy_gradient_loss | -0.00441    |
+|    std                  | 1.31        |
+|    value_loss           | 0.00748     |
+-----------------------------------------
+Eval num_timesteps=3100000, episode_reward=30.63 +/- 33.70
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 30.6         |
+| time/                   |              |
+|    total_timesteps      | 3100000      |
+| train/                  |              |
+|    approx_kl            | 0.0065515246 |
+|    clip_fraction        | 0.0736       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.35        |
+|    explained_variance   | 0.932        |
+|    learning_rate        | 0.0003       |
+|    loss                 | 0.00192      |
+|    n_updates            | 1890         |
+|    policy_gradient_loss | -0.00334     |
+|    std                  | 1.3          |
+|    value_loss           | 0.00902      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1737    |
+|    iterations      | 190     |
+|    time_elapsed    | 1791    |
+|    total_timesteps | 3112960 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1741         |
+|    iterations           | 191          |
+|    time_elapsed         | 1796         |
+|    total_timesteps      | 3129344      |
+| train/                  |              |
+|    approx_kl            | 0.0068135276 |
+|    clip_fraction        | 0.0721       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.35        |
+|    explained_variance   | 0.933        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.036       |
+|    n_updates            | 1900         |
+|    policy_gradient_loss | -0.00403     |
+|    std                  | 1.29         |
+|    value_loss           | 0.00616      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1731         |
+|    iterations           | 192          |
+|    time_elapsed         | 1817         |
+|    total_timesteps      | 3145728      |
+| train/                  |              |
+|    approx_kl            | 0.0061126407 |
+|    clip_fraction        | 0.0615       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.35        |
+|    explained_variance   | 0.921        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0355      |
+|    n_updates            | 1910         |
+|    policy_gradient_loss | -0.00318     |
+|    std                  | 1.3          |
+|    value_loss           | 0.0104       |
+------------------------------------------
+Eval num_timesteps=3150000, episode_reward=33.88 +/- 34.31
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 33.9        |
+| time/                   |             |
+|    total_timesteps      | 3150000     |
+| train/                  |             |
+|    approx_kl            | 0.007734685 |
+|    clip_fraction        | 0.0778      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.35       |
+|    explained_variance   | 0.899       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0323     |
+|    n_updates            | 1920        |
+|    policy_gradient_loss | -0.00432    |
+|    std                  | 1.3         |
+|    value_loss           | 0.0091      |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1714    |
+|    iterations      | 193     |
+|    time_elapsed    | 1844    |
+|    total_timesteps | 3162112 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1717        |
+|    iterations           | 194         |
+|    time_elapsed         | 1850        |
+|    total_timesteps      | 3178496     |
+| train/                  |             |
+|    approx_kl            | 0.007997783 |
+|    clip_fraction        | 0.0782      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.35       |
+|    explained_variance   | 0.91        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0525     |
+|    n_updates            | 1930        |
+|    policy_gradient_loss | -0.00523    |
+|    std                  | 1.3         |
+|    value_loss           | 0.00283     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1720        |
+|    iterations           | 195         |
+|    time_elapsed         | 1857        |
+|    total_timesteps      | 3194880     |
+| train/                  |             |
+|    approx_kl            | 0.007701534 |
+|    clip_fraction        | 0.0712      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.34       |
+|    explained_variance   | 0.927       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0367     |
+|    n_updates            | 1940        |
+|    policy_gradient_loss | -0.00288    |
+|    std                  | 1.3         |
+|    value_loss           | 0.0126      |
+-----------------------------------------
+Eval num_timesteps=3200000, episode_reward=46.55 +/- 34.01
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 46.6        |
+| time/                   |             |
+|    total_timesteps      | 3200000     |
+| train/                  |             |
+|    approx_kl            | 0.006747664 |
+|    clip_fraction        | 0.0766      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.35       |
+|    explained_variance   | 0.93        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0411     |
+|    n_updates            | 1950        |
+|    policy_gradient_loss | -0.00404    |
+|    std                  | 1.3         |
+|    value_loss           | 0.00409     |
+-----------------------------------------
+
+[Diag @ 3,200,000 | n_sheep=4 | success=0%]
+  NEVER_COMPACT              20/20
+  action_mag mean=0.078 p10=0.005 p90=0.057 (0=stopped, 1=full speed)
+  min_flock_radius mean=8.76m best=6.32m  (target <5m to compact)
+  min_dog_to_com   mean=0.81m best=0.36m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.75m best=6.91m
+  reward/step (mean): progress=-0.0020  alignment=+0.0384  pen_bonus=+0.0003  step_cost=-0.0200  complete=+0.0000
+
+[Curriculum] leaving stage n_sheep=4 after 800,000 steps | training success rate (last 100 eps) = 0%
+[Curriculum] → 5 sheep at step 3,200,000
+
+--------------------------------
+| time/              |         |
+|    fps             | 1704    |
+|    iterations      | 196     |
+|    time_elapsed    | 1884    |
+|    total_timesteps | 3211264 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1707         |
+|    iterations           | 197          |
+|    time_elapsed         | 1889         |
+|    total_timesteps      | 3227648      |
+| train/                  |              |
+|    approx_kl            | 0.0068222135 |
+|    clip_fraction        | 0.0816       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.36        |
+|    explained_variance   | 0.922        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0386      |
+|    n_updates            | 1960         |
+|    policy_gradient_loss | -0.00374     |
+|    std                  | 1.31         |
+|    value_loss           | 0.0112       |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1711        |
+|    iterations           | 198         |
+|    time_elapsed         | 1895        |
+|    total_timesteps      | 3244032     |
+| train/                  |             |
+|    approx_kl            | 0.006939999 |
+|    clip_fraction        | 0.0829      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.36       |
+|    explained_variance   | 0.955       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0439     |
+|    n_updates            | 1970        |
+|    policy_gradient_loss | -0.00433    |
+|    std                  | 1.31        |
+|    value_loss           | 0.00895     |
+-----------------------------------------
+Eval num_timesteps=3250000, episode_reward=21.19 +/- 37.18
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 21.2        |
+| time/                   |             |
+|    total_timesteps      | 3250000     |
+| train/                  |             |
+|    approx_kl            | 0.007944042 |
+|    clip_fraction        | 0.0812      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.37       |
+|    explained_variance   | 0.925       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0379     |
+|    n_updates            | 1980        |
+|    policy_gradient_loss | -0.00306    |
+|    std                  | 1.31        |
+|    value_loss           | 0.00578     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1702    |
+|    iterations      | 199     |
+|    time_elapsed    | 1914    |
+|    total_timesteps | 3260416 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1706        |
+|    iterations           | 200         |
+|    time_elapsed         | 1920        |
+|    total_timesteps      | 3276800     |
+| train/                  |             |
+|    approx_kl            | 0.007009124 |
+|    clip_fraction        | 0.0786      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.36       |
+|    explained_variance   | 0.945       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0398     |
+|    n_updates            | 1990        |
+|    policy_gradient_loss | -0.00469    |
+|    std                  | 1.31        |
+|    value_loss           | 0.00344     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1709        |
+|    iterations           | 201         |
+|    time_elapsed         | 1926        |
+|    total_timesteps      | 3293184     |
+| train/                  |             |
+|    approx_kl            | 0.007446406 |
+|    clip_fraction        | 0.0736      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.36       |
+|    explained_variance   | 0.957       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0493     |
+|    n_updates            | 2000        |
+|    policy_gradient_loss | -0.00431    |
+|    std                  | 1.31        |
+|    value_loss           | 0.00262     |
+-----------------------------------------
+Eval num_timesteps=3300000, episode_reward=18.42 +/- 36.17
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 18.4        |
+| time/                   |             |
+|    total_timesteps      | 3300000     |
+| train/                  |             |
+|    approx_kl            | 0.007855328 |
+|    clip_fraction        | 0.0783      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.38       |
+|    explained_variance   | 0.951       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0381     |
+|    n_updates            | 2010        |
+|    policy_gradient_loss | -0.00422    |
+|    std                  | 1.32        |
+|    value_loss           | 0.00379     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1701    |
+|    iterations      | 202     |
+|    time_elapsed    | 1945    |
+|    total_timesteps | 3309568 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1704         |
+|    iterations           | 203          |
+|    time_elapsed         | 1951         |
+|    total_timesteps      | 3325952      |
+| train/                  |              |
+|    approx_kl            | 0.0073990654 |
+|    clip_fraction        | 0.0773       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.38        |
+|    explained_variance   | 0.89         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0319      |
+|    n_updates            | 2020         |
+|    policy_gradient_loss | -0.00507     |
+|    std                  | 1.32         |
+|    value_loss           | 0.0165       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1707         |
+|    iterations           | 204          |
+|    time_elapsed         | 1956         |
+|    total_timesteps      | 3342336      |
+| train/                  |              |
+|    approx_kl            | 0.0076738494 |
+|    clip_fraction        | 0.0913       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.38        |
+|    explained_variance   | 0.914        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0326      |
+|    n_updates            | 2030         |
+|    policy_gradient_loss | -0.00611     |
+|    std                  | 1.32         |
+|    value_loss           | 0.00854      |
+------------------------------------------
+Eval num_timesteps=3350000, episode_reward=39.75 +/- 38.09
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 39.8        |
+| time/                   |             |
+|    total_timesteps      | 3350000     |
+| train/                  |             |
+|    approx_kl            | 0.007704767 |
+|    clip_fraction        | 0.0813      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.39       |
+|    explained_variance   | 0.822       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0351     |
+|    n_updates            | 2040        |
+|    policy_gradient_loss | -0.0056     |
+|    std                  | 1.33        |
+|    value_loss           | 0.0095      |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1700    |
+|    iterations      | 205     |
+|    time_elapsed    | 1974    |
+|    total_timesteps | 3358720 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1703        |
+|    iterations           | 206         |
+|    time_elapsed         | 1980        |
+|    total_timesteps      | 3375104     |
+| train/                  |             |
+|    approx_kl            | 0.006841295 |
+|    clip_fraction        | 0.0682      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.39       |
+|    explained_variance   | 0.973       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.04       |
+|    n_updates            | 2050        |
+|    policy_gradient_loss | -0.00457    |
+|    std                  | 1.33        |
+|    value_loss           | 0.00456     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1707         |
+|    iterations           | 207          |
+|    time_elapsed         | 1986         |
+|    total_timesteps      | 3391488      |
+| train/                  |              |
+|    approx_kl            | 0.0063885115 |
+|    clip_fraction        | 0.0749       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.4         |
+|    explained_variance   | 0.962        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.041       |
+|    n_updates            | 2060         |
+|    policy_gradient_loss | -0.00455     |
+|    std                  | 1.34         |
+|    value_loss           | 0.00373      |
+------------------------------------------
+Eval num_timesteps=3400000, episode_reward=26.62 +/- 43.12
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 26.6        |
+| time/                   |             |
+|    total_timesteps      | 3400000     |
+| train/                  |             |
+|    approx_kl            | 0.006273965 |
+|    clip_fraction        | 0.0709      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.4        |
+|    explained_variance   | 0.956       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0465     |
+|    n_updates            | 2070        |
+|    policy_gradient_loss | -0.00249    |
+|    std                  | 1.33        |
+|    value_loss           | 0.00679     |
+-----------------------------------------
+
+[Diag @ 3,400,000 | n_sheep=5 | success=0%]
+  NEVER_COMPACT              20/20
+  action_mag mean=0.089 p10=0.005 p90=0.074 (0=stopped, 1=full speed)
+  min_flock_radius mean=9.14m best=5.59m  (target <5m to compact)
+  min_dog_to_com   mean=0.69m best=0.10m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.77m best=5.15m
+  reward/step (mean): progress=-0.0015  alignment=+0.0368  pen_bonus=+0.0020  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1691    |
+|    iterations      | 208     |
+|    time_elapsed    | 2014    |
+|    total_timesteps | 3407872 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1695        |
+|    iterations           | 209         |
+|    time_elapsed         | 2019        |
+|    total_timesteps      | 3424256     |
+| train/                  |             |
+|    approx_kl            | 0.006433293 |
+|    clip_fraction        | 0.0727      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.4        |
+|    explained_variance   | 0.932       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0268     |
+|    n_updates            | 2080        |
+|    policy_gradient_loss | -0.00365    |
+|    std                  | 1.33        |
+|    value_loss           | 0.00657     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1698        |
+|    iterations           | 210         |
+|    time_elapsed         | 2025        |
+|    total_timesteps      | 3440640     |
+| train/                  |             |
+|    approx_kl            | 0.007235542 |
+|    clip_fraction        | 0.0839      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.39       |
+|    explained_variance   | 0.935       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0344     |
+|    n_updates            | 2090        |
+|    policy_gradient_loss | -0.00417    |
+|    std                  | 1.32        |
+|    value_loss           | 0.0137      |
+-----------------------------------------
+Eval num_timesteps=3450000, episode_reward=35.54 +/- 43.01
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 35.5        |
+| time/                   |             |
+|    total_timesteps      | 3450000     |
+| train/                  |             |
+|    approx_kl            | 0.007782845 |
+|    clip_fraction        | 0.0859      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.4        |
+|    explained_variance   | 0.924       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.044      |
+|    n_updates            | 2100        |
+|    policy_gradient_loss | -0.00561    |
+|    std                  | 1.34        |
+|    value_loss           | 0.0043      |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1690    |
+|    iterations      | 211     |
+|    time_elapsed    | 2044    |
+|    total_timesteps | 3457024 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1693         |
+|    iterations           | 212          |
+|    time_elapsed         | 2050         |
+|    total_timesteps      | 3473408      |
+| train/                  |              |
+|    approx_kl            | 0.0075765867 |
+|    clip_fraction        | 0.0746       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.41        |
+|    explained_variance   | 0.896        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0293      |
+|    n_updates            | 2110         |
+|    policy_gradient_loss | -0.00406     |
+|    std                  | 1.33         |
+|    value_loss           | 0.011        |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1696         |
+|    iterations           | 213          |
+|    time_elapsed         | 2056         |
+|    total_timesteps      | 3489792      |
+| train/                  |              |
+|    approx_kl            | 0.0072322125 |
+|    clip_fraction        | 0.071        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.41        |
+|    explained_variance   | 0.949        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0498      |
+|    n_updates            | 2120         |
+|    policy_gradient_loss | -0.00421     |
+|    std                  | 1.34         |
+|    value_loss           | 0.006        |
+------------------------------------------
+Eval num_timesteps=3500000, episode_reward=54.69 +/- 47.39
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 54.7         |
+| time/                   |              |
+|    total_timesteps      | 3500000      |
+| train/                  |              |
+|    approx_kl            | 0.0073479656 |
+|    clip_fraction        | 0.0778       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.4         |
+|    explained_variance   | 0.824        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0408      |
+|    n_updates            | 2130         |
+|    policy_gradient_loss | -0.00465     |
+|    std                  | 1.32         |
+|    value_loss           | 0.00657      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1688    |
+|    iterations      | 214     |
+|    time_elapsed    | 2076    |
+|    total_timesteps | 3506176 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1692        |
+|    iterations           | 215         |
+|    time_elapsed         | 2081        |
+|    total_timesteps      | 3522560     |
+| train/                  |             |
+|    approx_kl            | 0.007274649 |
+|    clip_fraction        | 0.0798      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.39       |
+|    explained_variance   | 0.951       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0356     |
+|    n_updates            | 2140        |
+|    policy_gradient_loss | -0.00383    |
+|    std                  | 1.33        |
+|    value_loss           | 0.00355     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1695         |
+|    iterations           | 216          |
+|    time_elapsed         | 2087         |
+|    total_timesteps      | 3538944      |
+| train/                  |              |
+|    approx_kl            | 0.0068056686 |
+|    clip_fraction        | 0.0726       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.38        |
+|    explained_variance   | 0.955        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0428      |
+|    n_updates            | 2150         |
+|    policy_gradient_loss | -0.00356     |
+|    std                  | 1.32         |
+|    value_loss           | 0.00378      |
+------------------------------------------
+Eval num_timesteps=3550000, episode_reward=8.69 +/- 39.03
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 8.69        |
+| time/                   |             |
+|    total_timesteps      | 3550000     |
+| train/                  |             |
+|    approx_kl            | 0.008211401 |
+|    clip_fraction        | 0.0801      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.38       |
+|    explained_variance   | 0.972       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0366     |
+|    n_updates            | 2160        |
+|    policy_gradient_loss | -0.00453    |
+|    std                  | 1.32        |
+|    value_loss           | 0.00445     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1687    |
+|    iterations      | 217     |
+|    time_elapsed    | 2106    |
+|    total_timesteps | 3555328 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1690        |
+|    iterations           | 218         |
+|    time_elapsed         | 2112        |
+|    total_timesteps      | 3571712     |
+| train/                  |             |
+|    approx_kl            | 0.008278061 |
+|    clip_fraction        | 0.0871      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.38       |
+|    explained_variance   | 0.931       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0324     |
+|    n_updates            | 2170        |
+|    policy_gradient_loss | -0.00486    |
+|    std                  | 1.32        |
+|    value_loss           | 0.00377     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1693        |
+|    iterations           | 219         |
+|    time_elapsed         | 2119        |
+|    total_timesteps      | 3588096     |
+| train/                  |             |
+|    approx_kl            | 0.007908824 |
+|    clip_fraction        | 0.0777      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.39       |
+|    explained_variance   | 0.951       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0353     |
+|    n_updates            | 2180        |
+|    policy_gradient_loss | -0.00318    |
+|    std                  | 1.32        |
+|    value_loss           | 0.00768     |
+-----------------------------------------
+Eval num_timesteps=3600000, episode_reward=26.00 +/- 35.20
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 26           |
+| time/                   |              |
+|    total_timesteps      | 3600000      |
+| train/                  |              |
+|    approx_kl            | 0.0068260087 |
+|    clip_fraction        | 0.0761       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.38        |
+|    explained_variance   | 0.946        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0257      |
+|    n_updates            | 2190         |
+|    policy_gradient_loss | -0.00375     |
+|    std                  | 1.32         |
+|    value_loss           | 0.00745      |
+------------------------------------------
+
+[Diag @ 3,600,000 | n_sheep=5 | success=0%]
+  NEVER_COMPACT              20/20
+  action_mag mean=0.114 p10=0.006 p90=0.281 (0=stopped, 1=full speed)
+  min_flock_radius mean=9.62m best=5.04m  (target <5m to compact)
+  min_dog_to_com   mean=0.77m best=0.40m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.31m best=6.37m
+  reward/step (mean): progress=+0.0071  alignment=+0.0385  pen_bonus=+0.0008  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1677    |
+|    iterations      | 220     |
+|    time_elapsed    | 2148    |
+|    total_timesteps | 3604480 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1680         |
+|    iterations           | 221          |
+|    time_elapsed         | 2154         |
+|    total_timesteps      | 3620864      |
+| train/                  |              |
+|    approx_kl            | 0.0084966235 |
+|    clip_fraction        | 0.0849       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.38        |
+|    explained_variance   | 0.936        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0498      |
+|    n_updates            | 2200         |
+|    policy_gradient_loss | -0.00478     |
+|    std                  | 1.32         |
+|    value_loss           | 0.00856      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1683        |
+|    iterations           | 222         |
+|    time_elapsed         | 2160        |
+|    total_timesteps      | 3637248     |
+| train/                  |             |
+|    approx_kl            | 0.007236682 |
+|    clip_fraction        | 0.072       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.37       |
+|    explained_variance   | 0.956       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0436     |
+|    n_updates            | 2210        |
+|    policy_gradient_loss | -0.0054     |
+|    std                  | 1.31        |
+|    value_loss           | 0.00748     |
+-----------------------------------------
+Eval num_timesteps=3650000, episode_reward=48.26 +/- 45.24
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 48.3         |
+| time/                   |              |
+|    total_timesteps      | 3650000      |
+| train/                  |              |
+|    approx_kl            | 0.0076099336 |
+|    clip_fraction        | 0.0694       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.37        |
+|    explained_variance   | 0.942        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.037       |
+|    n_updates            | 2220         |
+|    policy_gradient_loss | -0.00369     |
+|    std                  | 1.31         |
+|    value_loss           | 0.00888      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1676    |
+|    iterations      | 223     |
+|    time_elapsed    | 2179    |
+|    total_timesteps | 3653632 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1679        |
+|    iterations           | 224         |
+|    time_elapsed         | 2185        |
+|    total_timesteps      | 3670016     |
+| train/                  |             |
+|    approx_kl            | 0.007888832 |
+|    clip_fraction        | 0.0783      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.37       |
+|    explained_variance   | 0.914       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0298     |
+|    n_updates            | 2230        |
+|    policy_gradient_loss | -0.00449    |
+|    std                  | 1.32        |
+|    value_loss           | 0.00867     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1682         |
+|    iterations           | 225          |
+|    time_elapsed         | 2190         |
+|    total_timesteps      | 3686400      |
+| train/                  |              |
+|    approx_kl            | 0.0069514583 |
+|    clip_fraction        | 0.0791       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.38        |
+|    explained_variance   | 0.946        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0283      |
+|    n_updates            | 2240         |
+|    policy_gradient_loss | -0.00427     |
+|    std                  | 1.32         |
+|    value_loss           | 0.00382      |
+------------------------------------------
+Eval num_timesteps=3700000, episode_reward=19.29 +/- 50.45
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 19.3        |
+| time/                   |             |
+|    total_timesteps      | 3700000     |
+| train/                  |             |
+|    approx_kl            | 0.008142319 |
+|    clip_fraction        | 0.0865      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.37       |
+|    explained_variance   | 0.92        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0467     |
+|    n_updates            | 2250        |
+|    policy_gradient_loss | -0.00506    |
+|    std                  | 1.31        |
+|    value_loss           | 0.00547     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1674    |
+|    iterations      | 226     |
+|    time_elapsed    | 2210    |
+|    total_timesteps | 3702784 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1677         |
+|    iterations           | 227          |
+|    time_elapsed         | 2216         |
+|    total_timesteps      | 3719168      |
+| train/                  |              |
+|    approx_kl            | 0.0077144434 |
+|    clip_fraction        | 0.0783       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.36        |
+|    explained_variance   | 0.931        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0331      |
+|    n_updates            | 2260         |
+|    policy_gradient_loss | -0.00529     |
+|    std                  | 1.31         |
+|    value_loss           | 0.00486      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1667        |
+|    iterations           | 228         |
+|    time_elapsed         | 2239        |
+|    total_timesteps      | 3735552     |
+| train/                  |             |
+|    approx_kl            | 0.007820845 |
+|    clip_fraction        | 0.087       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.37       |
+|    explained_variance   | 0.95        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0321     |
+|    n_updates            | 2270        |
+|    policy_gradient_loss | -0.00493    |
+|    std                  | 1.31        |
+|    value_loss           | 0.00531     |
+-----------------------------------------
+Eval num_timesteps=3750000, episode_reward=35.91 +/- 47.57
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 35.9        |
+| time/                   |             |
+|    total_timesteps      | 3750000     |
+| train/                  |             |
+|    approx_kl            | 0.008380983 |
+|    clip_fraction        | 0.0868      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.37       |
+|    explained_variance   | 0.927       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0318     |
+|    n_updates            | 2280        |
+|    policy_gradient_loss | -0.0046     |
+|    std                  | 1.32        |
+|    value_loss           | 0.00684     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1639    |
+|    iterations      | 229     |
+|    time_elapsed    | 2289    |
+|    total_timesteps | 3751936 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1642        |
+|    iterations           | 230         |
+|    time_elapsed         | 2294        |
+|    total_timesteps      | 3768320     |
+| train/                  |             |
+|    approx_kl            | 0.007415652 |
+|    clip_fraction        | 0.0758      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.37       |
+|    explained_variance   | 0.953       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0354     |
+|    n_updates            | 2290        |
+|    policy_gradient_loss | -0.00557    |
+|    std                  | 1.31        |
+|    value_loss           | 0.0122      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1646         |
+|    iterations           | 231          |
+|    time_elapsed         | 2299         |
+|    total_timesteps      | 3784704      |
+| train/                  |              |
+|    approx_kl            | 0.0071868873 |
+|    clip_fraction        | 0.0736       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.38        |
+|    explained_variance   | 0.954        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0457      |
+|    n_updates            | 2300         |
+|    policy_gradient_loss | -0.00442     |
+|    std                  | 1.33         |
+|    value_loss           | 0.0201       |
+------------------------------------------
+Eval num_timesteps=3800000, episode_reward=31.58 +/- 50.62
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 31.6         |
+| time/                   |              |
+|    total_timesteps      | 3800000      |
+| train/                  |              |
+|    approx_kl            | 0.0074889637 |
+|    clip_fraction        | 0.0805       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.4         |
+|    explained_variance   | 0.95         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0355      |
+|    n_updates            | 2310         |
+|    policy_gradient_loss | -0.00474     |
+|    std                  | 1.33         |
+|    value_loss           | 0.00892      |
+------------------------------------------
+
+[Diag @ 3,800,000 | n_sheep=5 | success=0%]
+  NEVER_COMPACT              19/20
+  COMPACT_CANT_DRIVE         1/20
+  action_mag mean=0.128 p10=0.005 p90=0.475 (0=stopped, 1=full speed)
+  min_flock_radius mean=8.35m best=4.80m  (target <5m to compact)
+  min_dog_to_com   mean=0.71m best=0.23m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.72m best=8.54m
+  reward/step (mean): progress=+0.0063  alignment=+0.0388  pen_bonus=+0.0010  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1633    |
+|    iterations      | 232     |
+|    time_elapsed    | 2326    |
+|    total_timesteps | 3801088 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1636         |
+|    iterations           | 233          |
+|    time_elapsed         | 2332         |
+|    total_timesteps      | 3817472      |
+| train/                  |              |
+|    approx_kl            | 0.0070604184 |
+|    clip_fraction        | 0.0765       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.4         |
+|    explained_variance   | 0.953        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0398      |
+|    n_updates            | 2320         |
+|    policy_gradient_loss | -0.00453     |
+|    std                  | 1.33         |
+|    value_loss           | 0.00675      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1640        |
+|    iterations           | 234         |
+|    time_elapsed         | 2336        |
+|    total_timesteps      | 3833856     |
+| train/                  |             |
+|    approx_kl            | 0.007709453 |
+|    clip_fraction        | 0.0816      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.4        |
+|    explained_variance   | 0.943       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0359     |
+|    n_updates            | 2330        |
+|    policy_gradient_loss | -0.00423    |
+|    std                  | 1.34        |
+|    value_loss           | 0.00754     |
+-----------------------------------------
+Eval num_timesteps=3850000, episode_reward=42.98 +/- 33.36
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 43          |
+| time/                   |             |
+|    total_timesteps      | 3850000     |
+| train/                  |             |
+|    approx_kl            | 0.007679659 |
+|    clip_fraction        | 0.0858      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.4        |
+|    explained_variance   | 0.961       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.032      |
+|    n_updates            | 2340        |
+|    policy_gradient_loss | -0.00716    |
+|    std                  | 1.33        |
+|    value_loss           | 0.00907     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1635    |
+|    iterations      | 235     |
+|    time_elapsed    | 2354    |
+|    total_timesteps | 3850240 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1638         |
+|    iterations           | 236          |
+|    time_elapsed         | 2360         |
+|    total_timesteps      | 3866624      |
+| train/                  |              |
+|    approx_kl            | 0.0077598644 |
+|    clip_fraction        | 0.0848       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.38        |
+|    explained_variance   | 0.96         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0468      |
+|    n_updates            | 2350         |
+|    policy_gradient_loss | -0.005       |
+|    std                  | 1.33         |
+|    value_loss           | 0.0101       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1641         |
+|    iterations           | 237          |
+|    time_elapsed         | 2366         |
+|    total_timesteps      | 3883008      |
+| train/                  |              |
+|    approx_kl            | 0.0068941545 |
+|    clip_fraction        | 0.0673       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.39        |
+|    explained_variance   | 0.96         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0398      |
+|    n_updates            | 2360         |
+|    policy_gradient_loss | -0.0047      |
+|    std                  | 1.33         |
+|    value_loss           | 0.0113       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1643         |
+|    iterations           | 238          |
+|    time_elapsed         | 2372         |
+|    total_timesteps      | 3899392      |
+| train/                  |              |
+|    approx_kl            | 0.0073663425 |
+|    clip_fraction        | 0.0785       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.41        |
+|    explained_variance   | 0.963        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0319      |
+|    n_updates            | 2370         |
+|    policy_gradient_loss | -0.00458     |
+|    std                  | 1.35         |
+|    value_loss           | 0.0036       |
+------------------------------------------
+Eval num_timesteps=3900000, episode_reward=33.74 +/- 40.96
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 33.7        |
+| time/                   |             |
+|    total_timesteps      | 3900000     |
+| train/                  |             |
+|    approx_kl            | 0.007122398 |
+|    clip_fraction        | 0.0759      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.41       |
+|    explained_variance   | 0.972       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0383     |
+|    n_updates            | 2380        |
+|    policy_gradient_loss | -0.00446    |
+|    std                  | 1.35        |
+|    value_loss           | 0.00445     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1637    |
+|    iterations      | 239     |
+|    time_elapsed    | 2391    |
+|    total_timesteps | 3915776 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1640        |
+|    iterations           | 240         |
+|    time_elapsed         | 2396        |
+|    total_timesteps      | 3932160     |
+| train/                  |             |
+|    approx_kl            | 0.008265208 |
+|    clip_fraction        | 0.0845      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.41       |
+|    explained_variance   | 0.926       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0361     |
+|    n_updates            | 2390        |
+|    policy_gradient_loss | -0.00536    |
+|    std                  | 1.34        |
+|    value_loss           | 0.00846     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1643        |
+|    iterations           | 241         |
+|    time_elapsed         | 2402        |
+|    total_timesteps      | 3948544     |
+| train/                  |             |
+|    approx_kl            | 0.008583728 |
+|    clip_fraction        | 0.0893      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.42       |
+|    explained_variance   | 0.915       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0297     |
+|    n_updates            | 2400        |
+|    policy_gradient_loss | -0.00592    |
+|    std                  | 1.35        |
+|    value_loss           | 0.0068      |
+-----------------------------------------
+Eval num_timesteps=3950000, episode_reward=46.06 +/- 34.67
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | 46.1         |
+| time/                   |              |
+|    total_timesteps      | 3950000      |
+| train/                  |              |
+|    approx_kl            | 0.0060660206 |
+|    clip_fraction        | 0.0654       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.42        |
+|    explained_variance   | 0.942        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0359      |
+|    n_updates            | 2410         |
+|    policy_gradient_loss | -0.0038      |
+|    std                  | 1.35         |
+|    value_loss           | 0.00296      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1637    |
+|    iterations      | 242     |
+|    time_elapsed    | 2421    |
+|    total_timesteps | 3964928 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1639        |
+|    iterations           | 243         |
+|    time_elapsed         | 2427        |
+|    total_timesteps      | 3981312     |
+| train/                  |             |
+|    approx_kl            | 0.007591601 |
+|    clip_fraction        | 0.0808      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.4        |
+|    explained_variance   | 0.964       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0386     |
+|    n_updates            | 2420        |
+|    policy_gradient_loss | -0.00575    |
+|    std                  | 1.34        |
+|    value_loss           | 0.00714     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1642        |
+|    iterations           | 244         |
+|    time_elapsed         | 2433        |
+|    total_timesteps      | 3997696     |
+| train/                  |             |
+|    approx_kl            | 0.006255053 |
+|    clip_fraction        | 0.0663      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.41       |
+|    explained_variance   | 0.939       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0304     |
+|    n_updates            | 2430        |
+|    policy_gradient_loss | -0.00497    |
+|    std                  | 1.35        |
+|    value_loss           | 0.00585     |
+-----------------------------------------
+Eval num_timesteps=4000000, episode_reward=19.52 +/- 38.43
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | 19.5        |
+| time/                   |             |
+|    total_timesteps      | 4000000     |
+| train/                  |             |
+|    approx_kl            | 0.008279499 |
+|    clip_fraction        | 0.0814      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.4        |
+|    explained_variance   | 0.958       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0379     |
+|    n_updates            | 2440        |
+|    policy_gradient_loss | -0.00568    |
+|    std                  | 1.34        |
+|    value_loss           | 0.00469     |
+-----------------------------------------
+
+[Diag @ 4,000,000 | n_sheep=5 | success=0%]
+  NEVER_COMPACT              20/20
+  action_mag mean=0.158 p10=0.006 p90=0.744 (0=stopped, 1=full speed)
+  min_flock_radius mean=8.94m best=6.34m  (target <5m to compact)
+  min_dog_to_com   mean=0.82m best=0.49m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.86m best=7.80m
+  reward/step (mean): progress=+0.0029  alignment=+0.0397  pen_bonus=+0.0003  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1630    |
+|    iterations      | 245     |
+|    time_elapsed    | 2461    |
+|    total_timesteps | 4014080 |
+--------------------------------
+
+Training complete. Artefacts saved to runs/ppo_debug/
diff --git a/training/runs/ppo_debug/best_model/best_model.zip b/training/runs/ppo_debug/best_model/best_model.zip
new file mode 100644
index 0000000..2618c2c
Binary files /dev/null and b/training/runs/ppo_debug/best_model/best_model.zip differ
diff --git a/training/runs/ppo_debug/evaluations.npz b/training/runs/ppo_debug/evaluations.npz
new file mode 100644
index 0000000..84fd19d
Binary files /dev/null and b/training/runs/ppo_debug/evaluations.npz differ
diff --git a/training/runs/ppo_debug/final_model.zip b/training/runs/ppo_debug/final_model.zip
new file mode 100644
index 0000000..e3be97e
Binary files /dev/null and b/training/runs/ppo_debug/final_model.zip differ
diff --git a/training/runs/ppo_debug/vecnorm.pkl b/training/runs/ppo_debug/vecnorm.pkl
new file mode 100644
index 0000000..c17b706
Binary files /dev/null and b/training/runs/ppo_debug/vecnorm.pkl differ
diff --git a/training/runs/ppo_v3/best_model/best_model.zip b/training/runs/ppo_v3/best_model/best_model.zip
new file mode 100644
index 0000000..82d0259
Binary files /dev/null and b/training/runs/ppo_v3/best_model/best_model.zip differ
diff --git a/training/runs/ppo_v3/evaluations.npz b/training/runs/ppo_v3/evaluations.npz
new file mode 100644
index 0000000..1d5ee82
Binary files /dev/null and b/training/runs/ppo_v3/evaluations.npz differ
diff --git a/training/runs/ppo_v3/final_model.zip b/training/runs/ppo_v3/final_model.zip
new file mode 100644
index 0000000..ce84843
Binary files /dev/null and b/training/runs/ppo_v3/final_model.zip differ
diff --git a/training/runs/ppo_v3/vecnorm.pkl b/training/runs/ppo_v3/vecnorm.pkl
new file mode 100644
index 0000000..4729c11
Binary files /dev/null and b/training/runs/ppo_v3/vecnorm.pkl differ