diff --git a/training/runs/ppo_fix_check.log b/training/runs/ppo_fix_check.log
new file mode 100644
index 0000000..39ace5a
--- /dev/null
+++ b/training/runs/ppo_fix_check.log
@@ -0,0 +1,3388 @@
+Using cpu device
+Logging to runs/ppo_fix_check/ppo_1
+------------------------------
+| time/              |       |
+|    fps             | 5021  |
+|    iterations      | 1     |
+|    time_elapsed    | 3     |
+|    total_timesteps | 16384 |
+------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 4241         |
+|    iterations           | 2            |
+|    time_elapsed         | 7            |
+|    total_timesteps      | 32768        |
+| train/                  |              |
+|    approx_kl            | 0.0047510993 |
+|    clip_fraction        | 0.0344       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.85        |
+|    explained_variance   | 0.786        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.00995     |
+|    n_updates            | 10           |
+|    policy_gradient_loss | -0.00156     |
+|    std                  | 1.01         |
+|    value_loss           | 0.0657       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 4026         |
+|    iterations           | 3            |
+|    time_elapsed         | 12           |
+|    total_timesteps      | 49152        |
+| train/                  |              |
+|    approx_kl            | 0.0032065492 |
+|    clip_fraction        | 0.0328       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.88        |
+|    explained_variance   | 0.868        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0327      |
+|    n_updates            | 20           |
+|    policy_gradient_loss | -0.00152     |
+|    std                  | 1.02         |
+|    value_loss           | 0.0172       |
+------------------------------------------
+/home/jalf/miniconda3/envs/tir/lib/python3.12/site-packages/stable_baselines3/common/evaluation.py:71: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.
+  warnings.warn(
+Eval num_timesteps=50000, episode_reward=-25.33 +/- 56.30
+Episode length: 1859.00 +/- 393.69
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.86e+03     |
+|    mean_reward          | -25.3        |
+| time/                   |              |
+|    total_timesteps      | 50000        |
+| train/                  |              |
+|    approx_kl            | 0.0038272792 |
+|    clip_fraction        | 0.0312       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.89        |
+|    explained_variance   | 0.891        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0224      |
+|    n_updates            | 30           |
+|    policy_gradient_loss | -0.0019      |
+|    std                  | 1.02         |
+|    value_loss           | 0.0227       |
+------------------------------------------
+New best mean reward!
+------------------------------
+| time/              |       |
+|    fps             | 2387  |
+|    iterations      | 4     |
+|    time_elapsed    | 27    |
+|    total_timesteps | 65536 |
+------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2563         |
+|    iterations           | 5            |
+|    time_elapsed         | 31           |
+|    total_timesteps      | 81920        |
+| train/                  |              |
+|    approx_kl            | 0.0040233894 |
+|    clip_fraction        | 0.0323       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.87        |
+|    explained_variance   | 0.878        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0251      |
+|    n_updates            | 40           |
+|    policy_gradient_loss | -0.00247     |
+|    std                  | 1.01         |
+|    value_loss           | 0.0169       |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2719        |
+|    iterations           | 6           |
+|    time_elapsed         | 36          |
+|    total_timesteps      | 98304       |
+| train/                  |             |
+|    approx_kl            | 0.003573698 |
+|    clip_fraction        | 0.0316      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.86       |
+|    explained_variance   | 0.865       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0219     |
+|    n_updates            | 50          |
+|    policy_gradient_loss | -0.0019     |
+|    std                  | 1.01        |
+|    value_loss           | 0.022       |
+-----------------------------------------
+/home/jalf/miniconda3/envs/tir/lib/python3.12/site-packages/stable_baselines3/common/evaluation.py:71: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.
+  warnings.warn(
+Eval num_timesteps=100000, episode_reward=-29.60 +/- 36.59
+Episode length: 1939.35 +/- 264.37
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.94e+03     |
+|    mean_reward          | -29.6        |
+| time/                   |              |
+|    total_timesteps      | 100000       |
+| train/                  |              |
+|    approx_kl            | 0.0046861977 |
+|    clip_fraction        | 0.039        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.86        |
+|    explained_variance   | 0.815        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0257      |
+|    n_updates            | 60           |
+|    policy_gradient_loss | -0.00203     |
+|    std                  | 1.01         |
+|    value_loss           | 0.0201       |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 2191   |
+|    iterations      | 7      |
+|    time_elapsed    | 52     |
+|    total_timesteps | 114688 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2314        |
+|    iterations           | 8           |
+|    time_elapsed         | 56          |
+|    total_timesteps      | 131072      |
+| train/                  |             |
+|    approx_kl            | 0.005258695 |
+|    clip_fraction        | 0.0503      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.86       |
+|    explained_variance   | 0.807       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0211     |
+|    n_updates            | 70          |
+|    policy_gradient_loss | -0.00398    |
+|    std                  | 1.01        |
+|    value_loss           | 0.0164      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2359         |
+|    iterations           | 9            |
+|    time_elapsed         | 62           |
+|    total_timesteps      | 147456       |
+| train/                  |              |
+|    approx_kl            | 0.0043328116 |
+|    clip_fraction        | 0.0332       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.86        |
+|    explained_variance   | 0.811        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0259      |
+|    n_updates            | 80           |
+|    policy_gradient_loss | -0.00173     |
+|    std                  | 1.01         |
+|    value_loss           | 0.0121       |
+------------------------------------------
+Eval num_timesteps=150000, episode_reward=-33.97 +/- 37.15
+Episode length: 1954.85 +/- 196.80
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 1.95e+03    |
+|    mean_reward          | -34         |
+| time/                   |             |
+|    total_timesteps      | 150000      |
+| train/                  |             |
+|    approx_kl            | 0.005169191 |
+|    clip_fraction        | 0.0506      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.85       |
+|    explained_variance   | 0.649       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0287     |
+|    n_updates            | 90          |
+|    policy_gradient_loss | -0.00384    |
+|    std                  | 1           |
+|    value_loss           | 0.0162      |
+-----------------------------------------
+
+[Diag @ 150,000 | n_sheep=1 | success=15%]
+  COMPACT_CANT_DRIVE         16/20
+  SUCCESS                    3/20
+  DROVE_NO_SHEEP             1/20
+  action_mag mean=0.239 p10=0.071 p90=0.433 (0=stopped, 1=full speed)
+  min_flock_radius mean=0.00m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=4.80m best=1.70m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=10.22m best=1.50m
+  reward/step (mean): progress=+0.0013  alignment=+0.0000  pen_bonus=+0.0008  step_cost=-0.0200  complete=+0.0078
+-------------------------------
+| time/              |        |
+|    fps             | 1935   |
+|    iterations      | 10     |
+|    time_elapsed    | 84     |
+|    total_timesteps | 163840 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2014         |
+|    iterations           | 11           |
+|    time_elapsed         | 89           |
+|    total_timesteps      | 180224       |
+| train/                  |              |
+|    approx_kl            | 0.0039950563 |
+|    clip_fraction        | 0.0276       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.83        |
+|    explained_variance   | 0.623        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0128      |
+|    n_updates            | 100          |
+|    policy_gradient_loss | -0.00208     |
+|    std                  | 0.995        |
+|    value_loss           | 0.0959       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2093         |
+|    iterations           | 12           |
+|    time_elapsed         | 93           |
+|    total_timesteps      | 196608       |
+| train/                  |              |
+|    approx_kl            | 0.0036244316 |
+|    clip_fraction        | 0.0299       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.83        |
+|    explained_variance   | 0.916        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0251      |
+|    n_updates            | 110          |
+|    policy_gradient_loss | -0.00229     |
+|    std                  | 0.991        |
+|    value_loss           | 0.0118       |
+------------------------------------------
+Eval num_timesteps=200000, episode_reward=-36.37 +/- 39.41
+Episode length: 1950.95 +/- 213.80
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 1.95e+03    |
+|    mean_reward          | -36.4       |
+| time/                   |             |
+|    total_timesteps      | 200000      |
+| train/                  |             |
+|    approx_kl            | 0.003325508 |
+|    clip_fraction        | 0.0223      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.83       |
+|    explained_variance   | 0.858       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0279     |
+|    n_updates            | 120         |
+|    policy_gradient_loss | -0.0007     |
+|    std                  | 0.999       |
+|    value_loss           | 0.0493      |
+-----------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1964   |
+|    iterations      | 13     |
+|    time_elapsed    | 108    |
+|    total_timesteps | 212992 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2034        |
+|    iterations           | 14          |
+|    time_elapsed         | 112         |
+|    total_timesteps      | 229376      |
+| train/                  |             |
+|    approx_kl            | 0.004660043 |
+|    clip_fraction        | 0.0403      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.85       |
+|    explained_variance   | 0.719       |
+|    learning_rate        | 0.0003      |
+|    loss                 | 0.128       |
+|    n_updates            | 130         |
+|    policy_gradient_loss | -0.00265    |
+|    std                  | 1.01        |
+|    value_loss           | 0.073       |
+-----------------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 2103       |
+|    iterations           | 15         |
+|    time_elapsed         | 116        |
+|    total_timesteps      | 245760     |
+| train/                  |            |
+|    approx_kl            | 0.00501227 |
+|    clip_fraction        | 0.0499     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.88      |
+|    explained_variance   | 0.847      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0237    |
+|    n_updates            | 140        |
+|    policy_gradient_loss | -0.00264   |
+|    std                  | 1.02       |
+|    value_loss           | 0.0415     |
+----------------------------------------
+Eval num_timesteps=250000, episode_reward=-44.92 +/- 15.63
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -44.9        |
+| time/                   |              |
+|    total_timesteps      | 250000       |
+| train/                  |              |
+|    approx_kl            | 0.0055294414 |
+|    clip_fraction        | 0.06         |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.89        |
+|    explained_variance   | 0.951        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0274      |
+|    n_updates            | 150          |
+|    policy_gradient_loss | -0.00491     |
+|    std                  | 1.03         |
+|    value_loss           | 0.014        |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1999   |
+|    iterations      | 16     |
+|    time_elapsed    | 131    |
+|    total_timesteps | 262144 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2051         |
+|    iterations           | 17           |
+|    time_elapsed         | 135          |
+|    total_timesteps      | 278528       |
+| train/                  |              |
+|    approx_kl            | 0.0051201656 |
+|    clip_fraction        | 0.0301       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.88        |
+|    explained_variance   | 0.941        |
+|    learning_rate        | 0.0003       |
+|    loss                 | 0.148        |
+|    n_updates            | 160          |
+|    policy_gradient_loss | -0.00199     |
+|    std                  | 1.02         |
+|    value_loss           | 0.099        |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2096        |
+|    iterations           | 18          |
+|    time_elapsed         | 140         |
+|    total_timesteps      | 294912      |
+| train/                  |             |
+|    approx_kl            | 0.004261789 |
+|    clip_fraction        | 0.0328      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.88       |
+|    explained_variance   | 0.942       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0314     |
+|    n_updates            | 170         |
+|    policy_gradient_loss | -0.00243    |
+|    std                  | 1.02        |
+|    value_loss           | 0.0117      |
+-----------------------------------------
+Eval num_timesteps=300000, episode_reward=-44.79 +/- 17.68
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -44.8       |
+| time/                   |             |
+|    total_timesteps      | 300000      |
+| train/                  |             |
+|    approx_kl            | 0.004783842 |
+|    clip_fraction        | 0.0296      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.87       |
+|    explained_variance   | 0.892       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0219     |
+|    n_updates            | 180         |
+|    policy_gradient_loss | -0.00159    |
+|    std                  | 1.01        |
+|    value_loss           | 0.0497      |
+-----------------------------------------
+
+[Diag @ 300,000 | n_sheep=1 | success=0%]
+  COMPACT_CANT_DRIVE         17/20
+  DROVE_NO_SHEEP             3/20
+  action_mag mean=0.241 p10=0.109 p90=0.389 (0=stopped, 1=full speed)
+  min_flock_radius mean=0.00m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=4.77m best=2.12m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=9.31m best=1.50m
+  reward/step (mean): progress=+0.0016  alignment=+0.0000  pen_bonus=+0.0000  step_cost=-0.0200  complete=+0.0000
+-------------------------------
+| time/              |        |
+|    fps             | 1905   |
+|    iterations      | 19     |
+|    time_elapsed    | 163    |
+|    total_timesteps | 311296 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1949         |
+|    iterations           | 20           |
+|    time_elapsed         | 168          |
+|    total_timesteps      | 327680       |
+| train/                  |              |
+|    approx_kl            | 0.0033368056 |
+|    clip_fraction        | 0.0258       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.87        |
+|    explained_variance   | 0.794        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0211      |
+|    n_updates            | 190          |
+|    policy_gradient_loss | -0.00105     |
+|    std                  | 1.02         |
+|    value_loss           | 0.0769       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1992         |
+|    iterations           | 21           |
+|    time_elapsed         | 172          |
+|    total_timesteps      | 344064       |
+| train/                  |              |
+|    approx_kl            | 0.0046488494 |
+|    clip_fraction        | 0.0352       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.87        |
+|    explained_variance   | 0.927        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0274      |
+|    n_updates            | 200          |
+|    policy_gradient_loss | -0.00331     |
+|    std                  | 1.02         |
+|    value_loss           | 0.0165       |
+------------------------------------------
+Eval num_timesteps=350000, episode_reward=-24.90 +/- 50.25
+Episode length: 1976.75 +/- 82.03
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.98e+03     |
+|    mean_reward          | -24.9        |
+| time/                   |              |
+|    total_timesteps      | 350000       |
+| train/                  |              |
+|    approx_kl            | 0.0041725934 |
+|    clip_fraction        | 0.0299       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.88        |
+|    explained_variance   | 0.944        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.026       |
+|    n_updates            | 210          |
+|    policy_gradient_loss | -0.0026      |
+|    std                  | 1.02         |
+|    value_loss           | 0.00665      |
+------------------------------------------
+New best mean reward!
+-------------------------------
+| time/              |        |
+|    fps             | 1921   |
+|    iterations      | 22     |
+|    time_elapsed    | 187    |
+|    total_timesteps | 360448 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1963        |
+|    iterations           | 23          |
+|    time_elapsed         | 191         |
+|    total_timesteps      | 376832      |
+| train/                  |             |
+|    approx_kl            | 0.005180447 |
+|    clip_fraction        | 0.0532      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.87       |
+|    explained_variance   | 0.956       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0255     |
+|    n_updates            | 220         |
+|    policy_gradient_loss | -0.00352    |
+|    std                  | 1.02        |
+|    value_loss           | 0.0142      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1990        |
+|    iterations           | 24          |
+|    time_elapsed         | 197         |
+|    total_timesteps      | 393216      |
+| train/                  |             |
+|    approx_kl            | 0.004661506 |
+|    clip_fraction        | 0.0443      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.87       |
+|    explained_variance   | 0.967       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0331     |
+|    n_updates            | 230         |
+|    policy_gradient_loss | -0.00441    |
+|    std                  | 1.02        |
+|    value_loss           | 0.0112      |
+-----------------------------------------
+Eval num_timesteps=400000, episode_reward=-26.04 +/- 47.69
+Episode length: 1890.85 +/- 367.20
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 1.89e+03    |
+|    mean_reward          | -26         |
+| time/                   |             |
+|    total_timesteps      | 400000      |
+| train/                  |             |
+|    approx_kl            | 0.005491742 |
+|    clip_fraction        | 0.0538      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.89       |
+|    explained_variance   | 0.941       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.042      |
+|    n_updates            | 240         |
+|    policy_gradient_loss | -0.00297    |
+|    std                  | 1.03        |
+|    value_loss           | 0.00877     |
+-----------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1927   |
+|    iterations      | 25     |
+|    time_elapsed    | 212    |
+|    total_timesteps | 409600 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1966         |
+|    iterations           | 26           |
+|    time_elapsed         | 216          |
+|    total_timesteps      | 425984       |
+| train/                  |              |
+|    approx_kl            | 0.0045445506 |
+|    clip_fraction        | 0.0385       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.91        |
+|    explained_variance   | 0.941        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0343      |
+|    n_updates            | 250          |
+|    policy_gradient_loss | -0.00307     |
+|    std                  | 1.04         |
+|    value_loss           | 0.00818      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2004         |
+|    iterations           | 27           |
+|    time_elapsed         | 220          |
+|    total_timesteps      | 442368       |
+| train/                  |              |
+|    approx_kl            | 0.0045271795 |
+|    clip_fraction        | 0.0373       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.94        |
+|    explained_variance   | 0.97         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0361      |
+|    n_updates            | 260          |
+|    policy_gradient_loss | -0.00236     |
+|    std                  | 1.05         |
+|    value_loss           | 0.0091       |
+------------------------------------------
+Eval num_timesteps=450000, episode_reward=-24.58 +/- 48.73
+Episode length: 1907.85 +/- 276.46
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.91e+03     |
+|    mean_reward          | -24.6        |
+| time/                   |              |
+|    total_timesteps      | 450000       |
+| train/                  |              |
+|    approx_kl            | 0.0052676853 |
+|    clip_fraction        | 0.0498       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.96        |
+|    explained_variance   | 0.948        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0261      |
+|    n_updates            | 270          |
+|    policy_gradient_loss | -0.00236     |
+|    std                  | 1.07         |
+|    value_loss           | 0.0286       |
+------------------------------------------
+New best mean reward!
+
+[Diag @ 450,000 | n_sheep=1 | success=5%]
+  COMPACT_CANT_DRIVE         18/20
+  DROVE_NO_SHEEP             1/20
+  SUCCESS                    1/20
+  action_mag mean=0.272 p10=0.139 p90=0.407 (0=stopped, 1=full speed)
+  min_flock_radius mean=0.00m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=4.81m best=1.54m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.36m best=1.96m
+  reward/step (mean): progress=+0.0012  alignment=+0.0000  pen_bonus=+0.0003  step_cost=-0.0200  complete=+0.0025
+-------------------------------
+| time/              |        |
+|    fps             | 1893   |
+|    iterations      | 28     |
+|    time_elapsed    | 242    |
+|    total_timesteps | 458752 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1928        |
+|    iterations           | 29          |
+|    time_elapsed         | 246         |
+|    total_timesteps      | 475136      |
+| train/                  |             |
+|    approx_kl            | 0.004465497 |
+|    clip_fraction        | 0.0376      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.97       |
+|    explained_variance   | 0.948       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0307     |
+|    n_updates            | 280         |
+|    policy_gradient_loss | -0.00259    |
+|    std                  | 1.07        |
+|    value_loss           | 0.0213      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1961         |
+|    iterations           | 30           |
+|    time_elapsed         | 250          |
+|    total_timesteps      | 491520       |
+| train/                  |              |
+|    approx_kl            | 0.0054338034 |
+|    clip_fraction        | 0.0512       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.97        |
+|    explained_variance   | 0.967        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.021       |
+|    n_updates            | 290          |
+|    policy_gradient_loss | -0.00296     |
+|    std                  | 1.07         |
+|    value_loss           | 0.0138       |
+------------------------------------------
+Eval num_timesteps=500000, episode_reward=-44.13 +/- 20.75
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -44.1       |
+| time/                   |             |
+|    total_timesteps      | 500000      |
+| train/                  |             |
+|    approx_kl            | 0.006292434 |
+|    clip_fraction        | 0.0572      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.97       |
+|    explained_variance   | 0.937       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0398     |
+|    n_updates            | 300         |
+|    policy_gradient_loss | -0.00516    |
+|    std                  | 1.07        |
+|    value_loss           | 0.00832     |
+-----------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1913   |
+|    iterations      | 31     |
+|    time_elapsed    | 265    |
+|    total_timesteps | 507904 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1940         |
+|    iterations           | 32           |
+|    time_elapsed         | 270          |
+|    total_timesteps      | 524288       |
+| train/                  |              |
+|    approx_kl            | 0.0063960385 |
+|    clip_fraction        | 0.0702       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.96        |
+|    explained_variance   | 0.942        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0341      |
+|    n_updates            | 310          |
+|    policy_gradient_loss | -0.00436     |
+|    std                  | 1.06         |
+|    value_loss           | 0.0189       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1968         |
+|    iterations           | 33           |
+|    time_elapsed         | 274          |
+|    total_timesteps      | 540672       |
+| train/                  |              |
+|    approx_kl            | 0.0070166546 |
+|    clip_fraction        | 0.0888       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.96        |
+|    explained_variance   | 0.955        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0376      |
+|    n_updates            | 320          |
+|    policy_gradient_loss | -0.00631     |
+|    std                  | 1.06         |
+|    value_loss           | 0.00861      |
+------------------------------------------
+Eval num_timesteps=550000, episode_reward=-38.60 +/- 14.53
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -38.6        |
+| time/                   |              |
+|    total_timesteps      | 550000       |
+| train/                  |              |
+|    approx_kl            | 0.0068266992 |
+|    clip_fraction        | 0.075        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.96        |
+|    explained_variance   | 0.959        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0252      |
+|    n_updates            | 330          |
+|    policy_gradient_loss | -0.00593     |
+|    std                  | 1.07         |
+|    value_loss           | 0.0131       |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1922   |
+|    iterations      | 34     |
+|    time_elapsed    | 289    |
+|    total_timesteps | 557056 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1950        |
+|    iterations           | 35          |
+|    time_elapsed         | 294         |
+|    total_timesteps      | 573440      |
+| train/                  |             |
+|    approx_kl            | 0.006152669 |
+|    clip_fraction        | 0.0626      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.97       |
+|    explained_variance   | 0.954       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0376     |
+|    n_updates            | 340         |
+|    policy_gradient_loss | -0.00514    |
+|    std                  | 1.07        |
+|    value_loss           | 0.0187      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1977        |
+|    iterations           | 36          |
+|    time_elapsed         | 298         |
+|    total_timesteps      | 589824      |
+| train/                  |             |
+|    approx_kl            | 0.006685758 |
+|    clip_fraction        | 0.0729      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.97       |
+|    explained_variance   | 0.958       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0387     |
+|    n_updates            | 350         |
+|    policy_gradient_loss | -0.00632    |
+|    std                  | 1.07        |
+|    value_loss           | 0.0118      |
+-----------------------------------------
+Eval num_timesteps=600000, episode_reward=-31.39 +/- 8.94
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -31.4       |
+| time/                   |             |
+|    total_timesteps      | 600000      |
+| train/                  |             |
+|    approx_kl            | 0.008094068 |
+|    clip_fraction        | 0.0985      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.97       |
+|    explained_variance   | 0.937       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0439     |
+|    n_updates            | 360         |
+|    policy_gradient_loss | -0.00782    |
+|    std                  | 1.07        |
+|    value_loss           | 0.0116      |
+-----------------------------------------
+
+[Diag @ 600,000 | n_sheep=1 | success=5%]
+  COMPACT_CANT_DRIVE         16/20
+  DROVE_NO_SHEEP             3/20
+  SUCCESS                    1/20
+  action_mag mean=0.150 p10=0.000 p90=0.392 (0=stopped, 1=full speed)
+  min_flock_radius mean=0.00m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=3.64m best=0.68m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=10.60m best=1.50m
+  reward/step (mean): progress=+0.0025  alignment=+0.0000  pen_bonus=+0.0003  step_cost=-0.0200  complete=+0.0026
+
+[Curriculum] leaving stage n_sheep=1 after 600,000 steps | training success rate (last 100 eps) = 9%
+[Curriculum] → 2 sheep at step 600,000
+
+-------------------------------
+| time/              |        |
+|    fps             | 1894   |
+|    iterations      | 37     |
+|    time_elapsed    | 319    |
+|    total_timesteps | 606208 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1917         |
+|    iterations           | 38           |
+|    time_elapsed         | 324          |
+|    total_timesteps      | 622592       |
+| train/                  |              |
+|    approx_kl            | 0.0067913756 |
+|    clip_fraction        | 0.0689       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.97        |
+|    explained_variance   | 0.861        |
+|    learning_rate        | 0.0003       |
+|    loss                 | 0.0772       |
+|    n_updates            | 370          |
+|    policy_gradient_loss | -0.00184     |
+|    std                  | 1.07         |
+|    value_loss           | 0.101        |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1938         |
+|    iterations           | 39           |
+|    time_elapsed         | 329          |
+|    total_timesteps      | 638976       |
+| train/                  |              |
+|    approx_kl            | 0.0061344057 |
+|    clip_fraction        | 0.0666       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.98        |
+|    explained_variance   | 0.928        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0147      |
+|    n_updates            | 380          |
+|    policy_gradient_loss | -0.00148     |
+|    std                  | 1.08         |
+|    value_loss           | 0.0386       |
+------------------------------------------
+Eval num_timesteps=650000, episode_reward=-42.39 +/- 31.99
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -42.4        |
+| time/                   |              |
+|    total_timesteps      | 650000       |
+| train/                  |              |
+|    approx_kl            | 0.0061708866 |
+|    clip_fraction        | 0.06         |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.98        |
+|    explained_variance   | 0.918        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0203      |
+|    n_updates            | 390          |
+|    policy_gradient_loss | -0.00313     |
+|    std                  | 1.07         |
+|    value_loss           | 0.0242       |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1896   |
+|    iterations      | 40     |
+|    time_elapsed    | 345    |
+|    total_timesteps | 655360 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1918        |
+|    iterations           | 41          |
+|    time_elapsed         | 350         |
+|    total_timesteps      | 671744      |
+| train/                  |             |
+|    approx_kl            | 0.007122565 |
+|    clip_fraction        | 0.0765      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.98       |
+|    explained_variance   | 0.855       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.00749    |
+|    n_updates            | 400         |
+|    policy_gradient_loss | -0.00529    |
+|    std                  | 1.07        |
+|    value_loss           | 0.0596      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1941         |
+|    iterations           | 42           |
+|    time_elapsed         | 354          |
+|    total_timesteps      | 688128       |
+| train/                  |              |
+|    approx_kl            | 0.0078532845 |
+|    clip_fraction        | 0.0975       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.98        |
+|    explained_variance   | 0.89         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0188      |
+|    n_updates            | 410          |
+|    policy_gradient_loss | -0.00699     |
+|    std                  | 1.07         |
+|    value_loss           | 0.0207       |
+------------------------------------------
+Eval num_timesteps=700000, episode_reward=-39.79 +/- 29.60
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -39.8        |
+| time/                   |              |
+|    total_timesteps      | 700000       |
+| train/                  |              |
+|    approx_kl            | 0.0073551387 |
+|    clip_fraction        | 0.084        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.97        |
+|    explained_variance   | 0.824        |
+|    learning_rate        | 0.0003       |
+|    loss                 | 0.0126       |
+|    n_updates            | 420          |
+|    policy_gradient_loss | -0.0064      |
+|    std                  | 1.06         |
+|    value_loss           | 0.0438       |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1904   |
+|    iterations      | 43     |
+|    time_elapsed    | 370    |
+|    total_timesteps | 704512 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1922        |
+|    iterations           | 44          |
+|    time_elapsed         | 375         |
+|    total_timesteps      | 720896      |
+| train/                  |             |
+|    approx_kl            | 0.006614036 |
+|    clip_fraction        | 0.0611      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.95       |
+|    explained_variance   | 0.881       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0207     |
+|    n_updates            | 430         |
+|    policy_gradient_loss | -0.00371    |
+|    std                  | 1.06        |
+|    value_loss           | 0.0244      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1940         |
+|    iterations           | 45           |
+|    time_elapsed         | 380          |
+|    total_timesteps      | 737280       |
+| train/                  |              |
+|    approx_kl            | 0.0060790265 |
+|    clip_fraction        | 0.0591       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.95        |
+|    explained_variance   | 0.885        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0284      |
+|    n_updates            | 440          |
+|    policy_gradient_loss | -0.00447     |
+|    std                  | 1.06         |
+|    value_loss           | 0.0206       |
+------------------------------------------
+Eval num_timesteps=750000, episode_reward=-40.21 +/- 27.55
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -40.2        |
+| time/                   |              |
+|    total_timesteps      | 750000       |
+| train/                  |              |
+|    approx_kl            | 0.0066163363 |
+|    clip_fraction        | 0.0691       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.96        |
+|    explained_variance   | 0.924        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.032       |
+|    n_updates            | 450          |
+|    policy_gradient_loss | -0.0043      |
+|    std                  | 1.06         |
+|    value_loss           | 0.0127       |
+------------------------------------------
+
+[Diag @ 750,000 | n_sheep=2 | success=0%]
+  COMPACT_CANT_DRIVE         14/20
+  NEVER_COMPACT              5/20
+  DROVE_NO_SHEEP             1/20
+  action_mag mean=0.313 p10=0.081 p90=0.638 (0=stopped, 1=full speed)
+  min_flock_radius mean=2.72m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=3.96m best=0.02m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.68m best=2.17m
+  reward/step (mean): progress=-0.0005  alignment=+0.0000  pen_bonus=+0.0008  step_cost=-0.0200  complete=+0.0000
+-------------------------------
+| time/              |        |
+|    fps             | 1866   |
+|    iterations      | 46     |
+|    time_elapsed    | 403    |
+|    total_timesteps | 753664 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1887        |
+|    iterations           | 47          |
+|    time_elapsed         | 407         |
+|    total_timesteps      | 770048      |
+| train/                  |             |
+|    approx_kl            | 0.005094421 |
+|    clip_fraction        | 0.0496      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.96       |
+|    explained_variance   | 0.917       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0237     |
+|    n_updates            | 460         |
+|    policy_gradient_loss | -0.00332    |
+|    std                  | 1.06        |
+|    value_loss           | 0.0275      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1906        |
+|    iterations           | 48          |
+|    time_elapsed         | 412         |
+|    total_timesteps      | 786432      |
+| train/                  |             |
+|    approx_kl            | 0.006302662 |
+|    clip_fraction        | 0.0571      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.94       |
+|    explained_variance   | 0.944       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0353     |
+|    n_updates            | 470         |
+|    policy_gradient_loss | -0.00424    |
+|    std                  | 1.05        |
+|    value_loss           | 0.0201      |
+-----------------------------------------
+Eval num_timesteps=800000, episode_reward=-31.43 +/- 45.97
+Episode length: 1953.35 +/- 203.34
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.95e+03     |
+|    mean_reward          | -31.4        |
+| time/                   |              |
+|    total_timesteps      | 800000       |
+| train/                  |              |
+|    approx_kl            | 0.0055750986 |
+|    clip_fraction        | 0.0494       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.95        |
+|    explained_variance   | 0.959        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0262      |
+|    n_updates            | 480          |
+|    policy_gradient_loss | -0.00386     |
+|    std                  | 1.06         |
+|    value_loss           | 0.0218       |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1878   |
+|    iterations      | 49     |
+|    time_elapsed    | 427    |
+|    total_timesteps | 802816 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1897         |
+|    iterations           | 50           |
+|    time_elapsed         | 431          |
+|    total_timesteps      | 819200       |
+| train/                  |              |
+|    approx_kl            | 0.0057711033 |
+|    clip_fraction        | 0.0568       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.95        |
+|    explained_variance   | 0.838        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0362      |
+|    n_updates            | 490          |
+|    policy_gradient_loss | -0.00438     |
+|    std                  | 1.06         |
+|    value_loss           | 0.00952      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1914         |
+|    iterations           | 51           |
+|    time_elapsed         | 436          |
+|    total_timesteps      | 835584       |
+| train/                  |              |
+|    approx_kl            | 0.0073408587 |
+|    clip_fraction        | 0.077        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.96        |
+|    explained_variance   | 0.931        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0283      |
+|    n_updates            | 500          |
+|    policy_gradient_loss | -0.00553     |
+|    std                  | 1.07         |
+|    value_loss           | 0.0142       |
+------------------------------------------
+Eval num_timesteps=850000, episode_reward=-37.98 +/- 27.04
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -38          |
+| time/                   |              |
+|    total_timesteps      | 850000       |
+| train/                  |              |
+|    approx_kl            | 0.0055803536 |
+|    clip_fraction        | 0.0536       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.96        |
+|    explained_variance   | 0.931        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0338      |
+|    n_updates            | 510          |
+|    policy_gradient_loss | -0.00469     |
+|    std                  | 1.06         |
+|    value_loss           | 0.0156       |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1884   |
+|    iterations      | 52     |
+|    time_elapsed    | 452    |
+|    total_timesteps | 851968 |
+-------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 1899       |
+|    iterations           | 53         |
+|    time_elapsed         | 457        |
+|    total_timesteps      | 868352     |
+| train/                  |            |
+|    approx_kl            | 0.00585186 |
+|    clip_fraction        | 0.0638     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.97      |
+|    explained_variance   | 0.83       |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0333    |
+|    n_updates            | 520        |
+|    policy_gradient_loss | -0.00395   |
+|    std                  | 1.07       |
+|    value_loss           | 0.0322     |
+----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1915         |
+|    iterations           | 54           |
+|    time_elapsed         | 461          |
+|    total_timesteps      | 884736       |
+| train/                  |              |
+|    approx_kl            | 0.0055105407 |
+|    clip_fraction        | 0.045        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.96        |
+|    explained_variance   | 0.845        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0283      |
+|    n_updates            | 530          |
+|    policy_gradient_loss | -0.00367     |
+|    std                  | 1.06         |
+|    value_loss           | 0.0109       |
+------------------------------------------
+Eval num_timesteps=900000, episode_reward=-41.53 +/- 35.40
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -41.5        |
+| time/                   |              |
+|    total_timesteps      | 900000       |
+| train/                  |              |
+|    approx_kl            | 0.0064837057 |
+|    clip_fraction        | 0.0625       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.96        |
+|    explained_variance   | 0.909        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0394      |
+|    n_updates            | 540          |
+|    policy_gradient_loss | -0.00409     |
+|    std                  | 1.06         |
+|    value_loss           | 0.0147       |
+------------------------------------------
+
+[Diag @ 900,000 | n_sheep=2 | success=0%]
+  COMPACT_CANT_DRIVE         12/20
+  NEVER_COMPACT              8/20
+  action_mag mean=0.276 p10=0.038 p90=0.580 (0=stopped, 1=full speed)
+  min_flock_radius mean=4.30m best=0.98m  (target <5m to compact)
+  min_dog_to_com   mean=3.24m best=0.24m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.15m best=5.60m
+  reward/step (mean): progress=-0.0048  alignment=+0.0000  pen_bonus=+0.0000  step_cost=-0.0200  complete=+0.0000
+-------------------------------
+| time/              |        |
+|    fps             | 1857   |
+|    iterations      | 55     |
+|    time_elapsed    | 485    |
+|    total_timesteps | 901120 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1874        |
+|    iterations           | 56          |
+|    time_elapsed         | 489         |
+|    total_timesteps      | 917504      |
+| train/                  |             |
+|    approx_kl            | 0.006582682 |
+|    clip_fraction        | 0.0662      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.97       |
+|    explained_variance   | 0.961       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.039      |
+|    n_updates            | 550         |
+|    policy_gradient_loss | -0.00462    |
+|    std                  | 1.07        |
+|    value_loss           | 0.0103      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1888         |
+|    iterations           | 57           |
+|    time_elapsed         | 494          |
+|    total_timesteps      | 933888       |
+| train/                  |              |
+|    approx_kl            | 0.0059698187 |
+|    clip_fraction        | 0.0573       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.97        |
+|    explained_variance   | 0.907        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0291      |
+|    n_updates            | 560          |
+|    policy_gradient_loss | -0.00446     |
+|    std                  | 1.07         |
+|    value_loss           | 0.0113       |
+------------------------------------------
+Eval num_timesteps=950000, episode_reward=-26.73 +/- 22.82
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -26.7       |
+| time/                   |             |
+|    total_timesteps      | 950000      |
+| train/                  |             |
+|    approx_kl            | 0.006601461 |
+|    clip_fraction        | 0.0594      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.96       |
+|    explained_variance   | 0.872       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.034      |
+|    n_updates            | 570         |
+|    policy_gradient_loss | -0.00455    |
+|    std                  | 1.06        |
+|    value_loss           | 0.00901     |
+-----------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1856   |
+|    iterations      | 58     |
+|    time_elapsed    | 511    |
+|    total_timesteps | 950272 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1869        |
+|    iterations           | 59          |
+|    time_elapsed         | 517         |
+|    total_timesteps      | 966656      |
+| train/                  |             |
+|    approx_kl            | 0.005824944 |
+|    clip_fraction        | 0.0624      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.96       |
+|    explained_variance   | 0.789       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0214     |
+|    n_updates            | 580         |
+|    policy_gradient_loss | -0.00363    |
+|    std                  | 1.07        |
+|    value_loss           | 0.0359      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1882        |
+|    iterations           | 60          |
+|    time_elapsed         | 522         |
+|    total_timesteps      | 983040      |
+| train/                  |             |
+|    approx_kl            | 0.005888001 |
+|    clip_fraction        | 0.0573      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.98       |
+|    explained_variance   | 0.887       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0391     |
+|    n_updates            | 590         |
+|    policy_gradient_loss | -0.00371    |
+|    std                  | 1.07        |
+|    value_loss           | 0.00935     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1895        |
+|    iterations           | 61          |
+|    time_elapsed         | 527         |
+|    total_timesteps      | 999424      |
+| train/                  |             |
+|    approx_kl            | 0.005874036 |
+|    clip_fraction        | 0.0611      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.98       |
+|    explained_variance   | 0.871       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0246     |
+|    n_updates            | 600         |
+|    policy_gradient_loss | -0.00492    |
+|    std                  | 1.07        |
+|    value_loss           | 0.00877     |
+-----------------------------------------
+Eval num_timesteps=1000000, episode_reward=-22.72 +/- 33.15
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -22.7        |
+| time/                   |              |
+|    total_timesteps      | 1000000      |
+| train/                  |              |
+|    approx_kl            | 0.0060388125 |
+|    clip_fraction        | 0.0637       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.97        |
+|    explained_variance   | 0.737        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0511      |
+|    n_updates            | 610          |
+|    policy_gradient_loss | -0.00387     |
+|    std                  | 1.07         |
+|    value_loss           | 0.0538       |
+------------------------------------------
+New best mean reward!
+--------------------------------
+| time/              |         |
+|    fps             | 1869    |
+|    iterations      | 62      |
+|    time_elapsed    | 543     |
+|    total_timesteps | 1015808 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1882        |
+|    iterations           | 63          |
+|    time_elapsed         | 548         |
+|    total_timesteps      | 1032192     |
+| train/                  |             |
+|    approx_kl            | 0.007320485 |
+|    clip_fraction        | 0.0723      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.99       |
+|    explained_variance   | 0.946       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0342     |
+|    n_updates            | 620         |
+|    policy_gradient_loss | -0.0052     |
+|    std                  | 1.08        |
+|    value_loss           | 0.0174      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1894         |
+|    iterations           | 64           |
+|    time_elapsed         | 553          |
+|    total_timesteps      | 1048576      |
+| train/                  |              |
+|    approx_kl            | 0.0066477214 |
+|    clip_fraction        | 0.0621       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3           |
+|    explained_variance   | 0.919        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0301      |
+|    n_updates            | 630          |
+|    policy_gradient_loss | -0.00449     |
+|    std                  | 1.08         |
+|    value_loss           | 0.0109       |
+------------------------------------------
+Eval num_timesteps=1050000, episode_reward=-39.86 +/- 28.77
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -39.9        |
+| time/                   |              |
+|    total_timesteps      | 1050000      |
+| train/                  |              |
+|    approx_kl            | 0.0066243596 |
+|    clip_fraction        | 0.0772       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.99        |
+|    explained_variance   | 0.861        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0313      |
+|    n_updates            | 640          |
+|    policy_gradient_loss | -0.00462     |
+|    std                  | 1.07         |
+|    value_loss           | 0.0324       |
+------------------------------------------
+
+[Diag @ 1,050,000 | n_sheep=2 | success=0%]
+  COMPACT_CANT_DRIVE         18/20
+  NEVER_COMPACT              2/20
+  action_mag mean=0.200 p10=0.022 p90=0.478 (0=stopped, 1=full speed)
+  min_flock_radius mean=2.29m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=3.23m best=0.05m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.84m best=3.77m
+  reward/step (mean): progress=+0.0016  alignment=+0.0000  pen_bonus=+0.0003  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1843    |
+|    iterations      | 65      |
+|    time_elapsed    | 577     |
+|    total_timesteps | 1064960 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1855         |
+|    iterations           | 66           |
+|    time_elapsed         | 582          |
+|    total_timesteps      | 1081344      |
+| train/                  |              |
+|    approx_kl            | 0.0066154073 |
+|    clip_fraction        | 0.0657       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.99        |
+|    explained_variance   | 0.836        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.029       |
+|    n_updates            | 650          |
+|    policy_gradient_loss | -0.0049      |
+|    std                  | 1.08         |
+|    value_loss           | 0.0135       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1865         |
+|    iterations           | 67           |
+|    time_elapsed         | 588          |
+|    total_timesteps      | 1097728      |
+| train/                  |              |
+|    approx_kl            | 0.0059733046 |
+|    clip_fraction        | 0.0634       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.01        |
+|    explained_variance   | 0.852        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0254      |
+|    n_updates            | 660          |
+|    policy_gradient_loss | -0.00452     |
+|    std                  | 1.09         |
+|    value_loss           | 0.0395       |
+------------------------------------------
+Eval num_timesteps=1100000, episode_reward=-33.30 +/- 26.65
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -33.3        |
+| time/                   |              |
+|    total_timesteps      | 1100000      |
+| train/                  |              |
+|    approx_kl            | 0.0054050894 |
+|    clip_fraction        | 0.048        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.02        |
+|    explained_variance   | 0.851        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0348      |
+|    n_updates            | 670          |
+|    policy_gradient_loss | -0.00385     |
+|    std                  | 1.1          |
+|    value_loss           | 0.0247       |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1843    |
+|    iterations      | 68      |
+|    time_elapsed    | 604     |
+|    total_timesteps | 1114112 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1856         |
+|    iterations           | 69           |
+|    time_elapsed         | 608          |
+|    total_timesteps      | 1130496      |
+| train/                  |              |
+|    approx_kl            | 0.0073612374 |
+|    clip_fraction        | 0.076        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.01        |
+|    explained_variance   | 0.885        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0424      |
+|    n_updates            | 680          |
+|    policy_gradient_loss | -0.00512     |
+|    std                  | 1.09         |
+|    value_loss           | 0.0278       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1869         |
+|    iterations           | 70           |
+|    time_elapsed         | 613          |
+|    total_timesteps      | 1146880      |
+| train/                  |              |
+|    approx_kl            | 0.0063554104 |
+|    clip_fraction        | 0.067        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.01        |
+|    explained_variance   | 0.915        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0302      |
+|    n_updates            | 690          |
+|    policy_gradient_loss | -0.00577     |
+|    std                  | 1.09         |
+|    value_loss           | 0.0116       |
+------------------------------------------
+Eval num_timesteps=1150000, episode_reward=-26.91 +/- 26.08
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -26.9       |
+| time/                   |             |
+|    total_timesteps      | 1150000     |
+| train/                  |             |
+|    approx_kl            | 0.006060633 |
+|    clip_fraction        | 0.0603      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.02       |
+|    explained_variance   | 0.905       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0374     |
+|    n_updates            | 700         |
+|    policy_gradient_loss | -0.00442    |
+|    std                  | 1.1         |
+|    value_loss           | 0.0101      |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1847    |
+|    iterations      | 71      |
+|    time_elapsed    | 629     |
+|    total_timesteps | 1163264 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1859         |
+|    iterations           | 72           |
+|    time_elapsed         | 634          |
+|    total_timesteps      | 1179648      |
+| train/                  |              |
+|    approx_kl            | 0.0070389216 |
+|    clip_fraction        | 0.0728       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.03        |
+|    explained_variance   | 0.854        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0409      |
+|    n_updates            | 710          |
+|    policy_gradient_loss | -0.00505     |
+|    std                  | 1.1          |
+|    value_loss           | 0.0196       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1871         |
+|    iterations           | 73           |
+|    time_elapsed         | 638          |
+|    total_timesteps      | 1196032      |
+| train/                  |              |
+|    approx_kl            | 0.0055403598 |
+|    clip_fraction        | 0.0567       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.03        |
+|    explained_variance   | 0.906        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0324      |
+|    n_updates            | 720          |
+|    policy_gradient_loss | -0.00494     |
+|    std                  | 1.1          |
+|    value_loss           | 0.0109       |
+------------------------------------------
+Eval num_timesteps=1200000, episode_reward=-23.57 +/- 26.30
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -23.6        |
+| time/                   |              |
+|    total_timesteps      | 1200000      |
+| train/                  |              |
+|    approx_kl            | 0.0055604624 |
+|    clip_fraction        | 0.0522       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.02        |
+|    explained_variance   | 0.819        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.00379     |
+|    n_updates            | 730          |
+|    policy_gradient_loss | -0.00374     |
+|    std                  | 1.1          |
+|    value_loss           | 0.0453       |
+------------------------------------------
+
+[Diag @ 1,200,000 | n_sheep=2 | success=0%]
+  COMPACT_CANT_DRIVE         15/20
+  NEVER_COMPACT              4/20
+  DROVE_NO_SHEEP             1/20
+  action_mag mean=0.399 p10=0.067 p90=0.794 (0=stopped, 1=full speed)
+  min_flock_radius mean=2.96m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=2.17m best=0.14m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=11.07m best=2.66m
+  reward/step (mean): progress=+0.0064  alignment=+0.0000  pen_bonus=+0.0003  step_cost=-0.0200  complete=+0.0000
+
+[Curriculum] leaving stage n_sheep=2 after 600,000 steps | training success rate (last 100 eps) = 0%
+[Curriculum] → 3 sheep at step 1,200,000
+
+--------------------------------
+| time/              |         |
+|    fps             | 1828    |
+|    iterations      | 74      |
+|    time_elapsed    | 663     |
+|    total_timesteps | 1212416 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1839        |
+|    iterations           | 75          |
+|    time_elapsed         | 668         |
+|    total_timesteps      | 1228800     |
+| train/                  |             |
+|    approx_kl            | 0.007044647 |
+|    clip_fraction        | 0.0819      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.02       |
+|    explained_variance   | 0.902       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.00823    |
+|    n_updates            | 740         |
+|    policy_gradient_loss | -0.00327    |
+|    std                  | 1.1         |
+|    value_loss           | 0.042       |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1849         |
+|    iterations           | 76           |
+|    time_elapsed         | 673          |
+|    total_timesteps      | 1245184      |
+| train/                  |              |
+|    approx_kl            | 0.0064169513 |
+|    clip_fraction        | 0.0699       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.03        |
+|    explained_variance   | 0.928        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0323      |
+|    n_updates            | 750          |
+|    policy_gradient_loss | -0.00459     |
+|    std                  | 1.1          |
+|    value_loss           | 0.0102       |
+------------------------------------------
+Eval num_timesteps=1250000, episode_reward=-27.97 +/- 37.55
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -28         |
+| time/                   |             |
+|    total_timesteps      | 1250000     |
+| train/                  |             |
+|    approx_kl            | 0.006859841 |
+|    clip_fraction        | 0.0783      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.04       |
+|    explained_variance   | 0.94        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0368     |
+|    n_updates            | 760         |
+|    policy_gradient_loss | -0.00472    |
+|    std                  | 1.11        |
+|    value_loss           | 0.00931     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1825    |
+|    iterations      | 77      |
+|    time_elapsed    | 691     |
+|    total_timesteps | 1261568 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1836         |
+|    iterations           | 78           |
+|    time_elapsed         | 696          |
+|    total_timesteps      | 1277952      |
+| train/                  |              |
+|    approx_kl            | 0.0066901552 |
+|    clip_fraction        | 0.0704       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.04        |
+|    explained_variance   | 0.942        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0329      |
+|    n_updates            | 770          |
+|    policy_gradient_loss | -0.00458     |
+|    std                  | 1.11         |
+|    value_loss           | 0.00938      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1845        |
+|    iterations           | 79          |
+|    time_elapsed         | 701         |
+|    total_timesteps      | 1294336     |
+| train/                  |             |
+|    approx_kl            | 0.007008245 |
+|    clip_fraction        | 0.082       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.03       |
+|    explained_variance   | 0.899       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0194     |
+|    n_updates            | 780         |
+|    policy_gradient_loss | -0.00426    |
+|    std                  | 1.1         |
+|    value_loss           | 0.052       |
+-----------------------------------------
+Eval num_timesteps=1300000, episode_reward=-41.12 +/- 37.68
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -41.1        |
+| time/                   |              |
+|    total_timesteps      | 1300000      |
+| train/                  |              |
+|    approx_kl            | 0.0070775724 |
+|    clip_fraction        | 0.0742       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.03        |
+|    explained_variance   | 0.942        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0238      |
+|    n_updates            | 790          |
+|    policy_gradient_loss | -0.0052      |
+|    std                  | 1.11         |
+|    value_loss           | 0.00657      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1823    |
+|    iterations      | 80      |
+|    time_elapsed    | 718     |
+|    total_timesteps | 1310720 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1832        |
+|    iterations           | 81          |
+|    time_elapsed         | 724         |
+|    total_timesteps      | 1327104     |
+| train/                  |             |
+|    approx_kl            | 0.008046751 |
+|    clip_fraction        | 0.0851      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.04       |
+|    explained_variance   | 0.897       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0384     |
+|    n_updates            | 800         |
+|    policy_gradient_loss | -0.0057     |
+|    std                  | 1.11        |
+|    value_loss           | 0.009       |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1840        |
+|    iterations           | 82          |
+|    time_elapsed         | 730         |
+|    total_timesteps      | 1343488     |
+| train/                  |             |
+|    approx_kl            | 0.006007643 |
+|    clip_fraction        | 0.0548      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.06       |
+|    explained_variance   | 0.871       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0251     |
+|    n_updates            | 810         |
+|    policy_gradient_loss | -0.00416    |
+|    std                  | 1.12        |
+|    value_loss           | 0.0179      |
+-----------------------------------------
+Eval num_timesteps=1350000, episode_reward=-24.46 +/- 41.24
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -24.5        |
+| time/                   |              |
+|    total_timesteps      | 1350000      |
+| train/                  |              |
+|    approx_kl            | 0.0065572546 |
+|    clip_fraction        | 0.0698       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.08        |
+|    explained_variance   | 0.877        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0219      |
+|    n_updates            | 820          |
+|    policy_gradient_loss | -0.00456     |
+|    std                  | 1.13         |
+|    value_loss           | 0.0242       |
+------------------------------------------
+
+[Diag @ 1,350,000 | n_sheep=3 | success=0%]
+  NEVER_COMPACT              14/20
+  COMPACT_CANT_DRIVE         6/20
+  action_mag mean=0.195 p10=0.018 p90=0.576 (0=stopped, 1=full speed)
+  min_flock_radius mean=6.32m best=1.36m  (target <5m to compact)
+  min_dog_to_com   mean=4.15m best=0.61m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=11.37m best=4.88m
+  reward/step (mean): progress=+0.0029  alignment=+0.0000  pen_bonus=+0.0000  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1798    |
+|    iterations      | 83      |
+|    time_elapsed    | 756     |
+|    total_timesteps | 1359872 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1809         |
+|    iterations           | 84           |
+|    time_elapsed         | 760          |
+|    total_timesteps      | 1376256      |
+| train/                  |              |
+|    approx_kl            | 0.0072198315 |
+|    clip_fraction        | 0.0764       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.08        |
+|    explained_variance   | 0.909        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0208      |
+|    n_updates            | 830          |
+|    policy_gradient_loss | -0.00626     |
+|    std                  | 1.13         |
+|    value_loss           | 0.0106       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1817         |
+|    iterations           | 85           |
+|    time_elapsed         | 766          |
+|    total_timesteps      | 1392640      |
+| train/                  |              |
+|    approx_kl            | 0.0070813587 |
+|    clip_fraction        | 0.0733       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.08        |
+|    explained_variance   | 0.907        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0324      |
+|    n_updates            | 840          |
+|    policy_gradient_loss | -0.00505     |
+|    std                  | 1.13         |
+|    value_loss           | 0.0166       |
+------------------------------------------
+Eval num_timesteps=1400000, episode_reward=-36.32 +/- 33.15
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -36.3        |
+| time/                   |              |
+|    total_timesteps      | 1400000      |
+| train/                  |              |
+|    approx_kl            | 0.0067584305 |
+|    clip_fraction        | 0.08         |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.08        |
+|    explained_variance   | 0.906        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0308      |
+|    n_updates            | 850          |
+|    policy_gradient_loss | -0.0054      |
+|    std                  | 1.13         |
+|    value_loss           | 0.0112       |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1798    |
+|    iterations      | 86      |
+|    time_elapsed    | 783     |
+|    total_timesteps | 1409024 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1807        |
+|    iterations           | 87          |
+|    time_elapsed         | 788         |
+|    total_timesteps      | 1425408     |
+| train/                  |             |
+|    approx_kl            | 0.007411341 |
+|    clip_fraction        | 0.0716      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.09       |
+|    explained_variance   | 0.904       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0322     |
+|    n_updates            | 860         |
+|    policy_gradient_loss | -0.00641    |
+|    std                  | 1.14        |
+|    value_loss           | 0.0191      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1815         |
+|    iterations           | 88           |
+|    time_elapsed         | 794          |
+|    total_timesteps      | 1441792      |
+| train/                  |              |
+|    approx_kl            | 0.0077011855 |
+|    clip_fraction        | 0.0774       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.09        |
+|    explained_variance   | 0.914        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0316      |
+|    n_updates            | 870          |
+|    policy_gradient_loss | -0.00545     |
+|    std                  | 1.13         |
+|    value_loss           | 0.0148       |
+------------------------------------------
+Eval num_timesteps=1450000, episode_reward=-40.58 +/- 38.17
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -40.6       |
+| time/                   |             |
+|    total_timesteps      | 1450000     |
+| train/                  |             |
+|    approx_kl            | 0.007694071 |
+|    clip_fraction        | 0.0816      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.07       |
+|    explained_variance   | 0.937       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.036      |
+|    n_updates            | 880         |
+|    policy_gradient_loss | -0.0054     |
+|    std                  | 1.12        |
+|    value_loss           | 0.0111      |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1796    |
+|    iterations      | 89      |
+|    time_elapsed    | 811     |
+|    total_timesteps | 1458176 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1805        |
+|    iterations           | 90          |
+|    time_elapsed         | 816         |
+|    total_timesteps      | 1474560     |
+| train/                  |             |
+|    approx_kl            | 0.007034345 |
+|    clip_fraction        | 0.0693      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.07       |
+|    explained_variance   | 0.924       |
+|    learning_rate        | 0.0003      |
+|    loss                 | 0.0472      |
+|    n_updates            | 890         |
+|    policy_gradient_loss | -0.00472    |
+|    std                  | 1.13        |
+|    value_loss           | 0.0352      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1815         |
+|    iterations           | 91           |
+|    time_elapsed         | 821          |
+|    total_timesteps      | 1490944      |
+| train/                  |              |
+|    approx_kl            | 0.0078114523 |
+|    clip_fraction        | 0.0917       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.08        |
+|    explained_variance   | 0.942        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0461      |
+|    n_updates            | 900          |
+|    policy_gradient_loss | -0.00668     |
+|    std                  | 1.13         |
+|    value_loss           | 0.00844      |
+------------------------------------------
+Eval num_timesteps=1500000, episode_reward=-19.66 +/- 25.98
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -19.7        |
+| time/                   |              |
+|    total_timesteps      | 1500000      |
+| train/                  |              |
+|    approx_kl            | 0.0067999987 |
+|    clip_fraction        | 0.0606       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.08        |
+|    explained_variance   | 0.893        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0283      |
+|    n_updates            | 910          |
+|    policy_gradient_loss | -0.00385     |
+|    std                  | 1.12         |
+|    value_loss           | 0.0409       |
+------------------------------------------
+New best mean reward!
+
+[Diag @ 1,500,000 | n_sheep=3 | success=0%]
+  COMPACT_CANT_DRIVE         11/20
+  NEVER_COMPACT              7/20
+  DROVE_NO_SHEEP             2/20
+  action_mag mean=0.185 p10=0.015 p90=0.426 (0=stopped, 1=full speed)
+  min_flock_radius mean=4.43m best=1.38m  (target <5m to compact)
+  min_dog_to_com   mean=2.89m best=0.07m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=11.88m best=2.23m
+  reward/step (mean): progress=+0.0008  alignment=+0.0000  pen_bonus=+0.0003  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1781    |
+|    iterations      | 92      |
+|    time_elapsed    | 846     |
+|    total_timesteps | 1507328 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1789         |
+|    iterations           | 93           |
+|    time_elapsed         | 851          |
+|    total_timesteps      | 1523712      |
+| train/                  |              |
+|    approx_kl            | 0.0069550863 |
+|    clip_fraction        | 0.0787       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.08        |
+|    explained_variance   | 0.897        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0204      |
+|    n_updates            | 920          |
+|    policy_gradient_loss | -0.00394     |
+|    std                  | 1.13         |
+|    value_loss           | 0.0324       |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1798        |
+|    iterations           | 94          |
+|    time_elapsed         | 856         |
+|    total_timesteps      | 1540096     |
+| train/                  |             |
+|    approx_kl            | 0.006749108 |
+|    clip_fraction        | 0.0787      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.08       |
+|    explained_variance   | 0.929       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0338     |
+|    n_updates            | 930         |
+|    policy_gradient_loss | -0.00534    |
+|    std                  | 1.13        |
+|    value_loss           | 0.00967     |
+-----------------------------------------
+Eval num_timesteps=1550000, episode_reward=-26.47 +/- 25.94
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -26.5        |
+| time/                   |              |
+|    total_timesteps      | 1550000      |
+| train/                  |              |
+|    approx_kl            | 0.0073381998 |
+|    clip_fraction        | 0.0679       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.08        |
+|    explained_variance   | 0.919        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0259      |
+|    n_updates            | 940          |
+|    policy_gradient_loss | -0.00554     |
+|    std                  | 1.13         |
+|    value_loss           | 0.00999      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1782    |
+|    iterations      | 95      |
+|    time_elapsed    | 873     |
+|    total_timesteps | 1556480 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1790         |
+|    iterations           | 96           |
+|    time_elapsed         | 878          |
+|    total_timesteps      | 1572864      |
+| train/                  |              |
+|    approx_kl            | 0.0071112993 |
+|    clip_fraction        | 0.0781       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.08        |
+|    explained_variance   | 0.929        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0324      |
+|    n_updates            | 950          |
+|    policy_gradient_loss | -0.00428     |
+|    std                  | 1.13         |
+|    value_loss           | 0.0246       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1798         |
+|    iterations           | 97           |
+|    time_elapsed         | 883          |
+|    total_timesteps      | 1589248      |
+| train/                  |              |
+|    approx_kl            | 0.0077134473 |
+|    clip_fraction        | 0.0784       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.08        |
+|    explained_variance   | 0.917        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0365      |
+|    n_updates            | 960          |
+|    policy_gradient_loss | -0.00445     |
+|    std                  | 1.13         |
+|    value_loss           | 0.0122       |
+------------------------------------------
+Eval num_timesteps=1600000, episode_reward=-35.13 +/- 31.01
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -35.1        |
+| time/                   |              |
+|    total_timesteps      | 1600000      |
+| train/                  |              |
+|    approx_kl            | 0.0070123896 |
+|    clip_fraction        | 0.0712       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.07        |
+|    explained_variance   | 0.919        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.026       |
+|    n_updates            | 970          |
+|    policy_gradient_loss | -0.00519     |
+|    std                  | 1.13         |
+|    value_loss           | 0.0171       |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1781    |
+|    iterations      | 98      |
+|    time_elapsed    | 901     |
+|    total_timesteps | 1605632 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1789        |
+|    iterations           | 99          |
+|    time_elapsed         | 906         |
+|    total_timesteps      | 1622016     |
+| train/                  |             |
+|    approx_kl            | 0.007990176 |
+|    clip_fraction        | 0.0845      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.07       |
+|    explained_variance   | 0.873       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.04       |
+|    n_updates            | 980         |
+|    policy_gradient_loss | -0.0045     |
+|    std                  | 1.13        |
+|    value_loss           | 0.0153      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1798        |
+|    iterations           | 100         |
+|    time_elapsed         | 911         |
+|    total_timesteps      | 1638400     |
+| train/                  |             |
+|    approx_kl            | 0.006477687 |
+|    clip_fraction        | 0.0593      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.07       |
+|    explained_variance   | 0.946       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0396     |
+|    n_updates            | 990         |
+|    policy_gradient_loss | -0.00442    |
+|    std                  | 1.13        |
+|    value_loss           | 0.0107      |
+-----------------------------------------
+Eval num_timesteps=1650000, episode_reward=-31.86 +/- 47.05
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -31.9       |
+| time/                   |             |
+|    total_timesteps      | 1650000     |
+| train/                  |             |
+|    approx_kl            | 0.006796476 |
+|    clip_fraction        | 0.0672      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.08       |
+|    explained_variance   | 0.929       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0264     |
+|    n_updates            | 1000        |
+|    policy_gradient_loss | -0.00375    |
+|    std                  | 1.13        |
+|    value_loss           | 0.0385      |
+-----------------------------------------
+
+[Diag @ 1,650,000 | n_sheep=3 | success=0%]
+  NEVER_COMPACT              11/20
+  COMPACT_CANT_DRIVE         9/20
+  action_mag mean=0.154 p10=0.005 p90=0.398 (0=stopped, 1=full speed)
+  min_flock_radius mean=5.81m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=3.22m best=0.52m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.42m best=7.08m
+  reward/step (mean): progress=+0.0061  alignment=+0.0000  pen_bonus=+0.0010  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1768    |
+|    iterations      | 101     |
+|    time_elapsed    | 935     |
+|    total_timesteps | 1654784 |
+--------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 1774       |
+|    iterations           | 102        |
+|    time_elapsed         | 941        |
+|    total_timesteps      | 1671168    |
+| train/                  |            |
+|    approx_kl            | 0.00682881 |
+|    clip_fraction        | 0.0694     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -3.08      |
+|    explained_variance   | 0.939      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0233    |
+|    n_updates            | 1010       |
+|    policy_gradient_loss | -0.00461   |
+|    std                  | 1.13       |
+|    value_loss           | 0.0183     |
+----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1779         |
+|    iterations           | 103          |
+|    time_elapsed         | 948          |
+|    total_timesteps      | 1687552      |
+| train/                  |              |
+|    approx_kl            | 0.0071003223 |
+|    clip_fraction        | 0.0782       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.1         |
+|    explained_variance   | 0.923        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0398      |
+|    n_updates            | 1020         |
+|    policy_gradient_loss | -0.00491     |
+|    std                  | 1.15         |
+|    value_loss           | 0.0101       |
+------------------------------------------
+Eval num_timesteps=1700000, episode_reward=-32.11 +/- 36.59
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -32.1        |
+| time/                   |              |
+|    total_timesteps      | 1700000      |
+| train/                  |              |
+|    approx_kl            | 0.0064870613 |
+|    clip_fraction        | 0.0624       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.13        |
+|    explained_variance   | 0.909        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0365      |
+|    n_updates            | 1030         |
+|    policy_gradient_loss | -0.00404     |
+|    std                  | 1.17         |
+|    value_loss           | 0.00855      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1762    |
+|    iterations      | 104     |
+|    time_elapsed    | 966     |
+|    total_timesteps | 1703936 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1769        |
+|    iterations           | 105         |
+|    time_elapsed         | 972         |
+|    total_timesteps      | 1720320     |
+| train/                  |             |
+|    approx_kl            | 0.007349294 |
+|    clip_fraction        | 0.0833      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.15       |
+|    explained_variance   | 0.926       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0358     |
+|    n_updates            | 1040        |
+|    policy_gradient_loss | -0.00514    |
+|    std                  | 1.17        |
+|    value_loss           | 0.00848     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1777         |
+|    iterations           | 106          |
+|    time_elapsed         | 976          |
+|    total_timesteps      | 1736704      |
+| train/                  |              |
+|    approx_kl            | 0.0070306472 |
+|    clip_fraction        | 0.0814       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.15        |
+|    explained_variance   | 0.887        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0359      |
+|    n_updates            | 1050         |
+|    policy_gradient_loss | -0.00489     |
+|    std                  | 1.17         |
+|    value_loss           | 0.0134       |
+------------------------------------------
+Eval num_timesteps=1750000, episode_reward=-34.24 +/- 43.23
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -34.2       |
+| time/                   |             |
+|    total_timesteps      | 1750000     |
+| train/                  |             |
+|    approx_kl            | 0.008487761 |
+|    clip_fraction        | 0.102       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.15       |
+|    explained_variance   | 0.962       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0369     |
+|    n_updates            | 1060        |
+|    policy_gradient_loss | -0.0077     |
+|    std                  | 1.17        |
+|    value_loss           | 0.00786     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1762    |
+|    iterations      | 107     |
+|    time_elapsed    | 994     |
+|    total_timesteps | 1753088 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1766         |
+|    iterations           | 108          |
+|    time_elapsed         | 1001         |
+|    total_timesteps      | 1769472      |
+| train/                  |              |
+|    approx_kl            | 0.0074267983 |
+|    clip_fraction        | 0.0742       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.15        |
+|    explained_variance   | 0.939        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0404      |
+|    n_updates            | 1070         |
+|    policy_gradient_loss | -0.00575     |
+|    std                  | 1.18         |
+|    value_loss           | 0.0158       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1772         |
+|    iterations           | 109          |
+|    time_elapsed         | 1007         |
+|    total_timesteps      | 1785856      |
+| train/                  |              |
+|    approx_kl            | 0.0075380025 |
+|    clip_fraction        | 0.074        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.15        |
+|    explained_variance   | 0.961        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.034       |
+|    n_updates            | 1080         |
+|    policy_gradient_loss | -0.00553     |
+|    std                  | 1.17         |
+|    value_loss           | 0.00651      |
+------------------------------------------
+Eval num_timesteps=1800000, episode_reward=-31.16 +/- 37.32
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -31.2       |
+| time/                   |             |
+|    total_timesteps      | 1800000     |
+| train/                  |             |
+|    approx_kl            | 0.007386248 |
+|    clip_fraction        | 0.0843      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.15       |
+|    explained_variance   | 0.922       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0419     |
+|    n_updates            | 1090        |
+|    policy_gradient_loss | -0.00596    |
+|    std                  | 1.17        |
+|    value_loss           | 0.00858     |
+-----------------------------------------
+
+[Diag @ 1,800,000 | n_sheep=3 | success=0%]
+  NEVER_COMPACT              17/20
+  COMPACT_CANT_DRIVE         3/20
+  action_mag mean=0.164 p10=0.007 p90=0.418 (0=stopped, 1=full speed)
+  min_flock_radius mean=7.52m best=2.00m  (target <5m to compact)
+  min_dog_to_com   mean=2.24m best=0.21m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.87m best=3.90m
+  reward/step (mean): progress=-0.0007  alignment=+0.0000  pen_bonus=+0.0005  step_cost=-0.0200  complete=+0.0000
+
+[Curriculum] leaving stage n_sheep=3 after 600,000 steps | training success rate (last 100 eps) = 0%
+[Curriculum] → 4 sheep at step 1,800,000
+
+--------------------------------
+| time/              |         |
+|    fps             | 1743    |
+|    iterations      | 110     |
+|    time_elapsed    | 1033    |
+|    total_timesteps | 1802240 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1749        |
+|    iterations           | 111         |
+|    time_elapsed         | 1039        |
+|    total_timesteps      | 1818624     |
+| train/                  |             |
+|    approx_kl            | 0.009158293 |
+|    clip_fraction        | 0.0991      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.15       |
+|    explained_variance   | 0.893       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0414     |
+|    n_updates            | 1100        |
+|    policy_gradient_loss | -0.00701    |
+|    std                  | 1.17        |
+|    value_loss           | 0.0237      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1755        |
+|    iterations           | 112         |
+|    time_elapsed         | 1045        |
+|    total_timesteps      | 1835008     |
+| train/                  |             |
+|    approx_kl            | 0.007241189 |
+|    clip_fraction        | 0.0831      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.15       |
+|    explained_variance   | 0.874       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0241     |
+|    n_updates            | 1110        |
+|    policy_gradient_loss | -0.00634    |
+|    std                  | 1.17        |
+|    value_loss           | 0.0226      |
+-----------------------------------------
+Eval num_timesteps=1850000, episode_reward=-29.45 +/- 31.10
+Episode length: 2000.00 +/- 0.00
+---------------------------------------
+| eval/                   |           |
+|    mean_ep_length       | 2e+03     |
+|    mean_reward          | -29.5     |
+| time/                   |           |
+|    total_timesteps      | 1850000   |
+| train/                  |           |
+|    approx_kl            | 0.0078688 |
+|    clip_fraction        | 0.0777    |
+|    clip_range           | 0.2       |
+|    entropy_loss         | -3.15     |
+|    explained_variance   | 0.895     |
+|    learning_rate        | 0.0003    |
+|    loss                 | -0.036    |
+|    n_updates            | 1120      |
+|    policy_gradient_loss | -0.00602  |
+|    std                  | 1.17      |
+|    value_loss           | 0.0128    |
+---------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1742    |
+|    iterations      | 113     |
+|    time_elapsed    | 1062    |
+|    total_timesteps | 1851392 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1749        |
+|    iterations           | 114         |
+|    time_elapsed         | 1067        |
+|    total_timesteps      | 1867776     |
+| train/                  |             |
+|    approx_kl            | 0.008158936 |
+|    clip_fraction        | 0.0963      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.14       |
+|    explained_variance   | 0.897       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0324     |
+|    n_updates            | 1130        |
+|    policy_gradient_loss | -0.00854    |
+|    std                  | 1.17        |
+|    value_loss           | 0.0144      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1754         |
+|    iterations           | 115          |
+|    time_elapsed         | 1073         |
+|    total_timesteps      | 1884160      |
+| train/                  |              |
+|    approx_kl            | 0.0074978825 |
+|    clip_fraction        | 0.0844       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.14        |
+|    explained_variance   | 0.92         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0246      |
+|    n_updates            | 1140         |
+|    policy_gradient_loss | -0.00578     |
+|    std                  | 1.16         |
+|    value_loss           | 0.0134       |
+------------------------------------------
+Eval num_timesteps=1900000, episode_reward=-38.21 +/- 31.08
+Episode length: 2000.00 +/- 0.00
+----------------------------------------
+| eval/                   |            |
+|    mean_ep_length       | 2e+03      |
+|    mean_reward          | -38.2      |
+| time/                   |            |
+|    total_timesteps      | 1900000    |
+| train/                  |            |
+|    approx_kl            | 0.00678163 |
+|    clip_fraction        | 0.0711     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -3.15      |
+|    explained_variance   | 0.892      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0345    |
+|    n_updates            | 1150       |
+|    policy_gradient_loss | -0.00409   |
+|    std                  | 1.18       |
+|    value_loss           | 0.0221     |
+----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1740    |
+|    iterations      | 116     |
+|    time_elapsed    | 1091    |
+|    total_timesteps | 1900544 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1746        |
+|    iterations           | 117         |
+|    time_elapsed         | 1097        |
+|    total_timesteps      | 1916928     |
+| train/                  |             |
+|    approx_kl            | 0.006992462 |
+|    clip_fraction        | 0.0731      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.16       |
+|    explained_variance   | 0.895       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0243     |
+|    n_updates            | 1160        |
+|    policy_gradient_loss | -0.00588    |
+|    std                  | 1.18        |
+|    value_loss           | 0.0145      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1750         |
+|    iterations           | 118          |
+|    time_elapsed         | 1104         |
+|    total_timesteps      | 1933312      |
+| train/                  |              |
+|    approx_kl            | 0.0069225584 |
+|    clip_fraction        | 0.068        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.15        |
+|    explained_variance   | 0.905        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0297      |
+|    n_updates            | 1170         |
+|    policy_gradient_loss | -0.00516     |
+|    std                  | 1.17         |
+|    value_loss           | 0.0153       |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1756        |
+|    iterations           | 119         |
+|    time_elapsed         | 1109        |
+|    total_timesteps      | 1949696     |
+| train/                  |             |
+|    approx_kl            | 0.005966103 |
+|    clip_fraction        | 0.059       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.15       |
+|    explained_variance   | 0.896       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0337     |
+|    n_updates            | 1180        |
+|    policy_gradient_loss | -0.00413    |
+|    std                  | 1.17        |
+|    value_loss           | 0.0091      |
+-----------------------------------------
+Eval num_timesteps=1950000, episode_reward=-59.72 +/- 38.15
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -59.7        |
+| time/                   |              |
+|    total_timesteps      | 1950000      |
+| train/                  |              |
+|    approx_kl            | 0.0067311125 |
+|    clip_fraction        | 0.0733       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.16        |
+|    explained_variance   | 0.861        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0147      |
+|    n_updates            | 1190         |
+|    policy_gradient_loss | -0.00459     |
+|    std                  | 1.18         |
+|    value_loss           | 0.0083       |
+------------------------------------------
+
+[Diag @ 1,950,000 | n_sheep=4 | success=0%]
+  NEVER_COMPACT              14/20
+  COMPACT_CANT_DRIVE         6/20
+  action_mag mean=0.325 p10=0.025 p90=0.778 (0=stopped, 1=full speed)
+  min_flock_radius mean=7.27m best=2.17m  (target <5m to compact)
+  min_dog_to_com   mean=3.74m best=0.07m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.01m best=6.24m
+  reward/step (mean): progress=+0.0026  alignment=+0.0000  pen_bonus=+0.0005  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1728    |
+|    iterations      | 120     |
+|    time_elapsed    | 1137    |
+|    total_timesteps | 1966080 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1734         |
+|    iterations           | 121          |
+|    time_elapsed         | 1143         |
+|    total_timesteps      | 1982464      |
+| train/                  |              |
+|    approx_kl            | 0.0061555626 |
+|    clip_fraction        | 0.0631       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.17        |
+|    explained_variance   | 0.932        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0328      |
+|    n_updates            | 1200         |
+|    policy_gradient_loss | -0.00446     |
+|    std                  | 1.19         |
+|    value_loss           | 0.0133       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1739         |
+|    iterations           | 122          |
+|    time_elapsed         | 1149         |
+|    total_timesteps      | 1998848      |
+| train/                  |              |
+|    approx_kl            | 0.0060347347 |
+|    clip_fraction        | 0.057        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.18        |
+|    explained_variance   | 0.841        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0352      |
+|    n_updates            | 1210         |
+|    policy_gradient_loss | -0.00322     |
+|    std                  | 1.19         |
+|    value_loss           | 0.0104       |
+------------------------------------------
+Eval num_timesteps=2000000, episode_reward=-37.97 +/- 46.26
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -38          |
+| time/                   |              |
+|    total_timesteps      | 2000000      |
+| train/                  |              |
+|    approx_kl            | 0.0063244104 |
+|    clip_fraction        | 0.0675       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.18        |
+|    explained_variance   | 0.865        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0217      |
+|    n_updates            | 1220         |
+|    policy_gradient_loss | -0.00489     |
+|    std                  | 1.2          |
+|    value_loss           | 0.0219       |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1725    |
+|    iterations      | 123     |
+|    time_elapsed    | 1167    |
+|    total_timesteps | 2015232 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1730        |
+|    iterations           | 124         |
+|    time_elapsed         | 1173        |
+|    total_timesteps      | 2031616     |
+| train/                  |             |
+|    approx_kl            | 0.007022621 |
+|    clip_fraction        | 0.0816      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.19       |
+|    explained_variance   | 0.949       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0248     |
+|    n_updates            | 1230        |
+|    policy_gradient_loss | -0.0053     |
+|    std                  | 1.19        |
+|    value_loss           | 0.00677     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1735        |
+|    iterations           | 125         |
+|    time_elapsed         | 1179        |
+|    total_timesteps      | 2048000     |
+| train/                  |             |
+|    approx_kl            | 0.006686856 |
+|    clip_fraction        | 0.0653      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.18       |
+|    explained_variance   | 0.928       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0333     |
+|    n_updates            | 1240        |
+|    policy_gradient_loss | -0.00445    |
+|    std                  | 1.19        |
+|    value_loss           | 0.00651     |
+-----------------------------------------
+Eval num_timesteps=2050000, episode_reward=-27.67 +/- 36.42
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -27.7       |
+| time/                   |             |
+|    total_timesteps      | 2050000     |
+| train/                  |             |
+|    approx_kl            | 0.006721792 |
+|    clip_fraction        | 0.0675      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.2        |
+|    explained_variance   | 0.921       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0278     |
+|    n_updates            | 1250        |
+|    policy_gradient_loss | -0.00408    |
+|    std                  | 1.21        |
+|    value_loss           | 0.00793     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1721    |
+|    iterations      | 126     |
+|    time_elapsed    | 1198    |
+|    total_timesteps | 2064384 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1726        |
+|    iterations           | 127         |
+|    time_elapsed         | 1205        |
+|    total_timesteps      | 2080768     |
+| train/                  |             |
+|    approx_kl            | 0.006730888 |
+|    clip_fraction        | 0.0617      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.23       |
+|    explained_variance   | 0.911       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0276     |
+|    n_updates            | 1260        |
+|    policy_gradient_loss | -0.00378    |
+|    std                  | 1.22        |
+|    value_loss           | 0.00964     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1732        |
+|    iterations           | 128         |
+|    time_elapsed         | 1210        |
+|    total_timesteps      | 2097152     |
+| train/                  |             |
+|    approx_kl            | 0.007725292 |
+|    clip_fraction        | 0.0775      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.23       |
+|    explained_variance   | 0.913       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0371     |
+|    n_updates            | 1270        |
+|    policy_gradient_loss | -0.006      |
+|    std                  | 1.22        |
+|    value_loss           | 0.0109      |
+-----------------------------------------
+Eval num_timesteps=2100000, episode_reward=-40.56 +/- 44.37
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -40.6        |
+| time/                   |              |
+|    total_timesteps      | 2100000      |
+| train/                  |              |
+|    approx_kl            | 0.0067186276 |
+|    clip_fraction        | 0.0644       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.24        |
+|    explained_variance   | 0.845        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0357      |
+|    n_updates            | 1280         |
+|    policy_gradient_loss | -0.00433     |
+|    std                  | 1.23         |
+|    value_loss           | 0.0263       |
+------------------------------------------
+
+[Diag @ 2,100,000 | n_sheep=4 | success=0%]
+  NEVER_COMPACT              12/20
+  COMPACT_CANT_DRIVE         8/20
+  action_mag mean=0.384 p10=0.018 p90=0.884 (0=stopped, 1=full speed)
+  min_flock_radius mean=6.36m best=2.11m  (target <5m to compact)
+  min_dog_to_com   mean=2.94m best=0.40m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.34m best=5.56m
+  reward/step (mean): progress=-0.0084  alignment=+0.0000  pen_bonus=+0.0003  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1706    |
+|    iterations      | 129     |
+|    time_elapsed    | 1238    |
+|    total_timesteps | 2113536 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1712        |
+|    iterations           | 130         |
+|    time_elapsed         | 1243        |
+|    total_timesteps      | 2129920     |
+| train/                  |             |
+|    approx_kl            | 0.006317258 |
+|    clip_fraction        | 0.0623      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.26       |
+|    explained_variance   | 0.912       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0419     |
+|    n_updates            | 1290        |
+|    policy_gradient_loss | -0.00427    |
+|    std                  | 1.24        |
+|    value_loss           | 0.00859     |
+-----------------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 1716       |
+|    iterations           | 131        |
+|    time_elapsed         | 1250       |
+|    total_timesteps      | 2146304    |
+| train/                  |            |
+|    approx_kl            | 0.00636432 |
+|    clip_fraction        | 0.0698     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -3.28      |
+|    explained_variance   | 0.851      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0266    |
+|    n_updates            | 1300       |
+|    policy_gradient_loss | -0.00374   |
+|    std                  | 1.25       |
+|    value_loss           | 0.0299     |
+----------------------------------------
+Eval num_timesteps=2150000, episode_reward=-63.32 +/- 33.74
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -63.3        |
+| time/                   |              |
+|    total_timesteps      | 2150000      |
+| train/                  |              |
+|    approx_kl            | 0.0060345423 |
+|    clip_fraction        | 0.0563       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.27        |
+|    explained_variance   | 0.898        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0404      |
+|    n_updates            | 1310         |
+|    policy_gradient_loss | -0.00356     |
+|    std                  | 1.24         |
+|    value_loss           | 0.0205       |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1704    |
+|    iterations      | 132     |
+|    time_elapsed    | 1268    |
+|    total_timesteps | 2162688 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1709        |
+|    iterations           | 133         |
+|    time_elapsed         | 1274        |
+|    total_timesteps      | 2179072     |
+| train/                  |             |
+|    approx_kl            | 0.007027424 |
+|    clip_fraction        | 0.0693      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.25       |
+|    explained_variance   | 0.9         |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0315     |
+|    n_updates            | 1320        |
+|    policy_gradient_loss | -0.00521    |
+|    std                  | 1.23        |
+|    value_loss           | 0.0194      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1715        |
+|    iterations           | 134         |
+|    time_elapsed         | 1279        |
+|    total_timesteps      | 2195456     |
+| train/                  |             |
+|    approx_kl            | 0.006112649 |
+|    clip_fraction        | 0.0635      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.24       |
+|    explained_variance   | 0.957       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0339     |
+|    n_updates            | 1330        |
+|    policy_gradient_loss | -0.00383    |
+|    std                  | 1.23        |
+|    value_loss           | 0.00861     |
+-----------------------------------------
+Eval num_timesteps=2200000, episode_reward=-31.28 +/- 44.80
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -31.3        |
+| time/                   |              |
+|    total_timesteps      | 2200000      |
+| train/                  |              |
+|    approx_kl            | 0.0070182728 |
+|    clip_fraction        | 0.076        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.26        |
+|    explained_variance   | 0.883        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0412      |
+|    n_updates            | 1340         |
+|    policy_gradient_loss | -0.00534     |
+|    std                  | 1.25         |
+|    value_loss           | 0.013        |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1704    |
+|    iterations      | 135     |
+|    time_elapsed    | 1297    |
+|    total_timesteps | 2211840 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1708         |
+|    iterations           | 136          |
+|    time_elapsed         | 1304         |
+|    total_timesteps      | 2228224      |
+| train/                  |              |
+|    approx_kl            | 0.0062820893 |
+|    clip_fraction        | 0.062        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.26        |
+|    explained_variance   | 0.924        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0377      |
+|    n_updates            | 1350         |
+|    policy_gradient_loss | -0.00497     |
+|    std                  | 1.24         |
+|    value_loss           | 0.00797      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1713         |
+|    iterations           | 137          |
+|    time_elapsed         | 1310         |
+|    total_timesteps      | 2244608      |
+| train/                  |              |
+|    approx_kl            | 0.0072454046 |
+|    clip_fraction        | 0.0747       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.25        |
+|    explained_variance   | 0.94         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0366      |
+|    n_updates            | 1360         |
+|    policy_gradient_loss | -0.00572     |
+|    std                  | 1.23         |
+|    value_loss           | 0.00852      |
+------------------------------------------
+Eval num_timesteps=2250000, episode_reward=-36.00 +/- 38.67
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -36         |
+| time/                   |             |
+|    total_timesteps      | 2250000     |
+| train/                  |             |
+|    approx_kl            | 0.005690419 |
+|    clip_fraction        | 0.0546      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.25       |
+|    explained_variance   | 0.957       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0376     |
+|    n_updates            | 1370        |
+|    policy_gradient_loss | -0.00425    |
+|    std                  | 1.23        |
+|    value_loss           | 0.00524     |
+-----------------------------------------
+
+[Diag @ 2,250,000 | n_sheep=4 | success=0%]
+  NEVER_COMPACT              13/20
+  COMPACT_CANT_DRIVE         7/20
+  action_mag mean=0.416 p10=0.038 p90=0.887 (0=stopped, 1=full speed)
+  min_flock_radius mean=6.62m best=2.03m  (target <5m to compact)
+  min_dog_to_com   mean=3.54m best=0.40m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=14.24m best=9.65m
+  reward/step (mean): progress=-0.0070  alignment=+0.0000  pen_bonus=+0.0005  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1690    |
+|    iterations      | 138     |
+|    time_elapsed    | 1337    |
+|    total_timesteps | 2260992 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1696         |
+|    iterations           | 139          |
+|    time_elapsed         | 1342         |
+|    total_timesteps      | 2277376      |
+| train/                  |              |
+|    approx_kl            | 0.0072061084 |
+|    clip_fraction        | 0.0728       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.25        |
+|    explained_variance   | 0.954        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0312      |
+|    n_updates            | 1380         |
+|    policy_gradient_loss | -0.00512     |
+|    std                  | 1.23         |
+|    value_loss           | 0.006        |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1702         |
+|    iterations           | 140          |
+|    time_elapsed         | 1347         |
+|    total_timesteps      | 2293760      |
+| train/                  |              |
+|    approx_kl            | 0.0066916933 |
+|    clip_fraction        | 0.0626       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.24        |
+|    explained_variance   | 0.939        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0408      |
+|    n_updates            | 1390         |
+|    policy_gradient_loss | -0.00463     |
+|    std                  | 1.23         |
+|    value_loss           | 0.00827      |
+------------------------------------------
+Eval num_timesteps=2300000, episode_reward=-43.65 +/- 42.86
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -43.7        |
+| time/                   |              |
+|    total_timesteps      | 2300000      |
+| train/                  |              |
+|    approx_kl            | 0.0062987795 |
+|    clip_fraction        | 0.0609       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.26        |
+|    explained_variance   | 0.898        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0316      |
+|    n_updates            | 1400         |
+|    policy_gradient_loss | -0.00442     |
+|    std                  | 1.25         |
+|    value_loss           | 0.00955      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1691    |
+|    iterations      | 141     |
+|    time_elapsed    | 1365    |
+|    total_timesteps | 2310144 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1696        |
+|    iterations           | 142         |
+|    time_elapsed         | 1371        |
+|    total_timesteps      | 2326528     |
+| train/                  |             |
+|    approx_kl            | 0.005443076 |
+|    clip_fraction        | 0.054       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.27       |
+|    explained_variance   | 0.877       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0296     |
+|    n_updates            | 1410        |
+|    policy_gradient_loss | -0.00375    |
+|    std                  | 1.24        |
+|    value_loss           | 0.00928     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1701        |
+|    iterations           | 143         |
+|    time_elapsed         | 1376        |
+|    total_timesteps      | 2342912     |
+| train/                  |             |
+|    approx_kl            | 0.004740049 |
+|    clip_fraction        | 0.0456      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.26       |
+|    explained_variance   | 0.922       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0318     |
+|    n_updates            | 1420        |
+|    policy_gradient_loss | -0.00351    |
+|    std                  | 1.24        |
+|    value_loss           | 0.0156      |
+-----------------------------------------
+Eval num_timesteps=2350000, episode_reward=-37.57 +/- 37.78
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -37.6        |
+| time/                   |              |
+|    total_timesteps      | 2350000      |
+| train/                  |              |
+|    approx_kl            | 0.0056120222 |
+|    clip_fraction        | 0.0542       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.27        |
+|    explained_variance   | 0.911        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0272      |
+|    n_updates            | 1430         |
+|    policy_gradient_loss | -0.0035      |
+|    std                  | 1.25         |
+|    value_loss           | 0.00811      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1690    |
+|    iterations      | 144     |
+|    time_elapsed    | 1395    |
+|    total_timesteps | 2359296 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1695         |
+|    iterations           | 145          |
+|    time_elapsed         | 1401         |
+|    total_timesteps      | 2375680      |
+| train/                  |              |
+|    approx_kl            | 0.0064737825 |
+|    clip_fraction        | 0.0697       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.28        |
+|    explained_variance   | 0.93         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.036       |
+|    n_updates            | 1440         |
+|    policy_gradient_loss | -0.00403     |
+|    std                  | 1.25         |
+|    value_loss           | 0.00488      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1699         |
+|    iterations           | 146          |
+|    time_elapsed         | 1407         |
+|    total_timesteps      | 2392064      |
+| train/                  |              |
+|    approx_kl            | 0.0050720195 |
+|    clip_fraction        | 0.0466       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.29        |
+|    explained_variance   | 0.902        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0374      |
+|    n_updates            | 1450         |
+|    policy_gradient_loss | -0.00283     |
+|    std                  | 1.26         |
+|    value_loss           | 0.00958      |
+------------------------------------------
+Eval num_timesteps=2400000, episode_reward=-42.55 +/- 37.89
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -42.6       |
+| time/                   |             |
+|    total_timesteps      | 2400000     |
+| train/                  |             |
+|    approx_kl            | 0.005990128 |
+|    clip_fraction        | 0.0565      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.31       |
+|    explained_variance   | 0.869       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0448     |
+|    n_updates            | 1460        |
+|    policy_gradient_loss | -0.0051     |
+|    std                  | 1.27        |
+|    value_loss           | 0.00854     |
+-----------------------------------------
+
+[Diag @ 2,400,000 | n_sheep=4 | success=0%]
+  NEVER_COMPACT              15/20
+  COMPACT_CANT_DRIVE         5/20
+  action_mag mean=0.424 p10=0.025 p90=0.948 (0=stopped, 1=full speed)
+  min_flock_radius mean=7.66m best=1.63m  (target <5m to compact)
+  min_dog_to_com   mean=4.77m best=0.32m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=14.47m best=8.96m
+  reward/step (mean): progress=-0.0008  alignment=+0.0000  pen_bonus=+0.0003  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1677    |
+|    iterations      | 147     |
+|    time_elapsed    | 1435    |
+|    total_timesteps | 2408448 |
+--------------------------------
+
+Training complete. Artefacts saved to runs/ppo_fix_check/
diff --git a/training/runs/ppo_fix_check/best_model/best_model.zip b/training/runs/ppo_fix_check/best_model/best_model.zip
new file mode 100644
index 0000000..8533c33
Binary files /dev/null and b/training/runs/ppo_fix_check/best_model/best_model.zip differ
diff --git a/training/runs/ppo_fix_check/evaluations.npz b/training/runs/ppo_fix_check/evaluations.npz
new file mode 100644
index 0000000..9ae65e5
Binary files /dev/null and b/training/runs/ppo_fix_check/evaluations.npz differ
diff --git a/training/runs/ppo_fix_check/final_model.zip b/training/runs/ppo_fix_check/final_model.zip
new file mode 100644
index 0000000..7e1248e
Binary files /dev/null and b/training/runs/ppo_fix_check/final_model.zip differ
diff --git a/training/runs/ppo_fix_check/vecnorm.pkl b/training/runs/ppo_fix_check/vecnorm.pkl
new file mode 100644
index 0000000..f51753c
Binary files /dev/null and b/training/runs/ppo_fix_check/vecnorm.pkl differ