diff --git a/training/runs/ppo_fix_check2.log b/training/runs/ppo_fix_check2.log
new file mode 100644
index 0000000..a345ff5
--- /dev/null
+++ b/training/runs/ppo_fix_check2.log
@@ -0,0 +1,3391 @@
+Using cpu device
+Logging to runs/ppo_fix_check2/ppo_1
+------------------------------
+| time/              |       |
+|    fps             | 4605  |
+|    iterations      | 1     |
+|    time_elapsed    | 3     |
+|    total_timesteps | 16384 |
+------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 4011         |
+|    iterations           | 2            |
+|    time_elapsed         | 8            |
+|    total_timesteps      | 32768        |
+| train/                  |              |
+|    approx_kl            | 0.0033352287 |
+|    clip_fraction        | 0.0253       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.83        |
+|    explained_variance   | 0.271        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.00687     |
+|    n_updates            | 10           |
+|    policy_gradient_loss | -0.00103     |
+|    std                  | 0.996        |
+|    value_loss           | 0.0684       |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 3789        |
+|    iterations           | 3           |
+|    time_elapsed         | 12          |
+|    total_timesteps      | 49152       |
+| train/                  |             |
+|    approx_kl            | 0.005950423 |
+|    clip_fraction        | 0.0552      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.83       |
+|    explained_variance   | 0.527       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0153     |
+|    n_updates            | 20          |
+|    policy_gradient_loss | -0.0029     |
+|    std                  | 0.997       |
+|    value_loss           | 0.0663      |
+-----------------------------------------
+/home/jalf/miniconda3/envs/tir/lib/python3.12/site-packages/stable_baselines3/common/evaluation.py:71: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.
+  warnings.warn(
+Eval num_timesteps=50000, episode_reward=-25.68 +/- 59.67
+Episode length: 1815.95 +/- 456.88
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.82e+03     |
+|    mean_reward          | -25.7        |
+| time/                   |              |
+|    total_timesteps      | 50000        |
+| train/                  |              |
+|    approx_kl            | 0.0040030424 |
+|    clip_fraction        | 0.0356       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.85        |
+|    explained_variance   | 0.421        |
+|    learning_rate        | 0.0003       |
+|    loss                 | 0.149        |
+|    n_updates            | 30           |
+|    policy_gradient_loss | -0.00198     |
+|    std                  | 1.01         |
+|    value_loss           | 0.114        |
+------------------------------------------
+New best mean reward!
+------------------------------
+| time/              |       |
+|    fps             | 2351  |
+|    iterations      | 4     |
+|    time_elapsed    | 27    |
+|    total_timesteps | 65536 |
+------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2446        |
+|    iterations           | 5           |
+|    time_elapsed         | 33          |
+|    total_timesteps      | 81920       |
+| train/                  |             |
+|    approx_kl            | 0.005522004 |
+|    clip_fraction        | 0.0604      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.86       |
+|    explained_variance   | 0.737       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0301     |
+|    n_updates            | 40          |
+|    policy_gradient_loss | -0.00434    |
+|    std                  | 1.01        |
+|    value_loss           | 0.0164      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2617         |
+|    iterations           | 6            |
+|    time_elapsed         | 37           |
+|    total_timesteps      | 98304        |
+| train/                  |              |
+|    approx_kl            | 0.0052388343 |
+|    clip_fraction        | 0.0463       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.86        |
+|    explained_variance   | 0.626        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0294      |
+|    n_updates            | 50           |
+|    policy_gradient_loss | -0.00297     |
+|    std                  | 1.01         |
+|    value_loss           | 0.0597       |
+------------------------------------------
+/home/jalf/miniconda3/envs/tir/lib/python3.12/site-packages/stable_baselines3/common/evaluation.py:71: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.
+  warnings.warn(
+Eval num_timesteps=100000, episode_reward=-22.76 +/- 46.60
+Episode length: 1900.95 +/- 430.60
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 1.9e+03     |
+|    mean_reward          | -22.8       |
+| time/                   |             |
+|    total_timesteps      | 100000      |
+| train/                  |             |
+|    approx_kl            | 0.005612197 |
+|    clip_fraction        | 0.0475      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.86       |
+|    explained_variance   | 0.747       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0261     |
+|    n_updates            | 60          |
+|    policy_gradient_loss | -0.00393    |
+|    std                  | 1.01        |
+|    value_loss           | 0.0517      |
+-----------------------------------------
+New best mean reward!
+-------------------------------
+| time/              |        |
+|    fps             | 2178   |
+|    iterations      | 7      |
+|    time_elapsed    | 52     |
+|    total_timesteps | 114688 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2294         |
+|    iterations           | 8            |
+|    time_elapsed         | 57           |
+|    total_timesteps      | 131072       |
+| train/                  |              |
+|    approx_kl            | 0.0057119504 |
+|    clip_fraction        | 0.0541       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.85        |
+|    explained_variance   | 0.896        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0144      |
+|    n_updates            | 70           |
+|    policy_gradient_loss | -0.00364     |
+|    std                  | 1            |
+|    value_loss           | 0.0738       |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2393        |
+|    iterations           | 9           |
+|    time_elapsed         | 61          |
+|    total_timesteps      | 147456      |
+| train/                  |             |
+|    approx_kl            | 0.005940904 |
+|    clip_fraction        | 0.0565      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.85       |
+|    explained_variance   | 0.89        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0283     |
+|    n_updates            | 80          |
+|    policy_gradient_loss | -0.00245    |
+|    std                  | 1.01        |
+|    value_loss           | 0.0761      |
+-----------------------------------------
+Eval num_timesteps=150000, episode_reward=-29.37 +/- 28.32
+Episode length: 1997.50 +/- 10.90
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -29.4       |
+| time/                   |             |
+|    total_timesteps      | 150000      |
+| train/                  |             |
+|    approx_kl            | 0.004531667 |
+|    clip_fraction        | 0.0392      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.85       |
+|    explained_variance   | 0.958       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0343     |
+|    n_updates            | 90          |
+|    policy_gradient_loss | -0.00379    |
+|    std                  | 1.01        |
+|    value_loss           | 0.00995     |
+-----------------------------------------
+
+[Diag @ 150,000 | n_sheep=1 | success=0%]
+  COMPACT_CANT_DRIVE         17/20
+  DROVE_NO_SHEEP             3/20
+  action_mag mean=0.089 p10=0.003 p90=0.274 (0=stopped, 1=full speed)
+  min_flock_radius mean=0.00m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=4.40m best=2.07m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=11.66m best=1.50m
+  reward/step (mean): progress=+0.0004  alignment=+0.0000  pen_bonus=+0.0000  step_cost=-0.0200  complete=+0.0000
+-------------------------------
+| time/              |        |
+|    fps             | 1950   |
+|    iterations      | 10     |
+|    time_elapsed    | 84     |
+|    total_timesteps | 163840 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2020         |
+|    iterations           | 11           |
+|    time_elapsed         | 89           |
+|    total_timesteps      | 180224       |
+| train/                  |              |
+|    approx_kl            | 0.0061831754 |
+|    clip_fraction        | 0.068        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.86        |
+|    explained_variance   | 0.975        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0349      |
+|    n_updates            | 100          |
+|    policy_gradient_loss | -0.00607     |
+|    std                  | 1.02         |
+|    value_loss           | 0.0156       |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2084        |
+|    iterations           | 12          |
+|    time_elapsed         | 94          |
+|    total_timesteps      | 196608      |
+| train/                  |             |
+|    approx_kl            | 0.009407628 |
+|    clip_fraction        | 0.123       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.87       |
+|    explained_variance   | 0.899       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0305     |
+|    n_updates            | 110         |
+|    policy_gradient_loss | -0.00932    |
+|    std                  | 1.02        |
+|    value_loss           | 0.0223      |
+-----------------------------------------
+Eval num_timesteps=200000, episode_reward=-12.36 +/- 51.37
+Episode length: 1880.20 +/- 355.04
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 1.88e+03    |
+|    mean_reward          | -12.4       |
+| time/                   |             |
+|    total_timesteps      | 200000      |
+| train/                  |             |
+|    approx_kl            | 0.008270489 |
+|    clip_fraction        | 0.0945      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.85       |
+|    explained_variance   | 0.945       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0339     |
+|    n_updates            | 120         |
+|    policy_gradient_loss | -0.00809    |
+|    std                  | 1           |
+|    value_loss           | 0.0162      |
+-----------------------------------------
+New best mean reward!
+-------------------------------
+| time/              |        |
+|    fps             | 1936   |
+|    iterations      | 13     |
+|    time_elapsed    | 109    |
+|    total_timesteps | 212992 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1989        |
+|    iterations           | 14          |
+|    time_elapsed         | 115         |
+|    total_timesteps      | 229376      |
+| train/                  |             |
+|    approx_kl            | 0.008541125 |
+|    clip_fraction        | 0.112       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.83       |
+|    explained_variance   | 0.944       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0184     |
+|    n_updates            | 130         |
+|    policy_gradient_loss | -0.00846    |
+|    std                  | 0.994       |
+|    value_loss           | 0.0284      |
+-----------------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 2037       |
+|    iterations           | 15         |
+|    time_elapsed         | 120        |
+|    total_timesteps      | 245760     |
+| train/                  |            |
+|    approx_kl            | 0.00763176 |
+|    clip_fraction        | 0.0894     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.81      |
+|    explained_variance   | 0.9        |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0128    |
+|    n_updates            | 140        |
+|    policy_gradient_loss | -0.00655   |
+|    std                  | 0.987      |
+|    value_loss           | 0.071      |
+----------------------------------------
+Eval num_timesteps=250000, episode_reward=45.82 +/- 68.33
+Episode length: 1391.70 +/- 757.58
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 1.39e+03    |
+|    mean_reward          | 45.8        |
+| time/                   |             |
+|    total_timesteps      | 250000      |
+| train/                  |             |
+|    approx_kl            | 0.009210973 |
+|    clip_fraction        | 0.11        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.81       |
+|    explained_variance   | 0.95        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0401     |
+|    n_updates            | 150         |
+|    policy_gradient_loss | -0.0082     |
+|    std                  | 0.986       |
+|    value_loss           | 0.0202      |
+-----------------------------------------
+New best mean reward!
+-------------------------------
+| time/              |        |
+|    fps             | 1958   |
+|    iterations      | 16     |
+|    time_elapsed    | 133    |
+|    total_timesteps | 262144 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2005        |
+|    iterations           | 17          |
+|    time_elapsed         | 138         |
+|    total_timesteps      | 278528      |
+| train/                  |             |
+|    approx_kl            | 0.008197077 |
+|    clip_fraction        | 0.096       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.79       |
+|    explained_variance   | 0.949       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0375     |
+|    n_updates            | 160         |
+|    policy_gradient_loss | -0.00834    |
+|    std                  | 0.976       |
+|    value_loss           | 0.0207      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2061        |
+|    iterations           | 18          |
+|    time_elapsed         | 143         |
+|    total_timesteps      | 294912      |
+| train/                  |             |
+|    approx_kl            | 0.006078005 |
+|    clip_fraction        | 0.0598      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.78       |
+|    explained_variance   | 0.965       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0188     |
+|    n_updates            | 170         |
+|    policy_gradient_loss | -0.00464    |
+|    std                  | 0.969       |
+|    value_loss           | 0.0178      |
+-----------------------------------------
+Eval num_timesteps=300000, episode_reward=56.19 +/- 63.26
+Episode length: 1246.75 +/- 843.82
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.25e+03     |
+|    mean_reward          | 56.2         |
+| time/                   |              |
+|    total_timesteps      | 300000       |
+| train/                  |              |
+|    approx_kl            | 0.0056289425 |
+|    clip_fraction        | 0.0523       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.76        |
+|    explained_variance   | 0.969        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0246      |
+|    n_updates            | 180          |
+|    policy_gradient_loss | -0.00378     |
+|    std                  | 0.961        |
+|    value_loss           | 0.0174       |
+------------------------------------------
+New best mean reward!
+
+[Diag @ 300,000 | n_sheep=1 | success=40%]
+  DROVE_NO_SHEEP             11/20
+  SUCCESS                    8/20
+  COMPACT_CANT_DRIVE         1/20
+  action_mag mean=0.076 p10=0.000 p90=0.193 (0=stopped, 1=full speed)
+  min_flock_radius mean=0.00m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=2.83m best=0.24m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=2.99m best=1.50m
+  reward/step (mean): progress=+0.0236  alignment=+0.0012  pen_bonus=+0.0029  step_cost=-0.0200  complete=+0.0291
+-------------------------------
+| time/              |        |
+|    fps             | 1939   |
+|    iterations      | 19     |
+|    time_elapsed    | 160    |
+|    total_timesteps | 311296 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1983        |
+|    iterations           | 20          |
+|    time_elapsed         | 165         |
+|    total_timesteps      | 327680      |
+| train/                  |             |
+|    approx_kl            | 0.005042998 |
+|    clip_fraction        | 0.05        |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.73       |
+|    explained_variance   | 0.941       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0242     |
+|    n_updates            | 190         |
+|    policy_gradient_loss | -0.00399    |
+|    std                  | 0.947       |
+|    value_loss           | 0.00505     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2018         |
+|    iterations           | 21           |
+|    time_elapsed         | 170          |
+|    total_timesteps      | 344064       |
+| train/                  |              |
+|    approx_kl            | 0.0054986854 |
+|    clip_fraction        | 0.0569       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.72        |
+|    explained_variance   | 0.942        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0248      |
+|    n_updates            | 200          |
+|    policy_gradient_loss | -0.00415     |
+|    std                  | 0.941        |
+|    value_loss           | 0.00784      |
+------------------------------------------
+Eval num_timesteps=350000, episode_reward=25.08 +/- 61.55
+Episode length: 1562.00 +/- 761.23
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.56e+03     |
+|    mean_reward          | 25.1         |
+| time/                   |              |
+|    total_timesteps      | 350000       |
+| train/                  |              |
+|    approx_kl            | 0.0046333643 |
+|    clip_fraction        | 0.0476       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.71        |
+|    explained_variance   | 0.934        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0244      |
+|    n_updates            | 210          |
+|    policy_gradient_loss | -0.00237     |
+|    std                  | 0.934        |
+|    value_loss           | 0.00827      |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1950   |
+|    iterations      | 22     |
+|    time_elapsed    | 184    |
+|    total_timesteps | 360448 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1990        |
+|    iterations           | 23          |
+|    time_elapsed         | 189         |
+|    total_timesteps      | 376832      |
+| train/                  |             |
+|    approx_kl            | 0.006686668 |
+|    clip_fraction        | 0.0757      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.7        |
+|    explained_variance   | 0.963       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0423     |
+|    n_updates            | 220         |
+|    policy_gradient_loss | -0.00244    |
+|    std                  | 0.936       |
+|    value_loss           | 0.00575     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2027        |
+|    iterations           | 24          |
+|    time_elapsed         | 193         |
+|    total_timesteps      | 393216      |
+| train/                  |             |
+|    approx_kl            | 0.009116547 |
+|    clip_fraction        | 0.103       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.71       |
+|    explained_variance   | 0.97        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0353     |
+|    n_updates            | 230         |
+|    policy_gradient_loss | -0.0042     |
+|    std                  | 0.941       |
+|    value_loss           | 0.006       |
+-----------------------------------------
+Eval num_timesteps=400000, episode_reward=56.91 +/- 71.91
+Episode length: 1225.25 +/- 861.21
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.23e+03     |
+|    mean_reward          | 56.9         |
+| time/                   |              |
+|    total_timesteps      | 400000       |
+| train/                  |              |
+|    approx_kl            | 0.0061917743 |
+|    clip_fraction        | 0.0658       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.72        |
+|    explained_variance   | 0.975        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0378      |
+|    n_updates            | 240          |
+|    policy_gradient_loss | -0.00282     |
+|    std                  | 0.943        |
+|    value_loss           | 0.00633      |
+------------------------------------------
+New best mean reward!
+-------------------------------
+| time/              |        |
+|    fps             | 1981   |
+|    iterations      | 25     |
+|    time_elapsed    | 206    |
+|    total_timesteps | 409600 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2011        |
+|    iterations           | 26          |
+|    time_elapsed         | 211         |
+|    total_timesteps      | 425984      |
+| train/                  |             |
+|    approx_kl            | 0.007945089 |
+|    clip_fraction        | 0.1         |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.73       |
+|    explained_variance   | 0.978       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0343     |
+|    n_updates            | 250         |
+|    policy_gradient_loss | -0.00475    |
+|    std                  | 0.95        |
+|    value_loss           | 0.00708     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2044        |
+|    iterations           | 27          |
+|    time_elapsed         | 216         |
+|    total_timesteps      | 442368      |
+| train/                  |             |
+|    approx_kl            | 0.013059773 |
+|    clip_fraction        | 0.152       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.76       |
+|    explained_variance   | 0.984       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0421     |
+|    n_updates            | 260         |
+|    policy_gradient_loss | -0.00542    |
+|    std                  | 0.967       |
+|    value_loss           | 0.00331     |
+-----------------------------------------
+Eval num_timesteps=450000, episode_reward=58.80 +/- 74.46
+Episode length: 1123.15 +/- 881.85
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.12e+03     |
+|    mean_reward          | 58.8         |
+| time/                   |              |
+|    total_timesteps      | 450000       |
+| train/                  |              |
+|    approx_kl            | 0.0085322345 |
+|    clip_fraction        | 0.0967       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.77        |
+|    explained_variance   | 0.98         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0264      |
+|    n_updates            | 270          |
+|    policy_gradient_loss | -0.00612     |
+|    std                  | 0.963        |
+|    value_loss           | 0.00919      |
+------------------------------------------
+New best mean reward!
+
+[Diag @ 450,000 | n_sheep=1 | success=65%]
+  SUCCESS                    13/20
+  DROVE_NO_SHEEP             4/20
+  COMPACT_CANT_DRIVE         3/20
+  action_mag mean=0.105 p10=0.000 p90=0.272 (0=stopped, 1=full speed)
+  min_flock_radius mean=0.00m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=1.67m best=0.43m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=3.26m best=2.29m
+  reward/step (mean): progress=+0.0326  alignment=+0.0024  pen_bonus=+0.0076  step_cost=-0.0200  complete=+0.0762
+-------------------------------
+| time/              |        |
+|    fps             | 1974   |
+|    iterations      | 28     |
+|    time_elapsed    | 232    |
+|    total_timesteps | 458752 |
+-------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 2005       |
+|    iterations           | 29         |
+|    time_elapsed         | 236        |
+|    total_timesteps      | 475136     |
+| train/                  |            |
+|    approx_kl            | 0.01203198 |
+|    clip_fraction        | 0.146      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.79      |
+|    explained_variance   | 0.963      |
+|    learning_rate        | 0.0003     |
+|    loss                 | 0.00738    |
+|    n_updates            | 280        |
+|    policy_gradient_loss | -0.0128    |
+|    std                  | 0.982      |
+|    value_loss           | 0.0749     |
+----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2037         |
+|    iterations           | 30           |
+|    time_elapsed         | 241          |
+|    total_timesteps      | 491520       |
+| train/                  |              |
+|    approx_kl            | 0.0078244675 |
+|    clip_fraction        | 0.0856       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.8         |
+|    explained_variance   | 0.937        |
+|    learning_rate        | 0.0003       |
+|    loss                 | 0.0631       |
+|    n_updates            | 290          |
+|    policy_gradient_loss | -0.00651     |
+|    std                  | 0.977        |
+|    value_loss           | 0.131        |
+------------------------------------------
+Eval num_timesteps=500000, episode_reward=135.29 +/- 9.81
+Episode length: 287.30 +/- 88.71
+----------------------------------------
+| eval/                   |            |
+|    mean_ep_length       | 287        |
+|    mean_reward          | 135        |
+| time/                   |            |
+|    total_timesteps      | 500000     |
+| train/                  |            |
+|    approx_kl            | 0.00837522 |
+|    clip_fraction        | 0.0866     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.77      |
+|    explained_variance   | 0.948      |
+|    learning_rate        | 0.0003     |
+|    loss                 | 0.041      |
+|    n_updates            | 300        |
+|    policy_gradient_loss | -0.00532   |
+|    std                  | 0.962      |
+|    value_loss           | 0.0898     |
+----------------------------------------
+New best mean reward!
+-------------------------------
+| time/              |        |
+|    fps             | 2048   |
+|    iterations      | 31     |
+|    time_elapsed    | 247    |
+|    total_timesteps | 507904 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2070         |
+|    iterations           | 32           |
+|    time_elapsed         | 253          |
+|    total_timesteps      | 524288       |
+| train/                  |              |
+|    approx_kl            | 0.0067581255 |
+|    clip_fraction        | 0.0543       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.75        |
+|    explained_variance   | 0.932        |
+|    learning_rate        | 0.0003       |
+|    loss                 | 0.0518       |
+|    n_updates            | 310          |
+|    policy_gradient_loss | -0.00297     |
+|    std                  | 0.954        |
+|    value_loss           | 0.111        |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2090         |
+|    iterations           | 33           |
+|    time_elapsed         | 258          |
+|    total_timesteps      | 540672       |
+| train/                  |              |
+|    approx_kl            | 0.0066835573 |
+|    clip_fraction        | 0.0597       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.74        |
+|    explained_variance   | 0.934        |
+|    learning_rate        | 0.0003       |
+|    loss                 | 0.00545      |
+|    n_updates            | 320          |
+|    policy_gradient_loss | -0.00508     |
+|    std                  | 0.949        |
+|    value_loss           | 0.101        |
+------------------------------------------
+Eval num_timesteps=550000, episode_reward=136.08 +/- 11.93
+Episode length: 285.80 +/- 123.59
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 286          |
+|    mean_reward          | 136          |
+| time/                   |              |
+|    total_timesteps      | 550000       |
+| train/                  |              |
+|    approx_kl            | 0.0062076193 |
+|    clip_fraction        | 0.0672       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.71        |
+|    explained_variance   | 0.942        |
+|    learning_rate        | 0.0003       |
+|    loss                 | 0.0229       |
+|    n_updates            | 330          |
+|    policy_gradient_loss | -0.00616     |
+|    std                  | 0.933        |
+|    value_loss           | 0.0813       |
+------------------------------------------
+New best mean reward!
+-------------------------------
+| time/              |        |
+|    fps             | 2104   |
+|    iterations      | 34     |
+|    time_elapsed    | 264    |
+|    total_timesteps | 557056 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2130         |
+|    iterations           | 35           |
+|    time_elapsed         | 269          |
+|    total_timesteps      | 573440       |
+| train/                  |              |
+|    approx_kl            | 0.0064913128 |
+|    clip_fraction        | 0.0631       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.67        |
+|    explained_variance   | 0.971        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0199      |
+|    n_updates            | 340          |
+|    policy_gradient_loss | -0.00631     |
+|    std                  | 0.917        |
+|    value_loss           | 0.0185       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2155         |
+|    iterations           | 36           |
+|    time_elapsed         | 273          |
+|    total_timesteps      | 589824       |
+| train/                  |              |
+|    approx_kl            | 0.0067110434 |
+|    clip_fraction        | 0.0719       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.63        |
+|    explained_variance   | 0.98         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0343      |
+|    n_updates            | 350          |
+|    policy_gradient_loss | -0.0069      |
+|    std                  | 0.897        |
+|    value_loss           | 0.0113       |
+------------------------------------------
+Eval num_timesteps=600000, episode_reward=135.45 +/- 12.96
+Episode length: 273.05 +/- 118.26
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 273          |
+|    mean_reward          | 135          |
+| time/                   |              |
+|    total_timesteps      | 600000       |
+| train/                  |              |
+|    approx_kl            | 0.0054842415 |
+|    clip_fraction        | 0.0564       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.59        |
+|    explained_variance   | 0.983        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.033       |
+|    n_updates            | 360          |
+|    policy_gradient_loss | -0.0042      |
+|    std                  | 0.883        |
+|    value_loss           | 0.00479      |
+------------------------------------------
+
+[Diag @ 600,000 | n_sheep=1 | success=100%]
+  SUCCESS                    20/20
+  action_mag mean=0.343 p10=0.232 p90=0.548 (0=stopped, 1=full speed)
+  min_flock_radius mean=0.00m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=1.53m best=0.76m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=3.49m best=2.84m
+  reward/step (mean): progress=+0.1066  alignment=+0.0088  pen_bonus=+0.0357  step_cost=-0.0200  complete=+0.3567
+
+[Curriculum] leaving stage n_sheep=1 after 600,000 steps | training success rate (last 100 eps) = 100%
+[Curriculum] → 2 sheep at step 600,000
+
+-------------------------------
+| time/              |        |
+|    fps             | 2156   |
+|    iterations      | 37     |
+|    time_elapsed    | 281    |
+|    total_timesteps | 606208 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2173        |
+|    iterations           | 38          |
+|    time_elapsed         | 286         |
+|    total_timesteps      | 622592      |
+| train/                  |             |
+|    approx_kl            | 0.011170821 |
+|    clip_fraction        | 0.117       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.59       |
+|    explained_variance   | 0.924       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0137     |
+|    n_updates            | 370         |
+|    policy_gradient_loss | 0.00714     |
+|    std                  | 0.886       |
+|    value_loss           | 0.0417      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2192        |
+|    iterations           | 39          |
+|    time_elapsed         | 291         |
+|    total_timesteps      | 638976      |
+| train/                  |             |
+|    approx_kl            | 0.012632904 |
+|    clip_fraction        | 0.156       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.6        |
+|    explained_variance   | 0.858       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.00445    |
+|    n_updates            | 380         |
+|    policy_gradient_loss | 0.00112     |
+|    std                  | 0.892       |
+|    value_loss           | 0.0156      |
+-----------------------------------------
+Eval num_timesteps=650000, episode_reward=-38.36 +/- 29.94
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -38.4       |
+| time/                   |             |
+|    total_timesteps      | 650000      |
+| train/                  |             |
+|    approx_kl            | 0.012015635 |
+|    clip_fraction        | 0.133       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.62       |
+|    explained_variance   | 0.946       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0168     |
+|    n_updates            | 390         |
+|    policy_gradient_loss | -0.000726   |
+|    std                  | 0.904       |
+|    value_loss           | 0.0126      |
+-----------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 2131   |
+|    iterations      | 40     |
+|    time_elapsed    | 307    |
+|    total_timesteps | 655360 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2145        |
+|    iterations           | 41          |
+|    time_elapsed         | 313         |
+|    total_timesteps      | 671744      |
+| train/                  |             |
+|    approx_kl            | 0.009391339 |
+|    clip_fraction        | 0.121       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.63       |
+|    explained_variance   | 0.955       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0164     |
+|    n_updates            | 400         |
+|    policy_gradient_loss | -0.00177    |
+|    std                  | 0.905       |
+|    value_loss           | 0.00536     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2156         |
+|    iterations           | 42           |
+|    time_elapsed         | 319          |
+|    total_timesteps      | 688128       |
+| train/                  |              |
+|    approx_kl            | 0.0077482145 |
+|    clip_fraction        | 0.0977       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.64        |
+|    explained_variance   | 0.895        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.023       |
+|    n_updates            | 410          |
+|    policy_gradient_loss | -0.00158     |
+|    std                  | 0.908        |
+|    value_loss           | 0.0068       |
+------------------------------------------
+Eval num_timesteps=700000, episode_reward=-16.26 +/- 48.54
+Episode length: 1934.20 +/- 286.82
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 1.93e+03    |
+|    mean_reward          | -16.3       |
+| time/                   |             |
+|    total_timesteps      | 700000      |
+| train/                  |             |
+|    approx_kl            | 0.007948186 |
+|    clip_fraction        | 0.0933      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.64       |
+|    explained_variance   | 0.934       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0205     |
+|    n_updates            | 420         |
+|    policy_gradient_loss | -0.00233    |
+|    std                  | 0.904       |
+|    value_loss           | 0.00556     |
+-----------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 2093   |
+|    iterations      | 43     |
+|    time_elapsed    | 336    |
+|    total_timesteps | 704512 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2109         |
+|    iterations           | 44           |
+|    time_elapsed         | 341          |
+|    total_timesteps      | 720896       |
+| train/                  |              |
+|    approx_kl            | 0.0077707805 |
+|    clip_fraction        | 0.101        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.64        |
+|    explained_variance   | 0.929        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.00469     |
+|    n_updates            | 430          |
+|    policy_gradient_loss | -0.00226     |
+|    std                  | 0.909        |
+|    value_loss           | 0.0031       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2129         |
+|    iterations           | 45           |
+|    time_elapsed         | 346          |
+|    total_timesteps      | 737280       |
+| train/                  |              |
+|    approx_kl            | 0.0063995067 |
+|    clip_fraction        | 0.0823       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.66        |
+|    explained_variance   | 0.951        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0249      |
+|    n_updates            | 440          |
+|    policy_gradient_loss | -0.00261     |
+|    std                  | 0.922        |
+|    value_loss           | 0.00343      |
+------------------------------------------
+Eval num_timesteps=750000, episode_reward=-12.10 +/- 56.78
+Episode length: 1850.50 +/- 449.09
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.85e+03     |
+|    mean_reward          | -12.1        |
+| time/                   |              |
+|    total_timesteps      | 750000       |
+| train/                  |              |
+|    approx_kl            | 0.0069549307 |
+|    clip_fraction        | 0.0847       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.68        |
+|    explained_variance   | 0.862        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0192      |
+|    n_updates            | 450          |
+|    policy_gradient_loss | -0.00165     |
+|    std                  | 0.929        |
+|    value_loss           | 0.0032       |
+------------------------------------------
+
+[Diag @ 750,000 | n_sheep=2 | success=5%]
+  COMPACT_CANT_DRIVE         9/20
+  NEVER_COMPACT              9/20
+  PARTIAL_1of2               1/20
+  SUCCESS                    1/20
+  action_mag mean=0.261 p10=0.002 p90=0.983 (0=stopped, 1=full speed)
+  min_flock_radius mean=3.93m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=0.79m best=0.07m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.43m best=1.62m
+  reward/step (mean): progress=-0.0058  alignment=+0.0087  pen_bonus=+0.0008  step_cost=-0.0200  complete=+0.0025
+-------------------------------
+| time/              |        |
+|    fps             | 2043   |
+|    iterations      | 46     |
+|    time_elapsed    | 368    |
+|    total_timesteps | 753664 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2062        |
+|    iterations           | 47          |
+|    time_elapsed         | 373         |
+|    total_timesteps      | 770048      |
+| train/                  |             |
+|    approx_kl            | 0.008165602 |
+|    clip_fraction        | 0.0997      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.69       |
+|    explained_variance   | 0.931       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0461     |
+|    n_updates            | 460         |
+|    policy_gradient_loss | -0.00412    |
+|    std                  | 0.932       |
+|    value_loss           | 0.00308     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2074        |
+|    iterations           | 48          |
+|    time_elapsed         | 379         |
+|    total_timesteps      | 786432      |
+| train/                  |             |
+|    approx_kl            | 0.006088208 |
+|    clip_fraction        | 0.0805      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.71       |
+|    explained_variance   | 0.917       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.034      |
+|    n_updates            | 470         |
+|    policy_gradient_loss | -0.000257   |
+|    std                  | 0.943       |
+|    value_loss           | 0.00533     |
+-----------------------------------------
+Eval num_timesteps=800000, episode_reward=-32.78 +/- 23.33
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -32.8        |
+| time/                   |              |
+|    total_timesteps      | 800000       |
+| train/                  |              |
+|    approx_kl            | 0.0069386996 |
+|    clip_fraction        | 0.0883       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.73        |
+|    explained_variance   | 0.954        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0361      |
+|    n_updates            | 480          |
+|    policy_gradient_loss | -0.00228     |
+|    std                  | 0.948        |
+|    value_loss           | 0.00495      |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 2028   |
+|    iterations      | 49     |
+|    time_elapsed    | 395    |
+|    total_timesteps | 802816 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2045         |
+|    iterations           | 50           |
+|    time_elapsed         | 400          |
+|    total_timesteps      | 819200       |
+| train/                  |              |
+|    approx_kl            | 0.0070893797 |
+|    clip_fraction        | 0.0687       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.74        |
+|    explained_variance   | 0.955        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.035       |
+|    n_updates            | 490          |
+|    policy_gradient_loss | -0.00221     |
+|    std                  | 0.954        |
+|    value_loss           | 0.00229      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 2060         |
+|    iterations           | 51           |
+|    time_elapsed         | 405          |
+|    total_timesteps      | 835584       |
+| train/                  |              |
+|    approx_kl            | 0.0068652867 |
+|    clip_fraction        | 0.0787       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.75        |
+|    explained_variance   | 0.863        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0337      |
+|    n_updates            | 500          |
+|    policy_gradient_loss | -0.00277     |
+|    std                  | 0.959        |
+|    value_loss           | 0.00229      |
+------------------------------------------
+Eval num_timesteps=850000, episode_reward=-14.34 +/- 48.77
+Episode length: 1998.40 +/- 6.97
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -14.3       |
+| time/                   |             |
+|    total_timesteps      | 850000      |
+| train/                  |             |
+|    approx_kl            | 0.007872021 |
+|    clip_fraction        | 0.0815      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.76       |
+|    explained_variance   | 0.852       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0358     |
+|    n_updates            | 510         |
+|    policy_gradient_loss | -0.00365    |
+|    std                  | 0.966       |
+|    value_loss           | 0.00272     |
+-----------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 2018   |
+|    iterations      | 52     |
+|    time_elapsed    | 422    |
+|    total_timesteps | 851968 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2032        |
+|    iterations           | 53          |
+|    time_elapsed         | 427         |
+|    total_timesteps      | 868352      |
+| train/                  |             |
+|    approx_kl            | 0.007002457 |
+|    clip_fraction        | 0.0752      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.78       |
+|    explained_variance   | 0.879       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0414     |
+|    n_updates            | 520         |
+|    policy_gradient_loss | -0.00242    |
+|    std                  | 0.977       |
+|    value_loss           | 0.00166     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2047        |
+|    iterations           | 54          |
+|    time_elapsed         | 432         |
+|    total_timesteps      | 884736      |
+| train/                  |             |
+|    approx_kl            | 0.007822147 |
+|    clip_fraction        | 0.0813      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.8        |
+|    explained_variance   | 0.871       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0376     |
+|    n_updates            | 530         |
+|    policy_gradient_loss | -0.00362    |
+|    std                  | 0.984       |
+|    value_loss           | 0.00212     |
+-----------------------------------------
+Eval num_timesteps=900000, episode_reward=-20.41 +/- 60.01
+Episode length: 1929.40 +/- 284.99
+----------------------------------------
+| eval/                   |            |
+|    mean_ep_length       | 1.93e+03   |
+|    mean_reward          | -20.4      |
+| time/                   |            |
+|    total_timesteps      | 900000     |
+| train/                  |            |
+|    approx_kl            | 0.00738756 |
+|    clip_fraction        | 0.0793     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.81      |
+|    explained_variance   | 0.808      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0355    |
+|    n_updates            | 540        |
+|    policy_gradient_loss | -0.00195   |
+|    std                  | 0.988      |
+|    value_loss           | 0.00721    |
+----------------------------------------
+
+[Diag @ 900,000 | n_sheep=2 | success=5%]
+  COMPACT_CANT_DRIVE         11/20
+  NEVER_COMPACT              8/20
+  SUCCESS                    1/20
+  action_mag mean=0.203 p10=0.007 p90=0.704 (0=stopped, 1=full speed)
+  min_flock_radius mean=3.40m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=0.60m best=0.11m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=14.01m best=3.61m
+  reward/step (mean): progress=-0.0040  alignment=+0.0071  pen_bonus=+0.0008  step_cost=-0.0200  complete=+0.0026
+-------------------------------
+| time/              |        |
+|    fps             | 1977   |
+|    iterations      | 55     |
+|    time_elapsed    | 455    |
+|    total_timesteps | 901120 |
+-------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1990        |
+|    iterations           | 56          |
+|    time_elapsed         | 460         |
+|    total_timesteps      | 917504      |
+| train/                  |             |
+|    approx_kl            | 0.007000256 |
+|    clip_fraction        | 0.0831      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.8        |
+|    explained_variance   | 0.889       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0285     |
+|    n_updates            | 550         |
+|    policy_gradient_loss | -0.00402    |
+|    std                  | 0.984       |
+|    value_loss           | 0.00171     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2005        |
+|    iterations           | 57          |
+|    time_elapsed         | 465         |
+|    total_timesteps      | 933888      |
+| train/                  |             |
+|    approx_kl            | 0.007749311 |
+|    clip_fraction        | 0.0755      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.83       |
+|    explained_variance   | 0.599       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.032      |
+|    n_updates            | 560         |
+|    policy_gradient_loss | -0.00239    |
+|    std                  | 1.01        |
+|    value_loss           | 0.00351     |
+-----------------------------------------
+Eval num_timesteps=950000, episode_reward=-13.16 +/- 44.70
+Episode length: 1949.30 +/- 221.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.95e+03     |
+|    mean_reward          | -13.2        |
+| time/                   |              |
+|    total_timesteps      | 950000       |
+| train/                  |              |
+|    approx_kl            | 0.0075328955 |
+|    clip_fraction        | 0.0829       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.85        |
+|    explained_variance   | 0.783        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0306      |
+|    n_updates            | 570          |
+|    policy_gradient_loss | -0.00352     |
+|    std                  | 1.01         |
+|    value_loss           | 0.00319      |
+------------------------------------------
+-------------------------------
+| time/              |        |
+|    fps             | 1971   |
+|    iterations      | 58     |
+|    time_elapsed    | 482    |
+|    total_timesteps | 950272 |
+-------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1981         |
+|    iterations           | 59           |
+|    time_elapsed         | 487          |
+|    total_timesteps      | 966656       |
+| train/                  |              |
+|    approx_kl            | 0.0072506005 |
+|    clip_fraction        | 0.0835       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.86        |
+|    explained_variance   | 0.929        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0291      |
+|    n_updates            | 580          |
+|    policy_gradient_loss | -0.00173     |
+|    std                  | 1.01         |
+|    value_loss           | 0.00491      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1991         |
+|    iterations           | 60           |
+|    time_elapsed         | 493          |
+|    total_timesteps      | 983040       |
+| train/                  |              |
+|    approx_kl            | 0.0068104668 |
+|    clip_fraction        | 0.0799       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.87        |
+|    explained_variance   | 0.813        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0282      |
+|    n_updates            | 590          |
+|    policy_gradient_loss | -0.00162     |
+|    std                  | 1.02         |
+|    value_loss           | 0.00477      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 2005        |
+|    iterations           | 61          |
+|    time_elapsed         | 498         |
+|    total_timesteps      | 999424      |
+| train/                  |             |
+|    approx_kl            | 0.007103944 |
+|    clip_fraction        | 0.0774      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.88       |
+|    explained_variance   | 0.942       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0322     |
+|    n_updates            | 600         |
+|    policy_gradient_loss | -0.00143    |
+|    std                  | 1.03        |
+|    value_loss           | 0.0033      |
+-----------------------------------------
+Eval num_timesteps=1000000, episode_reward=-25.58 +/- 49.00
+Episode length: 1999.50 +/- 2.18
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -25.6        |
+| time/                   |              |
+|    total_timesteps      | 1000000      |
+| train/                  |              |
+|    approx_kl            | 0.0075788023 |
+|    clip_fraction        | 0.088        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.9         |
+|    explained_variance   | 0.864        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0352      |
+|    n_updates            | 610          |
+|    policy_gradient_loss | -0.003       |
+|    std                  | 1.04         |
+|    value_loss           | 0.00192      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1971    |
+|    iterations      | 62      |
+|    time_elapsed    | 515     |
+|    total_timesteps | 1015808 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1983        |
+|    iterations           | 63          |
+|    time_elapsed         | 520         |
+|    total_timesteps      | 1032192     |
+| train/                  |             |
+|    approx_kl            | 0.009131588 |
+|    clip_fraction        | 0.0902      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.89       |
+|    explained_variance   | 0.941       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0476     |
+|    n_updates            | 620         |
+|    policy_gradient_loss | -0.00341    |
+|    std                  | 1.03        |
+|    value_loss           | 0.00705     |
+-----------------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 1995       |
+|    iterations           | 64         |
+|    time_elapsed         | 525        |
+|    total_timesteps      | 1048576    |
+| train/                  |            |
+|    approx_kl            | 0.00746674 |
+|    clip_fraction        | 0.0838     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.89      |
+|    explained_variance   | 0.958      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.022     |
+|    n_updates            | 630        |
+|    policy_gradient_loss | -0.00392   |
+|    std                  | 1.03       |
+|    value_loss           | 0.00592    |
+----------------------------------------
+Eval num_timesteps=1050000, episode_reward=-12.04 +/- 64.56
+Episode length: 1889.90 +/- 333.38
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 1.89e+03     |
+|    mean_reward          | -12          |
+| time/                   |              |
+|    total_timesteps      | 1050000      |
+| train/                  |              |
+|    approx_kl            | 0.0058071706 |
+|    clip_fraction        | 0.0721       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.9         |
+|    explained_variance   | 0.932        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0188      |
+|    n_updates            | 640          |
+|    policy_gradient_loss | -0.00235     |
+|    std                  | 1.03         |
+|    value_loss           | 0.00513      |
+------------------------------------------
+
+[Diag @ 1,050,000 | n_sheep=2 | success=5%]
+  COMPACT_CANT_DRIVE         10/20
+  NEVER_COMPACT              9/20
+  SUCCESS                    1/20
+  action_mag mean=0.190 p10=0.001 p90=0.686 (0=stopped, 1=full speed)
+  min_flock_radius mean=4.60m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=0.54m best=0.21m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.05m best=3.62m
+  reward/step (mean): progress=-0.0023  alignment=+0.0072  pen_bonus=+0.0005  step_cost=-0.0200  complete=+0.0025
+--------------------------------
+| time/              |         |
+|    fps             | 1931    |
+|    iterations      | 65      |
+|    time_elapsed    | 551     |
+|    total_timesteps | 1064960 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1944        |
+|    iterations           | 66          |
+|    time_elapsed         | 556         |
+|    total_timesteps      | 1081344     |
+| train/                  |             |
+|    approx_kl            | 0.006802067 |
+|    clip_fraction        | 0.0701      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.92       |
+|    explained_variance   | 0.937       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0304     |
+|    n_updates            | 650         |
+|    policy_gradient_loss | -0.0019     |
+|    std                  | 1.04        |
+|    value_loss           | 0.00206     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1956        |
+|    iterations           | 67          |
+|    time_elapsed         | 561         |
+|    total_timesteps      | 1097728     |
+| train/                  |             |
+|    approx_kl            | 0.007102525 |
+|    clip_fraction        | 0.074       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.92       |
+|    explained_variance   | 0.953       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.00869    |
+|    n_updates            | 660         |
+|    policy_gradient_loss | -0.00208    |
+|    std                  | 1.04        |
+|    value_loss           | 0.00579     |
+-----------------------------------------
+Eval num_timesteps=1100000, episode_reward=-29.51 +/- 23.80
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -29.5       |
+| time/                   |             |
+|    total_timesteps      | 1100000     |
+| train/                  |             |
+|    approx_kl            | 0.006372301 |
+|    clip_fraction        | 0.0669      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.94       |
+|    explained_variance   | 0.829       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0349     |
+|    n_updates            | 670         |
+|    policy_gradient_loss | -0.00135    |
+|    std                  | 1.06        |
+|    value_loss           | 0.00208     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1932    |
+|    iterations      | 68      |
+|    time_elapsed    | 576     |
+|    total_timesteps | 1114112 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1942        |
+|    iterations           | 69          |
+|    time_elapsed         | 581         |
+|    total_timesteps      | 1130496     |
+| train/                  |             |
+|    approx_kl            | 0.007083354 |
+|    clip_fraction        | 0.0839      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.95       |
+|    explained_variance   | 0.845       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0464     |
+|    n_updates            | 680         |
+|    policy_gradient_loss | -0.00298    |
+|    std                  | 1.06        |
+|    value_loss           | 0.00747     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1954        |
+|    iterations           | 70          |
+|    time_elapsed         | 586         |
+|    total_timesteps      | 1146880     |
+| train/                  |             |
+|    approx_kl            | 0.007034454 |
+|    clip_fraction        | 0.0875      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.96       |
+|    explained_variance   | 0.892       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0382     |
+|    n_updates            | 690         |
+|    policy_gradient_loss | -0.00359    |
+|    std                  | 1.06        |
+|    value_loss           | 0.00208     |
+-----------------------------------------
+Eval num_timesteps=1150000, episode_reward=-20.98 +/- 49.18
+Episode length: 1959.70 +/- 175.66
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 1.96e+03    |
+|    mean_reward          | -21         |
+| time/                   |             |
+|    total_timesteps      | 1150000     |
+| train/                  |             |
+|    approx_kl            | 0.006192833 |
+|    clip_fraction        | 0.0626      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.94       |
+|    explained_variance   | 0.951       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0224     |
+|    n_updates            | 700         |
+|    policy_gradient_loss | -0.00299    |
+|    std                  | 1.05        |
+|    value_loss           | 0.00883     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1926    |
+|    iterations      | 71      |
+|    time_elapsed    | 603     |
+|    total_timesteps | 1163264 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1937        |
+|    iterations           | 72          |
+|    time_elapsed         | 608         |
+|    total_timesteps      | 1179648     |
+| train/                  |             |
+|    approx_kl            | 0.008185772 |
+|    clip_fraction        | 0.0969      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.96       |
+|    explained_variance   | 0.944       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0278     |
+|    n_updates            | 710         |
+|    policy_gradient_loss | -0.00316    |
+|    std                  | 1.07        |
+|    value_loss           | 0.00421     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1947         |
+|    iterations           | 73           |
+|    time_elapsed         | 614          |
+|    total_timesteps      | 1196032      |
+| train/                  |              |
+|    approx_kl            | 0.0063469247 |
+|    clip_fraction        | 0.065        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -2.96        |
+|    explained_variance   | 0.912        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0239      |
+|    n_updates            | 720          |
+|    policy_gradient_loss | -0.00224     |
+|    std                  | 1.06         |
+|    value_loss           | 0.0054       |
+------------------------------------------
+Eval num_timesteps=1200000, episode_reward=-29.34 +/- 18.71
+Episode length: 2000.00 +/- 0.00
+----------------------------------------
+| eval/                   |            |
+|    mean_ep_length       | 2e+03      |
+|    mean_reward          | -29.3      |
+| time/                   |            |
+|    total_timesteps      | 1200000    |
+| train/                  |            |
+|    approx_kl            | 0.00778389 |
+|    clip_fraction        | 0.0734     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.95      |
+|    explained_variance   | 0.961      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0435    |
+|    n_updates            | 730        |
+|    policy_gradient_loss | -0.00184   |
+|    std                  | 1.06       |
+|    value_loss           | 0.0048     |
+----------------------------------------
+
+[Diag @ 1,200,000 | n_sheep=2 | success=10%]
+  NEVER_COMPACT              9/20
+  COMPACT_CANT_DRIVE         9/20
+  SUCCESS                    2/20
+  action_mag mean=0.198 p10=0.002 p90=0.744 (0=stopped, 1=full speed)
+  min_flock_radius mean=3.94m best=0.00m  (target <5m to compact)
+  min_dog_to_com   mean=0.50m best=0.14m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=11.36m best=3.58m
+  reward/step (mean): progress=-0.0002  alignment=+0.0073  pen_bonus=+0.0013  step_cost=-0.0200  complete=+0.0053
+
+[Curriculum] leaving stage n_sheep=2 after 600,000 steps | training success rate (last 100 eps) = 5%
+[Curriculum] → 3 sheep at step 1,200,000
+
+--------------------------------
+| time/              |         |
+|    fps             | 1898    |
+|    iterations      | 74      |
+|    time_elapsed    | 638     |
+|    total_timesteps | 1212416 |
+--------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 1909       |
+|    iterations           | 75         |
+|    time_elapsed         | 643        |
+|    total_timesteps      | 1228800    |
+| train/                  |            |
+|    approx_kl            | 0.00918101 |
+|    clip_fraction        | 0.106      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -2.95      |
+|    explained_variance   | 0.919      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0112    |
+|    n_updates            | 740        |
+|    policy_gradient_loss | -0.00123   |
+|    std                  | 1.06       |
+|    value_loss           | 0.0427     |
+----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1917        |
+|    iterations           | 76          |
+|    time_elapsed         | 649         |
+|    total_timesteps      | 1245184     |
+| train/                  |             |
+|    approx_kl            | 0.010076641 |
+|    clip_fraction        | 0.137       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.94       |
+|    explained_variance   | 0.919       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0229     |
+|    n_updates            | 750         |
+|    policy_gradient_loss | -0.000617   |
+|    std                  | 1.05        |
+|    value_loss           | 0.0222      |
+-----------------------------------------
+Eval num_timesteps=1250000, episode_reward=-38.73 +/- 33.85
+Episode length: 2000.00 +/- 0.00
+---------------------------------------
+| eval/                   |           |
+|    mean_ep_length       | 2e+03     |
+|    mean_reward          | -38.7     |
+| time/                   |           |
+|    total_timesteps      | 1250000   |
+| train/                  |           |
+|    approx_kl            | 0.0084493 |
+|    clip_fraction        | 0.109     |
+|    clip_range           | 0.2       |
+|    entropy_loss         | -2.96     |
+|    explained_variance   | 0.96      |
+|    learning_rate        | 0.0003    |
+|    loss                 | -0.0259   |
+|    n_updates            | 760       |
+|    policy_gradient_loss | -0.00168  |
+|    std                  | 1.06      |
+|    value_loss           | 0.0024    |
+---------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1890    |
+|    iterations      | 77      |
+|    time_elapsed    | 667     |
+|    total_timesteps | 1261568 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1899        |
+|    iterations           | 78          |
+|    time_elapsed         | 672         |
+|    total_timesteps      | 1277952     |
+| train/                  |             |
+|    approx_kl            | 0.008724872 |
+|    clip_fraction        | 0.109       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.98       |
+|    explained_variance   | 0.931       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0293     |
+|    n_updates            | 770         |
+|    policy_gradient_loss | -0.00204    |
+|    std                  | 1.08        |
+|    value_loss           | 0.0067      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1906        |
+|    iterations           | 79          |
+|    time_elapsed         | 678         |
+|    total_timesteps      | 1294336     |
+| train/                  |             |
+|    approx_kl            | 0.008191848 |
+|    clip_fraction        | 0.096       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -2.99       |
+|    explained_variance   | 0.963       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0247     |
+|    n_updates            | 780         |
+|    policy_gradient_loss | -0.002      |
+|    std                  | 1.08        |
+|    value_loss           | 0.00632     |
+-----------------------------------------
+Eval num_timesteps=1300000, episode_reward=-26.68 +/- 27.12
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -26.7       |
+| time/                   |             |
+|    total_timesteps      | 1300000     |
+| train/                  |             |
+|    approx_kl            | 0.006018152 |
+|    clip_fraction        | 0.0869      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3          |
+|    explained_variance   | 0.96        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0311     |
+|    n_updates            | 790         |
+|    policy_gradient_loss | -0.00129    |
+|    std                  | 1.09        |
+|    value_loss           | 0.00189     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1881    |
+|    iterations      | 80      |
+|    time_elapsed    | 696     |
+|    total_timesteps | 1310720 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1892         |
+|    iterations           | 81           |
+|    time_elapsed         | 701          |
+|    total_timesteps      | 1327104      |
+| train/                  |              |
+|    approx_kl            | 0.0077671953 |
+|    clip_fraction        | 0.082        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.01        |
+|    explained_variance   | 0.972        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0308      |
+|    n_updates            | 800          |
+|    policy_gradient_loss | -0.00219     |
+|    std                  | 1.09         |
+|    value_loss           | 0.00177      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1902        |
+|    iterations           | 82          |
+|    time_elapsed         | 706         |
+|    total_timesteps      | 1343488     |
+| train/                  |             |
+|    approx_kl            | 0.008806022 |
+|    clip_fraction        | 0.0947      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.02       |
+|    explained_variance   | 0.962       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0426     |
+|    n_updates            | 810         |
+|    policy_gradient_loss | -0.00231    |
+|    std                  | 1.1         |
+|    value_loss           | 0.00235     |
+-----------------------------------------
+Eval num_timesteps=1350000, episode_reward=-24.30 +/- 32.03
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -24.3       |
+| time/                   |             |
+|    total_timesteps      | 1350000     |
+| train/                  |             |
+|    approx_kl            | 0.007263833 |
+|    clip_fraction        | 0.0797      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.03       |
+|    explained_variance   | 0.957       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0338     |
+|    n_updates            | 820         |
+|    policy_gradient_loss | -0.00251    |
+|    std                  | 1.11        |
+|    value_loss           | 0.00397     |
+-----------------------------------------
+
+[Diag @ 1,350,000 | n_sheep=3 | success=0%]
+  NEVER_COMPACT              16/20
+  COMPACT_CANT_DRIVE         4/20
+  action_mag mean=0.058 p10=0.004 p90=0.054 (0=stopped, 1=full speed)
+  min_flock_radius mean=6.77m best=1.04m  (target <5m to compact)
+  min_dog_to_com   mean=0.58m best=0.28m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.71m best=4.27m
+  reward/step (mean): progress=-0.0038  alignment=+0.0015  pen_bonus=+0.0005  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1859    |
+|    iterations      | 83      |
+|    time_elapsed    | 731     |
+|    total_timesteps | 1359872 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1870        |
+|    iterations           | 84          |
+|    time_elapsed         | 735         |
+|    total_timesteps      | 1376256     |
+| train/                  |             |
+|    approx_kl            | 0.007816839 |
+|    clip_fraction        | 0.0812      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.05       |
+|    explained_variance   | 0.946       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0285     |
+|    n_updates            | 830         |
+|    policy_gradient_loss | -0.00277    |
+|    std                  | 1.11        |
+|    value_loss           | 0.0018      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1880         |
+|    iterations           | 85           |
+|    time_elapsed         | 740          |
+|    total_timesteps      | 1392640      |
+| train/                  |              |
+|    approx_kl            | 0.0064534983 |
+|    clip_fraction        | 0.0774       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.06        |
+|    explained_variance   | 0.958        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0305      |
+|    n_updates            | 840          |
+|    policy_gradient_loss | -0.00158     |
+|    std                  | 1.12         |
+|    value_loss           | 0.00988      |
+------------------------------------------
+Eval num_timesteps=1400000, episode_reward=-39.10 +/- 41.08
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -39.1        |
+| time/                   |              |
+|    total_timesteps      | 1400000      |
+| train/                  |              |
+|    approx_kl            | 0.0069560152 |
+|    clip_fraction        | 0.0835       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.07        |
+|    explained_variance   | 0.96         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0302      |
+|    n_updates            | 850          |
+|    policy_gradient_loss | -0.00283     |
+|    std                  | 1.12         |
+|    value_loss           | 0.00307      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1857    |
+|    iterations      | 86      |
+|    time_elapsed    | 758     |
+|    total_timesteps | 1409024 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1864        |
+|    iterations           | 87          |
+|    time_elapsed         | 764         |
+|    total_timesteps      | 1425408     |
+| train/                  |             |
+|    approx_kl            | 0.007682803 |
+|    clip_fraction        | 0.0931      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.09       |
+|    explained_variance   | 0.902       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0322     |
+|    n_updates            | 860         |
+|    policy_gradient_loss | -0.00224    |
+|    std                  | 1.14        |
+|    value_loss           | 0.013       |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1869         |
+|    iterations           | 88           |
+|    time_elapsed         | 771          |
+|    total_timesteps      | 1441792      |
+| train/                  |              |
+|    approx_kl            | 0.0063949013 |
+|    clip_fraction        | 0.0786       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.1         |
+|    explained_variance   | 0.953        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0401      |
+|    n_updates            | 870          |
+|    policy_gradient_loss | -0.00134     |
+|    std                  | 1.14         |
+|    value_loss           | 0.00193      |
+------------------------------------------
+Eval num_timesteps=1450000, episode_reward=-28.59 +/- 25.61
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -28.6       |
+| time/                   |             |
+|    total_timesteps      | 1450000     |
+| train/                  |             |
+|    approx_kl            | 0.007503539 |
+|    clip_fraction        | 0.0774      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.13       |
+|    explained_variance   | 0.951       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0378     |
+|    n_updates            | 880         |
+|    policy_gradient_loss | -0.00309    |
+|    std                  | 1.16        |
+|    value_loss           | 0.00551     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1845    |
+|    iterations      | 89      |
+|    time_elapsed    | 789     |
+|    total_timesteps | 1458176 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1852         |
+|    iterations           | 90           |
+|    time_elapsed         | 796          |
+|    total_timesteps      | 1474560      |
+| train/                  |              |
+|    approx_kl            | 0.0075057503 |
+|    clip_fraction        | 0.0793       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.15        |
+|    explained_variance   | 0.955        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0439      |
+|    n_updates            | 890          |
+|    policy_gradient_loss | -0.00264     |
+|    std                  | 1.17         |
+|    value_loss           | 0.00265      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1857         |
+|    iterations           | 91           |
+|    time_elapsed         | 802          |
+|    total_timesteps      | 1490944      |
+| train/                  |              |
+|    approx_kl            | 0.0068523246 |
+|    clip_fraction        | 0.0755       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.15        |
+|    explained_variance   | 0.935        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0282      |
+|    n_updates            | 900          |
+|    policy_gradient_loss | -0.00292     |
+|    std                  | 1.17         |
+|    value_loss           | 0.00268      |
+------------------------------------------
+Eval num_timesteps=1500000, episode_reward=-40.66 +/- 25.29
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -40.7       |
+| time/                   |             |
+|    total_timesteps      | 1500000     |
+| train/                  |             |
+|    approx_kl            | 0.007249858 |
+|    clip_fraction        | 0.0857      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.15       |
+|    explained_variance   | 0.952       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0366     |
+|    n_updates            | 910         |
+|    policy_gradient_loss | -0.00319    |
+|    std                  | 1.17        |
+|    value_loss           | 0.00564     |
+-----------------------------------------
+
+[Diag @ 1,500,000 | n_sheep=3 | success=0%]
+  NEVER_COMPACT              14/20
+  COMPACT_CANT_DRIVE         6/20
+  action_mag mean=0.050 p10=0.005 p90=0.049 (0=stopped, 1=full speed)
+  min_flock_radius mean=6.53m best=0.98m  (target <5m to compact)
+  min_dog_to_com   mean=0.46m best=0.06m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.38m best=5.44m
+  reward/step (mean): progress=+0.0039  alignment=+0.0011  pen_bonus=+0.0005  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1819    |
+|    iterations      | 92      |
+|    time_elapsed    | 828     |
+|    total_timesteps | 1507328 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1828        |
+|    iterations           | 93          |
+|    time_elapsed         | 833         |
+|    total_timesteps      | 1523712     |
+| train/                  |             |
+|    approx_kl            | 0.007471386 |
+|    clip_fraction        | 0.0834      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.16       |
+|    explained_variance   | 0.929       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0275     |
+|    n_updates            | 920         |
+|    policy_gradient_loss | -0.00192    |
+|    std                  | 1.17        |
+|    value_loss           | 0.00791     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1835        |
+|    iterations           | 94          |
+|    time_elapsed         | 838         |
+|    total_timesteps      | 1540096     |
+| train/                  |             |
+|    approx_kl            | 0.007296456 |
+|    clip_fraction        | 0.0765      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.17       |
+|    explained_variance   | 0.95        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0484     |
+|    n_updates            | 930         |
+|    policy_gradient_loss | -0.00366    |
+|    std                  | 1.18        |
+|    value_loss           | 0.00788     |
+-----------------------------------------
+Eval num_timesteps=1550000, episode_reward=-34.66 +/- 25.47
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -34.7       |
+| time/                   |             |
+|    total_timesteps      | 1550000     |
+| train/                  |             |
+|    approx_kl            | 0.007654687 |
+|    clip_fraction        | 0.095       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.18       |
+|    explained_variance   | 0.92        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0386     |
+|    n_updates            | 940         |
+|    policy_gradient_loss | -0.00316    |
+|    std                  | 1.19        |
+|    value_loss           | 0.00363     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1817    |
+|    iterations      | 95      |
+|    time_elapsed    | 856     |
+|    total_timesteps | 1556480 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1823        |
+|    iterations           | 96          |
+|    time_elapsed         | 862         |
+|    total_timesteps      | 1572864     |
+| train/                  |             |
+|    approx_kl            | 0.007030643 |
+|    clip_fraction        | 0.0881      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.18       |
+|    explained_variance   | 0.944       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0346     |
+|    n_updates            | 950         |
+|    policy_gradient_loss | -0.00321    |
+|    std                  | 1.19        |
+|    value_loss           | 0.00208     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1828         |
+|    iterations           | 97           |
+|    time_elapsed         | 869          |
+|    total_timesteps      | 1589248      |
+| train/                  |              |
+|    approx_kl            | 0.0071562277 |
+|    clip_fraction        | 0.0834       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.19        |
+|    explained_variance   | 0.955        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0196      |
+|    n_updates            | 960          |
+|    policy_gradient_loss | -0.00259     |
+|    std                  | 1.2          |
+|    value_loss           | 0.00773      |
+------------------------------------------
+Eval num_timesteps=1600000, episode_reward=-33.49 +/- 36.88
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -33.5        |
+| time/                   |              |
+|    total_timesteps      | 1600000      |
+| train/                  |              |
+|    approx_kl            | 0.0069667175 |
+|    clip_fraction        | 0.0741       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.2         |
+|    explained_variance   | 0.94         |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0313      |
+|    n_updates            | 970          |
+|    policy_gradient_loss | -0.00399     |
+|    std                  | 1.2          |
+|    value_loss           | 0.00419      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1810    |
+|    iterations      | 98      |
+|    time_elapsed    | 886     |
+|    total_timesteps | 1605632 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1819         |
+|    iterations           | 99           |
+|    time_elapsed         | 891          |
+|    total_timesteps      | 1622016      |
+| train/                  |              |
+|    approx_kl            | 0.0061995042 |
+|    clip_fraction        | 0.0767       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.21        |
+|    explained_variance   | 0.968        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.036       |
+|    n_updates            | 980          |
+|    policy_gradient_loss | -0.00289     |
+|    std                  | 1.2          |
+|    value_loss           | 0.00241      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1826        |
+|    iterations           | 100         |
+|    time_elapsed         | 896         |
+|    total_timesteps      | 1638400     |
+| train/                  |             |
+|    approx_kl            | 0.006502889 |
+|    clip_fraction        | 0.0714      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.22       |
+|    explained_variance   | 0.976       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0445     |
+|    n_updates            | 990         |
+|    policy_gradient_loss | -0.00314    |
+|    std                  | 1.21        |
+|    value_loss           | 0.00218     |
+-----------------------------------------
+Eval num_timesteps=1650000, episode_reward=-38.00 +/- 30.02
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -38         |
+| time/                   |             |
+|    total_timesteps      | 1650000     |
+| train/                  |             |
+|    approx_kl            | 0.006163503 |
+|    clip_fraction        | 0.0739      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.22       |
+|    explained_variance   | 0.955       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0391     |
+|    n_updates            | 1000        |
+|    policy_gradient_loss | -0.00257    |
+|    std                  | 1.22        |
+|    value_loss           | 0.0027      |
+-----------------------------------------
+
+[Diag @ 1,650,000 | n_sheep=3 | success=0%]
+  NEVER_COMPACT              16/20
+  COMPACT_CANT_DRIVE         4/20
+  action_mag mean=0.054 p10=0.002 p90=0.051 (0=stopped, 1=full speed)
+  min_flock_radius mean=6.63m best=3.72m  (target <5m to compact)
+  min_dog_to_com   mean=0.60m best=0.09m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.17m best=5.44m
+  reward/step (mean): progress=+0.0032  alignment=+0.0015  pen_bonus=+0.0005  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1793    |
+|    iterations      | 101     |
+|    time_elapsed    | 922     |
+|    total_timesteps | 1654784 |
+--------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 1800       |
+|    iterations           | 102        |
+|    time_elapsed         | 927        |
+|    total_timesteps      | 1671168    |
+| train/                  |            |
+|    approx_kl            | 0.00634938 |
+|    clip_fraction        | 0.073      |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -3.23      |
+|    explained_variance   | 0.97       |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0462    |
+|    n_updates            | 1010       |
+|    policy_gradient_loss | -0.00394   |
+|    std                  | 1.22       |
+|    value_loss           | 0.00334    |
+----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1807         |
+|    iterations           | 103          |
+|    time_elapsed         | 933          |
+|    total_timesteps      | 1687552      |
+| train/                  |              |
+|    approx_kl            | 0.0072235917 |
+|    clip_fraction        | 0.0774       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.23        |
+|    explained_variance   | 0.957        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0284      |
+|    n_updates            | 1020         |
+|    policy_gradient_loss | -0.00292     |
+|    std                  | 1.22         |
+|    value_loss           | 0.00807      |
+------------------------------------------
+Eval num_timesteps=1700000, episode_reward=-32.26 +/- 31.96
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -32.3        |
+| time/                   |              |
+|    total_timesteps      | 1700000      |
+| train/                  |              |
+|    approx_kl            | 0.0060304543 |
+|    clip_fraction        | 0.0721       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.23        |
+|    explained_variance   | 0.929        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0427      |
+|    n_updates            | 1030         |
+|    policy_gradient_loss | -0.00306     |
+|    std                  | 1.21         |
+|    value_loss           | 0.00208      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1789    |
+|    iterations      | 104     |
+|    time_elapsed    | 952     |
+|    total_timesteps | 1703936 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1795        |
+|    iterations           | 105         |
+|    time_elapsed         | 958         |
+|    total_timesteps      | 1720320     |
+| train/                  |             |
+|    approx_kl            | 0.006440907 |
+|    clip_fraction        | 0.0642      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.22       |
+|    explained_variance   | 0.947       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0317     |
+|    n_updates            | 1040        |
+|    policy_gradient_loss | -0.00158    |
+|    std                  | 1.21        |
+|    value_loss           | 0.00165     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1801        |
+|    iterations           | 106         |
+|    time_elapsed         | 963         |
+|    total_timesteps      | 1736704     |
+| train/                  |             |
+|    approx_kl            | 0.006897255 |
+|    clip_fraction        | 0.0738      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.2        |
+|    explained_variance   | 0.939       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0408     |
+|    n_updates            | 1050        |
+|    policy_gradient_loss | -0.00349    |
+|    std                  | 1.19        |
+|    value_loss           | 0.00814     |
+-----------------------------------------
+Eval num_timesteps=1750000, episode_reward=-40.58 +/- 28.91
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -40.6        |
+| time/                   |              |
+|    total_timesteps      | 1750000      |
+| train/                  |              |
+|    approx_kl            | 0.0070952754 |
+|    clip_fraction        | 0.0742       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.19        |
+|    explained_variance   | 0.957        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0308      |
+|    n_updates            | 1060         |
+|    policy_gradient_loss | -0.0037      |
+|    std                  | 1.19         |
+|    value_loss           | 0.0191       |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1784    |
+|    iterations      | 107     |
+|    time_elapsed    | 982     |
+|    total_timesteps | 1753088 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1791        |
+|    iterations           | 108         |
+|    time_elapsed         | 987         |
+|    total_timesteps      | 1769472     |
+| train/                  |             |
+|    approx_kl            | 0.006444447 |
+|    clip_fraction        | 0.0736      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.2        |
+|    explained_variance   | 0.968       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0362     |
+|    n_updates            | 1070        |
+|    policy_gradient_loss | -0.00409    |
+|    std                  | 1.2         |
+|    value_loss           | 0.00395     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1797        |
+|    iterations           | 109         |
+|    time_elapsed         | 993         |
+|    total_timesteps      | 1785856     |
+| train/                  |             |
+|    approx_kl            | 0.007391736 |
+|    clip_fraction        | 0.0758      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.22       |
+|    explained_variance   | 0.96        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0341     |
+|    n_updates            | 1080        |
+|    policy_gradient_loss | -0.00272    |
+|    std                  | 1.21        |
+|    value_loss           | 0.00221     |
+-----------------------------------------
+Eval num_timesteps=1800000, episode_reward=-29.06 +/- 30.98
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -29.1       |
+| time/                   |             |
+|    total_timesteps      | 1800000     |
+| train/                  |             |
+|    approx_kl            | 0.006899439 |
+|    clip_fraction        | 0.0695      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.25       |
+|    explained_variance   | 0.965       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0317     |
+|    n_updates            | 1090        |
+|    policy_gradient_loss | -0.00226    |
+|    std                  | 1.23        |
+|    value_loss           | 0.00615     |
+-----------------------------------------
+
+[Diag @ 1,800,000 | n_sheep=3 | success=0%]
+  NEVER_COMPACT              11/20
+  COMPACT_CANT_DRIVE         9/20
+  action_mag mean=0.054 p10=0.003 p90=0.057 (0=stopped, 1=full speed)
+  min_flock_radius mean=6.01m best=1.13m  (target <5m to compact)
+  min_dog_to_com   mean=0.51m best=0.11m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=12.52m best=3.21m
+  reward/step (mean): progress=+0.0050  alignment=+0.0017  pen_bonus=+0.0008  step_cost=-0.0200  complete=+0.0000
+
+[Curriculum] leaving stage n_sheep=3 after 600,000 steps | training success rate (last 100 eps) = 0%
+[Curriculum] → 4 sheep at step 1,800,000
+
+--------------------------------
+| time/              |         |
+|    fps             | 1769    |
+|    iterations      | 110     |
+|    time_elapsed    | 1018    |
+|    total_timesteps | 1802240 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1776        |
+|    iterations           | 111         |
+|    time_elapsed         | 1023        |
+|    total_timesteps      | 1818624     |
+| train/                  |             |
+|    approx_kl            | 0.006710761 |
+|    clip_fraction        | 0.0761      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.25       |
+|    explained_variance   | 0.867       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.031      |
+|    n_updates            | 1100        |
+|    policy_gradient_loss | -0.00311    |
+|    std                  | 1.23        |
+|    value_loss           | 0.0186      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1783        |
+|    iterations           | 112         |
+|    time_elapsed         | 1028        |
+|    total_timesteps      | 1835008     |
+| train/                  |             |
+|    approx_kl            | 0.006202608 |
+|    clip_fraction        | 0.0682      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.25       |
+|    explained_variance   | 0.954       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0245     |
+|    n_updates            | 1110        |
+|    policy_gradient_loss | -0.00429    |
+|    std                  | 1.23        |
+|    value_loss           | 0.00641     |
+-----------------------------------------
+Eval num_timesteps=1850000, episode_reward=-35.87 +/- 42.36
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -35.9       |
+| time/                   |             |
+|    total_timesteps      | 1850000     |
+| train/                  |             |
+|    approx_kl            | 0.008398036 |
+|    clip_fraction        | 0.086       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.28       |
+|    explained_variance   | 0.938       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0514     |
+|    n_updates            | 1120        |
+|    policy_gradient_loss | -0.00497    |
+|    std                  | 1.25        |
+|    value_loss           | 0.00614     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1768    |
+|    iterations      | 113     |
+|    time_elapsed    | 1046    |
+|    total_timesteps | 1851392 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1775        |
+|    iterations           | 114         |
+|    time_elapsed         | 1052        |
+|    total_timesteps      | 1867776     |
+| train/                  |             |
+|    approx_kl            | 0.007641702 |
+|    clip_fraction        | 0.0742      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.31       |
+|    explained_variance   | 0.935       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.046      |
+|    n_updates            | 1130        |
+|    policy_gradient_loss | -0.00349    |
+|    std                  | 1.28        |
+|    value_loss           | 0.0228      |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1781         |
+|    iterations           | 115          |
+|    time_elapsed         | 1057         |
+|    total_timesteps      | 1884160      |
+| train/                  |              |
+|    approx_kl            | 0.0073437546 |
+|    clip_fraction        | 0.0747       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.34        |
+|    explained_variance   | 0.928        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0498      |
+|    n_updates            | 1140         |
+|    policy_gradient_loss | -0.00496     |
+|    std                  | 1.29         |
+|    value_loss           | 0.00764      |
+------------------------------------------
+Eval num_timesteps=1900000, episode_reward=-41.88 +/- 27.01
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -41.9       |
+| time/                   |             |
+|    total_timesteps      | 1900000     |
+| train/                  |             |
+|    approx_kl            | 0.006885264 |
+|    clip_fraction        | 0.0728      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.36       |
+|    explained_variance   | 0.934       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0503     |
+|    n_updates            | 1150        |
+|    policy_gradient_loss | -0.00384    |
+|    std                  | 1.3         |
+|    value_loss           | 0.00423     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1767    |
+|    iterations      | 116     |
+|    time_elapsed    | 1075    |
+|    total_timesteps | 1900544 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1773         |
+|    iterations           | 117          |
+|    time_elapsed         | 1080         |
+|    total_timesteps      | 1916928      |
+| train/                  |              |
+|    approx_kl            | 0.0077611385 |
+|    clip_fraction        | 0.0792       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.38        |
+|    explained_variance   | 0.931        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0374      |
+|    n_updates            | 1160         |
+|    policy_gradient_loss | -0.00399     |
+|    std                  | 1.31         |
+|    value_loss           | 0.00292      |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1780        |
+|    iterations           | 118         |
+|    time_elapsed         | 1085        |
+|    total_timesteps      | 1933312     |
+| train/                  |             |
+|    approx_kl            | 0.006831214 |
+|    clip_fraction        | 0.0758      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.4        |
+|    explained_variance   | 0.963       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0175     |
+|    n_updates            | 1170        |
+|    policy_gradient_loss | -0.00471    |
+|    std                  | 1.33        |
+|    value_loss           | 0.00235     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1786        |
+|    iterations           | 119         |
+|    time_elapsed         | 1091        |
+|    total_timesteps      | 1949696     |
+| train/                  |             |
+|    approx_kl            | 0.006474304 |
+|    clip_fraction        | 0.0666      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.43       |
+|    explained_variance   | 0.931       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0318     |
+|    n_updates            | 1180        |
+|    policy_gradient_loss | -0.00285    |
+|    std                  | 1.35        |
+|    value_loss           | 0.00699     |
+-----------------------------------------
+Eval num_timesteps=1950000, episode_reward=-35.80 +/- 28.95
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -35.8       |
+| time/                   |             |
+|    total_timesteps      | 1950000     |
+| train/                  |             |
+|    approx_kl            | 0.008532442 |
+|    clip_fraction        | 0.0746      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.43       |
+|    explained_variance   | 0.958       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.00337    |
+|    n_updates            | 1190        |
+|    policy_gradient_loss | -0.00376    |
+|    std                  | 1.34        |
+|    value_loss           | 0.0156      |
+-----------------------------------------
+
+[Diag @ 1,950,000 | n_sheep=4 | success=0%]
+  NEVER_COMPACT              19/20
+  COMPACT_CANT_DRIVE         1/20
+  action_mag mean=0.049 p10=0.007 p90=0.044 (0=stopped, 1=full speed)
+  min_flock_radius mean=8.95m best=4.96m  (target <5m to compact)
+  min_dog_to_com   mean=0.39m best=0.07m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=14.18m best=9.30m
+  reward/step (mean): progress=-0.0121  alignment=+0.0010  pen_bonus=+0.0005  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1759    |
+|    iterations      | 120     |
+|    time_elapsed    | 1117    |
+|    total_timesteps | 1966080 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1766        |
+|    iterations           | 121         |
+|    time_elapsed         | 1122        |
+|    total_timesteps      | 1982464     |
+| train/                  |             |
+|    approx_kl            | 0.006549825 |
+|    clip_fraction        | 0.0665      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.43       |
+|    explained_variance   | 0.966       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0345     |
+|    n_updates            | 1200        |
+|    policy_gradient_loss | -0.00349    |
+|    std                  | 1.34        |
+|    value_loss           | 0.00315     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1773         |
+|    iterations           | 122          |
+|    time_elapsed         | 1127         |
+|    total_timesteps      | 1998848      |
+| train/                  |              |
+|    approx_kl            | 0.0062008686 |
+|    clip_fraction        | 0.0699       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.44        |
+|    explained_variance   | 0.959        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0512      |
+|    n_updates            | 1210         |
+|    policy_gradient_loss | -0.00291     |
+|    std                  | 1.35         |
+|    value_loss           | 0.00544      |
+------------------------------------------
+Eval num_timesteps=2000000, episode_reward=-45.28 +/- 26.78
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -45.3       |
+| time/                   |             |
+|    total_timesteps      | 2000000     |
+| train/                  |             |
+|    approx_kl            | 0.006553275 |
+|    clip_fraction        | 0.0739      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.45       |
+|    explained_variance   | 0.924       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0416     |
+|    n_updates            | 1220        |
+|    policy_gradient_loss | -0.00427    |
+|    std                  | 1.36        |
+|    value_loss           | 0.0127      |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1761    |
+|    iterations      | 123     |
+|    time_elapsed    | 1144    |
+|    total_timesteps | 2015232 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1767         |
+|    iterations           | 124          |
+|    time_elapsed         | 1149         |
+|    total_timesteps      | 2031616      |
+| train/                  |              |
+|    approx_kl            | 0.0059226304 |
+|    clip_fraction        | 0.0653       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.46        |
+|    explained_variance   | 0.947        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.025       |
+|    n_updates            | 1230         |
+|    policy_gradient_loss | -0.00273     |
+|    std                  | 1.36         |
+|    value_loss           | 0.00879      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1775         |
+|    iterations           | 125          |
+|    time_elapsed         | 1153         |
+|    total_timesteps      | 2048000      |
+| train/                  |              |
+|    approx_kl            | 0.0076779695 |
+|    clip_fraction        | 0.0729       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.47        |
+|    explained_variance   | 0.931        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0382      |
+|    n_updates            | 1240         |
+|    policy_gradient_loss | -0.00385     |
+|    std                  | 1.37         |
+|    value_loss           | 0.00692      |
+------------------------------------------
+Eval num_timesteps=2050000, episode_reward=-44.22 +/- 28.52
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -44.2        |
+| time/                   |              |
+|    total_timesteps      | 2050000      |
+| train/                  |              |
+|    approx_kl            | 0.0073502595 |
+|    clip_fraction        | 0.0822       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.49        |
+|    explained_variance   | 0.946        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0342      |
+|    n_updates            | 1250         |
+|    policy_gradient_loss | -0.00592     |
+|    std                  | 1.39         |
+|    value_loss           | 0.00555      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1764    |
+|    iterations      | 126     |
+|    time_elapsed    | 1170    |
+|    total_timesteps | 2064384 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1770        |
+|    iterations           | 127         |
+|    time_elapsed         | 1175        |
+|    total_timesteps      | 2080768     |
+| train/                  |             |
+|    approx_kl            | 0.006628736 |
+|    clip_fraction        | 0.0767      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.51       |
+|    explained_variance   | 0.95        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.035      |
+|    n_updates            | 1260        |
+|    policy_gradient_loss | -0.00457    |
+|    std                  | 1.4         |
+|    value_loss           | 0.00416     |
+-----------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1776         |
+|    iterations           | 128          |
+|    time_elapsed         | 1180         |
+|    total_timesteps      | 2097152      |
+| train/                  |              |
+|    approx_kl            | 0.0068027405 |
+|    clip_fraction        | 0.0719       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.53        |
+|    explained_variance   | 0.891        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0391      |
+|    n_updates            | 1270         |
+|    policy_gradient_loss | -0.00312     |
+|    std                  | 1.42         |
+|    value_loss           | 0.00492      |
+------------------------------------------
+Eval num_timesteps=2100000, episode_reward=-39.37 +/- 34.76
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -39.4       |
+| time/                   |             |
+|    total_timesteps      | 2100000     |
+| train/                  |             |
+|    approx_kl            | 0.005523986 |
+|    clip_fraction        | 0.0604      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.54       |
+|    explained_variance   | 0.938       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0364     |
+|    n_updates            | 1280        |
+|    policy_gradient_loss | -0.00281    |
+|    std                  | 1.42        |
+|    value_loss           | 0.015       |
+-----------------------------------------
+
+[Diag @ 2,100,000 | n_sheep=4 | success=0%]
+  NEVER_COMPACT              20/20
+  action_mag mean=0.047 p10=0.002 p90=0.041 (0=stopped, 1=full speed)
+  min_flock_radius mean=8.62m best=5.89m  (target <5m to compact)
+  min_dog_to_com   mean=0.46m best=0.04m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=14.19m best=7.53m
+  reward/step (mean): progress=-0.0012  alignment=+0.0012  pen_bonus=+0.0010  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1751    |
+|    iterations      | 129     |
+|    time_elapsed    | 1206    |
+|    total_timesteps | 2113536 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1756        |
+|    iterations           | 130         |
+|    time_elapsed         | 1212        |
+|    total_timesteps      | 2129920     |
+| train/                  |             |
+|    approx_kl            | 0.007766474 |
+|    clip_fraction        | 0.0823      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.53       |
+|    explained_variance   | 0.96        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0396     |
+|    n_updates            | 1290        |
+|    policy_gradient_loss | -0.00492    |
+|    std                  | 1.41        |
+|    value_loss           | 0.00554     |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1762        |
+|    iterations           | 131         |
+|    time_elapsed         | 1217        |
+|    total_timesteps      | 2146304     |
+| train/                  |             |
+|    approx_kl            | 0.006704482 |
+|    clip_fraction        | 0.0748      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.53       |
+|    explained_variance   | 0.97        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0466     |
+|    n_updates            | 1300        |
+|    policy_gradient_loss | -0.00339    |
+|    std                  | 1.42        |
+|    value_loss           | 0.00432     |
+-----------------------------------------
+Eval num_timesteps=2150000, episode_reward=-43.17 +/- 26.95
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -43.2        |
+| time/                   |              |
+|    total_timesteps      | 2150000      |
+| train/                  |              |
+|    approx_kl            | 0.0065447316 |
+|    clip_fraction        | 0.0751       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.53        |
+|    explained_variance   | 0.888        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0369      |
+|    n_updates            | 1310         |
+|    policy_gradient_loss | -0.00369     |
+|    std                  | 1.41         |
+|    value_loss           | 0.0165       |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1750    |
+|    iterations      | 132     |
+|    time_elapsed    | 1235    |
+|    total_timesteps | 2162688 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1755         |
+|    iterations           | 133          |
+|    time_elapsed         | 1241         |
+|    total_timesteps      | 2179072      |
+| train/                  |              |
+|    approx_kl            | 0.0070872563 |
+|    clip_fraction        | 0.075        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.54        |
+|    explained_variance   | 0.954        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0427      |
+|    n_updates            | 1320         |
+|    policy_gradient_loss | -0.00406     |
+|    std                  | 1.42         |
+|    value_loss           | 0.00977      |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1762         |
+|    iterations           | 134          |
+|    time_elapsed         | 1245         |
+|    total_timesteps      | 2195456      |
+| train/                  |              |
+|    approx_kl            | 0.0073371828 |
+|    clip_fraction        | 0.077        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.55        |
+|    explained_variance   | 0.939        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0303      |
+|    n_updates            | 1330         |
+|    policy_gradient_loss | -0.00371     |
+|    std                  | 1.43         |
+|    value_loss           | 0.00862      |
+------------------------------------------
+Eval num_timesteps=2200000, episode_reward=-40.81 +/- 44.39
+Episode length: 2000.00 +/- 0.00
+------------------------------------------
+| eval/                   |              |
+|    mean_ep_length       | 2e+03        |
+|    mean_reward          | -40.8        |
+| time/                   |              |
+|    total_timesteps      | 2200000      |
+| train/                  |              |
+|    approx_kl            | 0.0072064474 |
+|    clip_fraction        | 0.0714       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.58        |
+|    explained_variance   | 0.951        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0517      |
+|    n_updates            | 1340         |
+|    policy_gradient_loss | -0.00405     |
+|    std                  | 1.45         |
+|    value_loss           | 0.00351      |
+------------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1751    |
+|    iterations      | 135     |
+|    time_elapsed    | 1262    |
+|    total_timesteps | 2211840 |
+--------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1758        |
+|    iterations           | 136         |
+|    time_elapsed         | 1267        |
+|    total_timesteps      | 2228224     |
+| train/                  |             |
+|    approx_kl            | 0.008551812 |
+|    clip_fraction        | 0.0911      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.58       |
+|    explained_variance   | 0.929       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0258     |
+|    n_updates            | 1350        |
+|    policy_gradient_loss | -0.00599    |
+|    std                  | 1.45        |
+|    value_loss           | 0.0034      |
+-----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1764        |
+|    iterations           | 137         |
+|    time_elapsed         | 1271        |
+|    total_timesteps      | 2244608     |
+| train/                  |             |
+|    approx_kl            | 0.006960677 |
+|    clip_fraction        | 0.0702      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.59       |
+|    explained_variance   | 0.9         |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0396     |
+|    n_updates            | 1360        |
+|    policy_gradient_loss | -0.00412    |
+|    std                  | 1.46        |
+|    value_loss           | 0.00429     |
+-----------------------------------------
+Eval num_timesteps=2250000, episode_reward=-37.92 +/- 31.68
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -37.9       |
+| time/                   |             |
+|    total_timesteps      | 2250000     |
+| train/                  |             |
+|    approx_kl            | 0.005949891 |
+|    clip_fraction        | 0.0683      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.59       |
+|    explained_variance   | 0.948       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0381     |
+|    n_updates            | 1370        |
+|    policy_gradient_loss | -0.00328    |
+|    std                  | 1.46        |
+|    value_loss           | 0.0113      |
+-----------------------------------------
+
+[Diag @ 2,250,000 | n_sheep=4 | success=0%]
+  NEVER_COMPACT              19/20
+  COMPACT_CANT_DRIVE         1/20
+  action_mag mean=0.068 p10=0.004 p90=0.045 (0=stopped, 1=full speed)
+  min_flock_radius mean=7.87m best=3.57m  (target <5m to compact)
+  min_dog_to_com   mean=0.45m best=0.15m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=14.06m best=6.95m
+  reward/step (mean): progress=-0.0035  alignment=+0.0020  pen_bonus=+0.0008  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1743    |
+|    iterations      | 138     |
+|    time_elapsed    | 1297    |
+|    total_timesteps | 2260992 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1749         |
+|    iterations           | 139          |
+|    time_elapsed         | 1301         |
+|    total_timesteps      | 2277376      |
+| train/                  |              |
+|    approx_kl            | 0.0071727796 |
+|    clip_fraction        | 0.0784       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.6         |
+|    explained_variance   | 0.943        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0387      |
+|    n_updates            | 1380         |
+|    policy_gradient_loss | -0.0042      |
+|    std                  | 1.46         |
+|    value_loss           | 0.0113       |
+------------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1755        |
+|    iterations           | 140         |
+|    time_elapsed         | 1306        |
+|    total_timesteps      | 2293760     |
+| train/                  |             |
+|    approx_kl            | 0.006800391 |
+|    clip_fraction        | 0.0662      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.59       |
+|    explained_variance   | 0.931       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0283     |
+|    n_updates            | 1390        |
+|    policy_gradient_loss | -0.00421    |
+|    std                  | 1.46        |
+|    value_loss           | 0.00659     |
+-----------------------------------------
+Eval num_timesteps=2300000, episode_reward=-47.47 +/- 37.24
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -47.5       |
+| time/                   |             |
+|    total_timesteps      | 2300000     |
+| train/                  |             |
+|    approx_kl            | 0.008103053 |
+|    clip_fraction        | 0.081       |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.59       |
+|    explained_variance   | 0.945       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0433     |
+|    n_updates            | 1400        |
+|    policy_gradient_loss | -0.00404    |
+|    std                  | 1.46        |
+|    value_loss           | 0.00796     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1745    |
+|    iterations      | 141     |
+|    time_elapsed    | 1323    |
+|    total_timesteps | 2310144 |
+--------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1751         |
+|    iterations           | 142          |
+|    time_elapsed         | 1328         |
+|    total_timesteps      | 2326528      |
+| train/                  |              |
+|    approx_kl            | 0.0061590094 |
+|    clip_fraction        | 0.066        |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.61        |
+|    explained_variance   | 0.957        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0436      |
+|    n_updates            | 1410         |
+|    policy_gradient_loss | -0.00287     |
+|    std                  | 1.47         |
+|    value_loss           | 0.0102       |
+------------------------------------------
+------------------------------------------
+| time/                   |              |
+|    fps                  | 1757         |
+|    iterations           | 143          |
+|    time_elapsed         | 1332         |
+|    total_timesteps      | 2342912      |
+| train/                  |              |
+|    approx_kl            | 0.0070403973 |
+|    clip_fraction        | 0.0733       |
+|    clip_range           | 0.2          |
+|    entropy_loss         | -3.62        |
+|    explained_variance   | 0.863        |
+|    learning_rate        | 0.0003       |
+|    loss                 | -0.0356      |
+|    n_updates            | 1420         |
+|    policy_gradient_loss | -0.00525     |
+|    std                  | 1.48         |
+|    value_loss           | 0.0103       |
+------------------------------------------
+Eval num_timesteps=2350000, episode_reward=-47.95 +/- 27.60
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -48         |
+| time/                   |             |
+|    total_timesteps      | 2350000     |
+| train/                  |             |
+|    approx_kl            | 0.007505033 |
+|    clip_fraction        | 0.0729      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.64       |
+|    explained_variance   | 0.94        |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0473     |
+|    n_updates            | 1430        |
+|    policy_gradient_loss | -0.00385    |
+|    std                  | 1.5         |
+|    value_loss           | 0.00449     |
+-----------------------------------------
+--------------------------------
+| time/              |         |
+|    fps             | 1747    |
+|    iterations      | 144     |
+|    time_elapsed    | 1350    |
+|    total_timesteps | 2359296 |
+--------------------------------
+----------------------------------------
+| time/                   |            |
+|    fps                  | 1752       |
+|    iterations           | 145        |
+|    time_elapsed         | 1355       |
+|    total_timesteps      | 2375680    |
+| train/                  |            |
+|    approx_kl            | 0.00724002 |
+|    clip_fraction        | 0.0739     |
+|    clip_range           | 0.2        |
+|    entropy_loss         | -3.65      |
+|    explained_variance   | 0.948      |
+|    learning_rate        | 0.0003     |
+|    loss                 | -0.0419    |
+|    n_updates            | 1440       |
+|    policy_gradient_loss | -0.00426   |
+|    std                  | 1.5        |
+|    value_loss           | 0.00886    |
+----------------------------------------
+-----------------------------------------
+| time/                   |             |
+|    fps                  | 1758        |
+|    iterations           | 146         |
+|    time_elapsed         | 1360        |
+|    total_timesteps      | 2392064     |
+| train/                  |             |
+|    approx_kl            | 0.007578165 |
+|    clip_fraction        | 0.0713      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.64       |
+|    explained_variance   | 0.859       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0427     |
+|    n_updates            | 1450        |
+|    policy_gradient_loss | -0.0049     |
+|    std                  | 1.49        |
+|    value_loss           | 0.00429     |
+-----------------------------------------
+Eval num_timesteps=2400000, episode_reward=-47.88 +/- 34.39
+Episode length: 2000.00 +/- 0.00
+-----------------------------------------
+| eval/                   |             |
+|    mean_ep_length       | 2e+03       |
+|    mean_reward          | -47.9       |
+| time/                   |             |
+|    total_timesteps      | 2400000     |
+| train/                  |             |
+|    approx_kl            | 0.006707498 |
+|    clip_fraction        | 0.0692      |
+|    clip_range           | 0.2         |
+|    entropy_loss         | -3.65       |
+|    explained_variance   | 0.861       |
+|    learning_rate        | 0.0003      |
+|    loss                 | -0.0426     |
+|    n_updates            | 1460        |
+|    policy_gradient_loss | -0.00411    |
+|    std                  | 1.5         |
+|    value_loss           | 0.00639     |
+-----------------------------------------
+
+[Diag @ 2,400,000 | n_sheep=4 | success=0%]
+  NEVER_COMPACT              19/20
+  COMPACT_CANT_DRIVE         1/20
+  action_mag mean=0.052 p10=0.005 p90=0.045 (0=stopped, 1=full speed)
+  min_flock_radius mean=8.79m best=3.32m  (target <5m to compact)
+  min_dog_to_com   mean=0.45m best=0.20m  (FLEE_DIST=7m)
+  min_com_to_pen   mean=13.96m best=9.02m
+  reward/step (mean): progress=-0.0047  alignment=+0.0013  pen_bonus=+0.0005  step_cost=-0.0200  complete=+0.0000
+--------------------------------
+| time/              |         |
+|    fps             | 1737    |
+|    iterations      | 147     |
+|    time_elapsed    | 1386    |
+|    total_timesteps | 2408448 |
+--------------------------------
+
+Training complete. Artefacts saved to runs/ppo_fix_check2/
diff --git a/training/runs/ppo_fix_check2/best_model/best_model.zip b/training/runs/ppo_fix_check2/best_model/best_model.zip
new file mode 100644
index 0000000..b07d85b
Binary files /dev/null and b/training/runs/ppo_fix_check2/best_model/best_model.zip differ
diff --git a/training/runs/ppo_fix_check2/evaluations.npz b/training/runs/ppo_fix_check2/evaluations.npz
new file mode 100644
index 0000000..cc6f67e
Binary files /dev/null and b/training/runs/ppo_fix_check2/evaluations.npz differ
diff --git a/training/runs/ppo_fix_check2/final_model.zip b/training/runs/ppo_fix_check2/final_model.zip
new file mode 100644
index 0000000..ac482b3
Binary files /dev/null and b/training/runs/ppo_fix_check2/final_model.zip differ
diff --git a/training/runs/ppo_fix_check2/vecnorm.pkl b/training/runs/ppo_fix_check2/vecnorm.pkl
new file mode 100644
index 0000000..20a640e
Binary files /dev/null and b/training/runs/ppo_fix_check2/vecnorm.pkl differ