diff --git a/training/runs/expA_fresh2.log b/training/runs/expA_fresh2.log new file mode 100644 index 0000000..6ea1c89 --- /dev/null +++ b/training/runs/expA_fresh2.log @@ -0,0 +1,35 @@ +Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02} +Run dir: runs/expA_fresh2 +Curriculum: 2 → 2 sheep, 2,000,000 steps/stage + +[Stage n_sheep=2] training 2,000,000 steps + ... [trial 1 | 2 sheep | 100,000 steps | ret(last 50)=-13.44 sr=0%] + ... [trial 1 | 2 sheep | 200,000 steps | ret(last 50)=-14.60 sr=0%] + ... [trial 1 | 2 sheep | 300,000 steps | ret(last 50)=-17.36 sr=0%] + ... [trial 1 | 2 sheep | 400,000 steps | ret(last 50)=-17.36 sr=0%] + ... [trial 1 | 2 sheep | 500,000 steps | ret(last 50)=-17.92 sr=0%] + ... [trial 1 | 2 sheep | 600,000 steps | ret(last 50)=-15.65 sr=0%] + ... [trial 1 | 2 sheep | 700,000 steps | ret(last 50)=-17.69 sr=2%] + ... [trial 1 | 2 sheep | 800,000 steps | ret(last 50)=-14.61 sr=2%] + ... [trial 1 | 2 sheep | 900,000 steps | ret(last 50)=-17.36 sr=0%] + ... [trial 1 | 2 sheep | 1,000,000 steps | ret(last 50)=-17.44 sr=0%] + ... [trial 1 | 2 sheep | 1,100,000 steps | ret(last 50)=-15.91 sr=2%] + ... [trial 1 | 2 sheep | 1,200,000 steps | ret(last 50)=-16.08 sr=0%] + ... [trial 1 | 2 sheep | 1,300,000 steps | ret(last 50)=-14.34 sr=0%] + ... [trial 1 | 2 sheep | 1,400,000 steps | ret(last 50)=-17.00 sr=2%] + ... [trial 1 | 2 sheep | 1,500,000 steps | ret(last 50)=-18.52 sr=0%] + ... [trial 1 | 2 sheep | 1,600,000 steps | ret(last 50)=-16.68 sr=0%] + ... [trial 1 | 2 sheep | 1,700,000 steps | ret(last 50)=-17.52 sr=0%] + ... [trial 1 | 2 sheep | 1,800,000 steps | ret(last 50)=-17.33 sr=0%] + ... [trial 1 | 2 sheep | 1,900,000 steps | ret(last 50)=-14.96 sr=2%] + ... [trial 1 | 2 sheep | 2,000,000 steps | ret(last 50)=-15.59 sr=0%] +[Stage n_sheep=2] evaluating 30 eps +[Stage n_sheep=2] sr=0% mean_len=1500 mean_min_pen=13.2m mean_act=0.96 + +============================================================ + REPLAY SUMMARY +============================================================ + n_sheep=2 sr= 0% len= 1500 min_pen= 13.2m act=0.96 + + Total time: 10.7 min + Artefacts: runs/expA_fresh2/ diff --git a/training/runs/expA_fresh2/config.json b/training/runs/expA_fresh2/config.json new file mode 100644 index 0000000..b2d15fe --- /dev/null +++ b/training/runs/expA_fresh2/config.json @@ -0,0 +1,11 @@ +{ + "W_PER_SHEEP": 1.0, + "W_ALIGN": 0.0, + "W_PEN_BONUS": 5.0, + "W_STEP_COST": 0.02, + "W_COMPLETE": 200.0, + "W_COMPACT": 1.5, + "ALIGN_SHAPE": "standoff", + "ALIGN_GATED": false, + "ent_coef": 0.02 +} \ No newline at end of file diff --git a/training/runs/expA_fresh2/final_model.zip b/training/runs/expA_fresh2/final_model.zip new file mode 100644 index 0000000..3d8a3e3 Binary files /dev/null and b/training/runs/expA_fresh2/final_model.zip differ diff --git a/training/runs/expA_fresh2/stage_results.json b/training/runs/expA_fresh2/stage_results.json new file mode 100644 index 0000000..323888a --- /dev/null +++ b/training/runs/expA_fresh2/stage_results.json @@ -0,0 +1,9 @@ +[ + { + "n_sheep": 2, + "sr": 0.0, + "mean_len": 1500.0, + "mean_min_pen": 13.171057415008544, + "mean_act": 0.960968065615257 + } +] \ No newline at end of file diff --git a/training/runs/expA_fresh2/vecnorm.pkl b/training/runs/expA_fresh2/vecnorm.pkl new file mode 100644 index 0000000..5e15a6d Binary files /dev/null and b/training/runs/expA_fresh2/vecnorm.pkl differ diff --git a/training/runs/expB_mixed.log b/training/runs/expB_mixed.log new file mode 100644 index 0000000..02c2b65 --- /dev/null +++ b/training/runs/expB_mixed.log @@ -0,0 +1,51 @@ +Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02} +Run dir: runs/expB_mixed +MIXED training: random n_sheep ∈ [1, 3], 3,000,000 total steps + +[Mixed] training 3,000,000 steps + ... [trial 1 | mixed | 100,000 steps | ret(last 50)=-13.68 sr=2%] + ... [trial 1 | mixed | 200,000 steps | ret(last 50)=-14.08 sr=0%] + ... [trial 1 | mixed | 300,000 steps | ret(last 50)=-9.80 sr=0%] + ... [trial 1 | mixed | 400,000 steps | ret(last 50)=-11.20 sr=0%] + ... [trial 1 | mixed | 500,000 steps | ret(last 50)=-10.61 sr=0%] + ... [trial 1 | mixed | 600,000 steps | ret(last 50)=-11.19 sr=0%] + ... [trial 1 | mixed | 700,000 steps | ret(last 50)=-14.22 sr=0%] + ... [trial 1 | mixed | 800,000 steps | ret(last 50)=-6.31 sr=0%] + ... [trial 1 | mixed | 900,000 steps | ret(last 50)=-12.68 sr=0%] + ... [trial 1 | mixed | 1,000,000 steps | ret(last 50)=-11.06 sr=0%] + ... [trial 1 | mixed | 1,100,000 steps | ret(last 50)=-13.39 sr=0%] + ... [trial 1 | mixed | 1,200,000 steps | ret(last 50)=-14.20 sr=0%] + ... [trial 1 | mixed | 1,300,000 steps | ret(last 50)=-11.33 sr=0%] + ... [trial 1 | mixed | 1,400,000 steps | ret(last 50)=-10.73 sr=0%] + ... [trial 1 | mixed | 1,500,000 steps | ret(last 50)=-10.91 sr=0%] + ... [trial 1 | mixed | 1,600,000 steps | ret(last 50)=-10.44 sr=0%] + ... [trial 1 | mixed | 1,700,000 steps | ret(last 50)=-10.56 sr=0%] + ... [trial 1 | mixed | 1,800,000 steps | ret(last 50)=-15.74 sr=0%] + ... [trial 1 | mixed | 1,900,000 steps | ret(last 50)=-13.46 sr=0%] + ... [trial 1 | mixed | 2,000,000 steps | ret(last 50)=-9.86 sr=0%] + ... [trial 1 | mixed | 2,100,000 steps | ret(last 50)=-13.07 sr=0%] + ... [trial 1 | mixed | 2,200,000 steps | ret(last 50)=-9.86 sr=0%] + ... [trial 1 | mixed | 2,300,000 steps | ret(last 50)=-9.73 sr=2%] + ... [trial 1 | mixed | 2,400,000 steps | ret(last 50)=-12.21 sr=0%] + ... [trial 1 | mixed | 2,500,000 steps | ret(last 50)=-14.27 sr=0%] + ... [trial 1 | mixed | 2,600,000 steps | ret(last 50)=-10.90 sr=2%] + ... [trial 1 | mixed | 2,700,000 steps | ret(last 50)=-9.67 sr=0%] + ... [trial 1 | mixed | 2,800,000 steps | ret(last 50)=-14.29 sr=0%] + ... [trial 1 | mixed | 2,900,000 steps | ret(last 50)=-9.08 sr=0%] + ... [trial 1 | mixed | 3,000,000 steps | ret(last 50)=-11.62 sr=6%] +[Mixed] evaluating n=1, 30 eps +[Mixed] n_sheep=1 sr=0% mean_len=1500 mean_min_pen=12.1m mean_act=0.64 +[Mixed] evaluating n=2, 30 eps +[Mixed] n_sheep=2 sr=0% mean_len=1500 mean_min_pen=13.6m mean_act=1.12 +[Mixed] evaluating n=3, 30 eps +[Mixed] n_sheep=3 sr=0% mean_len=1500 mean_min_pen=13.3m mean_act=1.02 + +============================================================ + REPLAY SUMMARY +============================================================ + n_sheep=1 sr= 0% len= 1500 min_pen= 12.1m act=0.64 + n_sheep=2 sr= 0% len= 1500 min_pen= 13.6m act=1.12 + n_sheep=3 sr= 0% len= 1500 min_pen= 13.3m act=1.02 + + Total time: 20.6 min + Artefacts: runs/expB_mixed/ diff --git a/training/runs/expB_mixed/config.json b/training/runs/expB_mixed/config.json new file mode 100644 index 0000000..b2d15fe --- /dev/null +++ b/training/runs/expB_mixed/config.json @@ -0,0 +1,11 @@ +{ + "W_PER_SHEEP": 1.0, + "W_ALIGN": 0.0, + "W_PEN_BONUS": 5.0, + "W_STEP_COST": 0.02, + "W_COMPLETE": 200.0, + "W_COMPACT": 1.5, + "ALIGN_SHAPE": "standoff", + "ALIGN_GATED": false, + "ent_coef": 0.02 +} \ No newline at end of file diff --git a/training/runs/expB_mixed/final_model.zip b/training/runs/expB_mixed/final_model.zip new file mode 100644 index 0000000..211707c Binary files /dev/null and b/training/runs/expB_mixed/final_model.zip differ diff --git a/training/runs/expB_mixed/stage_results.json b/training/runs/expB_mixed/stage_results.json new file mode 100644 index 0000000..735c94e --- /dev/null +++ b/training/runs/expB_mixed/stage_results.json @@ -0,0 +1,23 @@ +[ + { + "n_sheep": 1, + "sr": 0.0, + "mean_len": 1500.0, + "mean_min_pen": 12.136781152089437, + "mean_act": 0.6380681545449439 + }, + { + "n_sheep": 2, + "sr": 0.0, + "mean_len": 1500.0, + "mean_min_pen": 13.609641806284587, + "mean_act": 1.1225489819858792 + }, + { + "n_sheep": 3, + "sr": 0.0, + "mean_len": 1500.0, + "mean_min_pen": 13.337443319956462, + "mean_act": 1.0186407331574738 + } +] \ No newline at end of file diff --git a/training/runs/expB_mixed/vecnorm.pkl b/training/runs/expB_mixed/vecnorm.pkl new file mode 100644 index 0000000..9bb6497 Binary files /dev/null and b/training/runs/expB_mixed/vecnorm.pkl differ