diff --git a/training/runs/expC_clustered.log b/training/runs/expC_clustered.log new file mode 100644 index 0000000..424303f --- /dev/null +++ b/training/runs/expC_clustered.log @@ -0,0 +1,57 @@ +Config: {'W_PER_SHEEP': 1.0, 'W_ALIGN': 0.0, 'W_PEN_BONUS': 5.0, 'W_STEP_COST': 0.02, 'W_COMPLETE': 200.0, 'W_COMPACT': 1.5, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': False, 'ent_coef': 0.02} +Run dir: runs/expC_clustered +Curriculum: 1 → 3 sheep, 1,000,000 steps/stage + +[Stage n_sheep=1] training 1,000,000 steps + ... [trial 1 | 1 sheep | 100,000 steps | ret(last 50)=-17.04 sr=6%] + ... [trial 1 | 1 sheep | 200,000 steps | ret(last 50)=-17.39 sr=4%] + ... [trial 1 | 1 sheep | 300,000 steps | ret(last 50)=-15.50 sr=4%] + ... [trial 1 | 1 sheep | 400,000 steps | ret(last 50)=-2.07 sr=26%] + ... [trial 1 | 1 sheep | 500,000 steps | ret(last 50)=+3.81 sr=52%] + ... [trial 1 | 1 sheep | 600,000 steps | ret(last 50)=+8.03 sr=76%] + ... [trial 1 | 1 sheep | 700,000 steps | ret(last 50)=+9.49 sr=86%] + ... [trial 1 | 1 sheep | 800,000 steps | ret(last 50)=+9.42 sr=88%] + ... [trial 1 | 1 sheep | 900,000 steps | ret(last 50)=+9.49 sr=88%] + ... [trial 1 | 1 sheep | 1,000,000 steps | ret(last 50)=+10.34 sr=94%] +[Stage n_sheep=1] evaluating 30 eps +[Stage n_sheep=1] sr=83% mean_len=519 mean_min_pen=3.5m mean_act=0.25 + +[Stage n_sheep=2] training 1,000,000 steps + ... [trial 1 | 2 sheep | 1,015,816 steps | ret(last 0)=+nan sr=nan%] + ... [trial 1 | 2 sheep | 1,115,816 steps | ret(last 50)=-0.13 sr=10%] + ... [trial 1 | 2 sheep | 1,215,816 steps | ret(last 50)=-1.23 sr=10%] + ... [trial 1 | 2 sheep | 1,315,816 steps | ret(last 50)=-0.10 sr=6%] + ... [trial 1 | 2 sheep | 1,415,816 steps | ret(last 50)=+4.10 sr=28%] + ... [trial 1 | 2 sheep | 1,515,816 steps | ret(last 50)=+6.24 sr=32%] + ... [trial 1 | 2 sheep | 1,615,816 steps | ret(last 50)=+8.48 sr=52%] + ... [trial 1 | 2 sheep | 1,715,816 steps | ret(last 50)=+14.14 sr=98%] + ... [trial 1 | 2 sheep | 1,815,816 steps | ret(last 50)=+14.33 sr=98%] + ... [trial 1 | 2 sheep | 1,915,816 steps | ret(last 50)=+14.02 sr=100%] + ... [trial 1 | 2 sheep | 2,015,816 steps | ret(last 50)=+14.05 sr=100%] +[Stage n_sheep=2] evaluating 30 eps +[Stage n_sheep=2] sr=100% mean_len=695 mean_min_pen=3.4m mean_act=0.58 + +[Stage n_sheep=3] training 1,000,000 steps + ... [trial 1 | 3 sheep | 2,031,624 steps | ret(last 0)=+nan sr=nan%] + ... [trial 1 | 3 sheep | 2,131,624 steps | ret(last 50)=+10.43 sr=56%] + ... [trial 1 | 3 sheep | 2,231,624 steps | ret(last 50)=+13.91 sr=74%] + ... [trial 1 | 3 sheep | 2,331,624 steps | ret(last 50)=+13.98 sr=76%] + ... [trial 1 | 3 sheep | 2,431,624 steps | ret(last 50)=+12.67 sr=68%] + ... [trial 1 | 3 sheep | 2,531,624 steps | ret(last 50)=+15.79 sr=90%] + ... [trial 1 | 3 sheep | 2,631,624 steps | ret(last 50)=+16.29 sr=94%] + ... [trial 1 | 3 sheep | 2,731,624 steps | ret(last 50)=+15.47 sr=90%] + ... [trial 1 | 3 sheep | 2,831,624 steps | ret(last 50)=+16.67 sr=96%] + ... [trial 1 | 3 sheep | 2,931,624 steps | ret(last 50)=+17.50 sr=100%] + ... [trial 1 | 3 sheep | 3,031,624 steps | ret(last 50)=+16.49 sr=96%] +[Stage n_sheep=3] evaluating 30 eps +[Stage n_sheep=3] sr=90% mean_len=794 mean_min_pen=3.7m mean_act=0.47 + +============================================================ + REPLAY SUMMARY +============================================================ + n_sheep=1 sr= 83% len= 519 min_pen= 3.5m act=0.25 + n_sheep=2 sr=100% len= 695 min_pen= 3.4m act=0.58 + n_sheep=3 sr= 90% len= 794 min_pen= 3.7m act=0.47 + + Total time: 15.1 min + Artefacts: runs/expC_clustered/ diff --git a/training/runs/expC_clustered/config.json b/training/runs/expC_clustered/config.json new file mode 100644 index 0000000..b2d15fe --- /dev/null +++ b/training/runs/expC_clustered/config.json @@ -0,0 +1,11 @@ +{ + "W_PER_SHEEP": 1.0, + "W_ALIGN": 0.0, + "W_PEN_BONUS": 5.0, + "W_STEP_COST": 0.02, + "W_COMPLETE": 200.0, + "W_COMPACT": 1.5, + "ALIGN_SHAPE": "standoff", + "ALIGN_GATED": false, + "ent_coef": 0.02 +} \ No newline at end of file diff --git a/training/runs/expC_clustered/final_model.zip b/training/runs/expC_clustered/final_model.zip new file mode 100644 index 0000000..86d9208 Binary files /dev/null and b/training/runs/expC_clustered/final_model.zip differ diff --git a/training/runs/expC_clustered/stage_results.json b/training/runs/expC_clustered/stage_results.json new file mode 100644 index 0000000..7614958 --- /dev/null +++ b/training/runs/expC_clustered/stage_results.json @@ -0,0 +1,23 @@ +[ + { + "n_sheep": 1, + "sr": 0.8333333333333334, + "mean_len": 518.5333333333333, + "mean_min_pen": 3.5244259238243103, + "mean_act": 0.25044742608759274 + }, + { + "n_sheep": 2, + "sr": 1.0, + "mean_len": 694.9, + "mean_min_pen": 3.4314632336298625, + "mean_act": 0.5796192060058971 + }, + { + "n_sheep": 3, + "sr": 0.9, + "mean_len": 794.1333333333333, + "mean_min_pen": 3.6645382324854534, + "mean_act": 0.46590614892287907 + } +] \ No newline at end of file diff --git a/training/runs/expC_clustered/vecnorm.pkl b/training/runs/expC_clustered/vecnorm.pkl new file mode 100644 index 0000000..0cffe9b Binary files /dev/null and b/training/runs/expC_clustered/vecnorm.pkl differ