diff --git a/training/runs/webots_n3.log b/training/runs/webots_n3.log new file mode 100644 index 0000000..cd3ef43 --- /dev/null +++ b/training/runs/webots_n3.log @@ -0,0 +1,81 @@ +Config loaded from config.json +Config: {'W_PER_SHEEP': 2.0, 'W_ALIGN': 0.05, 'W_PEN_BONUS': 10.0, 'W_COMPLETE': 100.0, 'W_STEP_COST': 0.02, 'W_SOUTH': 0.01, 'W_COMPACT': 0.0, 'W_WALL_TOUCH': 0.0, 'WALL_TOUCH_BUFFER': 0.4, 'ALIGN_SHAPE': 'standoff', 'ALIGN_GATED': True, 'ENTRY_AWARE': True, 'ent_coef': 0.02} +Run dir: runs/webots_n3 +Curriculum: 1 → 3 sheep, 1,500,000 steps/stage + + +[Stage n_sheep=1] training 1,500,000 steps + ... [1 sheep | 100,000 steps | ret(last 41)=-22.59 win_sr=15% cum_sr=15%] + ... [1 sheep | 200,000 steps | ret(last 50)=-24.68 win_sr=12% cum_sr=13%] + ... [1 sheep | 300,000 steps | ret(last 50)=-23.63 win_sr=10% cum_sr=11%] + ... [1 sheep | 400,000 steps | ret(last 50)=-18.18 win_sr=12% cum_sr=12%] + ... [1 sheep | 500,000 steps | ret(last 50)=+18.15 win_sr=100% cum_sr=37%] + ... [1 sheep | 600,000 steps | ret(last 50)=+14.43 win_sr=100% cum_sr=69%] + ... [1 sheep | 700,000 steps | ret(last 50)=+14.09 win_sr=100% cum_sr=81%] + ... [1 sheep | 800,000 steps | ret(last 50)=+13.60 win_sr=100% cum_sr=87%] + ... [1 sheep | 900,000 steps | ret(last 50)=+13.64 win_sr=100% cum_sr=90%] + ... [1 sheep | 1,000,000 steps | ret(last 50)=+13.70 win_sr=100% cum_sr=92%] + ... [1 sheep | 1,100,000 steps | ret(last 50)=+13.03 win_sr=100% cum_sr=93%] + ... [1 sheep | 1,200,000 steps | ret(last 50)=+13.32 win_sr=100% cum_sr=94%] + ... [1 sheep | 1,300,000 steps | ret(last 50)=+13.19 win_sr=100% cum_sr=95%] + ... [1 sheep | 1,400,000 steps | ret(last 50)=+13.56 win_sr=100% cum_sr=95%] + ... [1 sheep | 1,500,000 steps | ret(last 50)=+13.20 win_sr=100% cum_sr=96%] +[Stage n_sheep=1] evaluating 30 eps +[Stage n_sheep=1] sr=80% mean_len=687 mean_min_pen=3.7m mean_act=0.16 + failure modes: SUCCESS=24 COMPACT_CANT_DRIVE=6 + reward/step: progress=+0.0388 alignment=+0.0002 south=-0.0022 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0117 step_cost=-0.0200 complete=+0.1165 + +[Stage n_sheep=2] training 1,500,000 steps + ... [2 sheep | 1,507,336 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [2 sheep | 1,607,336 steps | ret(last 41)=-9.02 win_sr=2% cum_sr=2%] + ... [2 sheep | 1,707,336 steps | ret(last 50)=-7.70 win_sr=6% cum_sr=4%] + ... [2 sheep | 1,807,336 steps | ret(last 50)=-5.98 win_sr=16% cum_sr=9%] + ... [2 sheep | 1,907,336 steps | ret(last 50)=-6.55 win_sr=16% cum_sr=10%] + ... [2 sheep | 2,007,336 steps | ret(last 50)=-9.51 win_sr=10% cum_sr=10%] + ... [2 sheep | 2,107,336 steps | ret(last 50)=-0.32 win_sr=36% cum_sr=15%] + ... [2 sheep | 2,207,336 steps | ret(last 50)=+7.58 win_sr=76% cum_sr=28%] + ... [2 sheep | 2,307,336 steps | ret(last 50)=+16.41 win_sr=100% cum_sr=41%] + ... [2 sheep | 2,407,336 steps | ret(last 50)=+17.65 win_sr=100% cum_sr=54%] + ... [2 sheep | 2,507,336 steps | ret(last 50)=+18.87 win_sr=100% cum_sr=63%] + ... [2 sheep | 2,607,336 steps | ret(last 50)=+19.68 win_sr=100% cum_sr=69%] + ... [2 sheep | 2,707,336 steps | ret(last 50)=+19.69 win_sr=100% cum_sr=73%] + ... [2 sheep | 2,807,336 steps | ret(last 50)=+19.71 win_sr=100% cum_sr=77%] + ... [2 sheep | 2,907,336 steps | ret(last 50)=+18.49 win_sr=100% cum_sr=79%] + ... [2 sheep | 3,007,336 steps | ret(last 50)=+19.24 win_sr=100% cum_sr=81%] +[Stage n_sheep=2] evaluating 30 eps +[Stage n_sheep=2] sr=100% mean_len=654 mean_min_pen=3.4m mean_act=0.87 + failure modes: SUCCESS=30 + reward/step: progress=+0.0905 alignment=+0.0136 south=-0.0078 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0306 step_cost=-0.0200 complete=+0.1529 + +[Stage n_sheep=3] training 1,500,000 steps + ... [3 sheep | 3,014,664 steps | ret(last 0)=+nan win_sr=nan% cum_sr=nan%] + ... [3 sheep | 3,114,664 steps | ret(last 50)=+25.01 win_sr=100% cum_sr=100%] + ... [3 sheep | 3,214,664 steps | ret(last 50)=+23.20 win_sr=98% cum_sr=99%] + ... [3 sheep | 3,314,664 steps | ret(last 50)=+24.99 win_sr=100% cum_sr=100%] + ... [3 sheep | 3,414,664 steps | ret(last 50)=+24.87 win_sr=100% cum_sr=100%] + ... [3 sheep | 3,514,664 steps | ret(last 50)=+24.74 win_sr=100% cum_sr=100%] + ... [3 sheep | 3,614,664 steps | ret(last 50)=+21.31 win_sr=100% cum_sr=100%] + ... [3 sheep | 3,714,664 steps | ret(last 50)=+22.95 win_sr=98% cum_sr=100%] + ... [3 sheep | 3,814,664 steps | ret(last 50)=+23.87 win_sr=100% cum_sr=100%] + ... [3 sheep | 3,914,664 steps | ret(last 50)=+23.55 win_sr=100% cum_sr=100%] + ... [3 sheep | 4,014,664 steps | ret(last 50)=+26.08 win_sr=100% cum_sr=100%] + ... [3 sheep | 4,114,664 steps | ret(last 50)=+24.06 win_sr=100% cum_sr=100%] + ... [3 sheep | 4,214,664 steps | ret(last 50)=+24.43 win_sr=100% cum_sr=100%] + ... [3 sheep | 4,314,664 steps | ret(last 50)=+21.18 win_sr=96% cum_sr=100%] + ... [3 sheep | 4,414,664 steps | ret(last 50)=+23.24 win_sr=100% cum_sr=100%] + ... [3 sheep | 4,514,664 steps | ret(last 50)=+23.97 win_sr=100% cum_sr=100%] +[Stage n_sheep=3] evaluating 30 eps +[Stage n_sheep=3] sr=73% mean_len=1270 mean_min_pen=1.8m mean_act=1.12 + failure modes: SUCCESS=22 PARTIAL_1of3=8 + reward/step: progress=+0.0685 alignment=+0.0103 south=-0.0295 compact=+0.0000 wall_touch=+0.0000 pen_bonus=+0.0194 step_cost=-0.0200 complete=+0.0578 + +====================================================================== + TRAINING SUMMARY +====================================================================== + n_sheep=1 sr= 80% len= 687 min_pen= 3.7m act=0.16 + n_sheep=2 sr=100% len= 654 min_pen= 3.4m act=0.87 + n_sheep=3 sr= 73% len= 1270 min_pen= 1.8m act=1.12 + + Total time: 23.6 min + Artefacts: runs/webots_n3/ + Plots: runs/webots_n3/success_rate.png, runs/webots_n3/eval/ diff --git a/training/runs/webots_n3/config.json b/training/runs/webots_n3/config.json new file mode 100644 index 0000000..a2a98c3 --- /dev/null +++ b/training/runs/webots_n3/config.json @@ -0,0 +1,15 @@ +{ + "W_PER_SHEEP": 2.0, + "W_ALIGN": 0.05, + "W_PEN_BONUS": 10.0, + "W_COMPLETE": 100.0, + "W_STEP_COST": 0.02, + "W_SOUTH": 0.01, + "W_COMPACT": 0.0, + "W_WALL_TOUCH": 0.0, + "WALL_TOUCH_BUFFER": 0.4, + "ALIGN_SHAPE": "standoff", + "ALIGN_GATED": true, + "ENTRY_AWARE": true, + "ent_coef": 0.02 +} \ No newline at end of file diff --git a/training/runs/webots_n3/eval/ep_1s_fail.gif b/training/runs/webots_n3/eval/ep_1s_fail.gif new file mode 100644 index 0000000..cf7aa57 Binary files /dev/null and b/training/runs/webots_n3/eval/ep_1s_fail.gif differ diff --git a/training/runs/webots_n3/eval/ep_2s_success.gif b/training/runs/webots_n3/eval/ep_2s_success.gif new file mode 100644 index 0000000..9a757d4 Binary files /dev/null and b/training/runs/webots_n3/eval/ep_2s_success.gif differ diff --git a/training/runs/webots_n3/eval/ep_3s_success.gif b/training/runs/webots_n3/eval/ep_3s_success.gif new file mode 100644 index 0000000..7a8b62d Binary files /dev/null and b/training/runs/webots_n3/eval/ep_3s_success.gif differ diff --git a/training/runs/webots_n3/eval/traj_1s_fail.png b/training/runs/webots_n3/eval/traj_1s_fail.png new file mode 100644 index 0000000..7be1651 Binary files /dev/null and b/training/runs/webots_n3/eval/traj_1s_fail.png differ diff --git a/training/runs/webots_n3/eval/traj_2s_success.png b/training/runs/webots_n3/eval/traj_2s_success.png new file mode 100644 index 0000000..67e047d Binary files /dev/null and b/training/runs/webots_n3/eval/traj_2s_success.png differ diff --git a/training/runs/webots_n3/eval/traj_3s_success.png b/training/runs/webots_n3/eval/traj_3s_success.png new file mode 100644 index 0000000..595811c Binary files /dev/null and b/training/runs/webots_n3/eval/traj_3s_success.png differ diff --git a/training/runs/webots_n3/eval/ts_1s_fail.png b/training/runs/webots_n3/eval/ts_1s_fail.png new file mode 100644 index 0000000..8fb7e39 Binary files /dev/null and b/training/runs/webots_n3/eval/ts_1s_fail.png differ diff --git a/training/runs/webots_n3/eval/ts_2s_success.png b/training/runs/webots_n3/eval/ts_2s_success.png new file mode 100644 index 0000000..8bd4df2 Binary files /dev/null and b/training/runs/webots_n3/eval/ts_2s_success.png differ diff --git a/training/runs/webots_n3/eval/ts_3s_success.png b/training/runs/webots_n3/eval/ts_3s_success.png new file mode 100644 index 0000000..48765ea Binary files /dev/null and b/training/runs/webots_n3/eval/ts_3s_success.png differ diff --git a/training/runs/webots_n3/final_model.zip b/training/runs/webots_n3/final_model.zip new file mode 100644 index 0000000..139a531 Binary files /dev/null and b/training/runs/webots_n3/final_model.zip differ diff --git a/training/runs/webots_n3/stage_results.json b/training/runs/webots_n3/stage_results.json new file mode 100644 index 0000000..41472df --- /dev/null +++ b/training/runs/webots_n3/stage_results.json @@ -0,0 +1,64 @@ +[ + { + "sr": 0.8, + "mean_len": 686.6666666666666, + "mean_min_pen": 3.6629639943440755, + "mean_act": 0.16181865727591677, + "failure_modes": { + "SUCCESS": 24, + "COMPACT_CANT_DRIVE": 6 + }, + "reward_per_step": { + "progress": 0.03881567865085689, + "alignment": 0.00015139903442241512, + "south": -0.00216557193820928, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.011650485436893204, + "step_cost": -0.019999999999995886, + "complete": 0.11650485436893204 + }, + "n_sheep": 1 + }, + { + "sr": 1.0, + "mean_len": 654.0, + "mean_min_pen": 3.4076531132062278, + "mean_act": 0.8715365563710358, + "failure_modes": { + "SUCCESS": 30 + }, + "reward_per_step": { + "progress": 0.09053953751754566, + "alignment": 0.01356517500879768, + "south": -0.007814150244939332, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.03058103975535168, + "step_cost": -0.01999999999999659, + "complete": 0.1529051987767584 + }, + "n_sheep": 2 + }, + { + "sr": 0.7333333333333333, + "mean_len": 1269.6333333333334, + "mean_min_pen": 1.7831791202227274, + "mean_act": 1.1211744887920247, + "failure_modes": { + "SUCCESS": 22, + "PARTIAL_1of3": 8 + }, + "reward_per_step": { + "progress": 0.06853864337494794, + "alignment": 0.010319892308307451, + "south": -0.029485590403771442, + "compact": 0.0, + "wall_touch": 0.0, + "pen_bonus": 0.01942818136469847, + "step_cost": -0.019999999999989426, + "complete": 0.05775945811126572 + }, + "n_sheep": 3 + } +] \ No newline at end of file diff --git a/training/runs/webots_n3/success_rate.png b/training/runs/webots_n3/success_rate.png new file mode 100644 index 0000000..6422772 Binary files /dev/null and b/training/runs/webots_n3/success_rate.png differ diff --git a/training/runs/webots_n3/vecnorm.pkl b/training/runs/webots_n3/vecnorm.pkl new file mode 100644 index 0000000..695f22c Binary files /dev/null and b/training/runs/webots_n3/vecnorm.pkl differ