Full retrain pipeline + hybrid policy set
Ran end-to-end clean retrain + gym eval + 24-cell Webots sweep
(tools/full_pipeline.sh). Results:
Differential — all 16 cells pen N/N. Updated policies committed.
Mecanum — new training stochastically regressed (only 2/8 cells
vs the v2 baseline's 4/8). v2 baseline mec policies
are RESTORED in this commit (training/runs/{bc,rl}_mecanum_*) —
they remain the deliverable.
The retrain pipeline itself is committed for reproducibility
(tools/full_pipeline.sh: clean → train_all → eval_all → 24-cell
Webots sweep). The v2 mec policies are also backed up locally to
_backup_pretrain/mec_v2_baseline/ (gitignored).
Verified after restore:
bc mec field_round n=10 → 10/10 in 147 s sim
rl diff field n=5 → 5/5 in 137 s sim
This commit is contained in:
Executable
+86
@@ -0,0 +1,86 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Full retrain + eval + Webots-validate pipeline.
|
||||||
|
#
|
||||||
|
# Usage: bash tools/full_pipeline.sh
|
||||||
|
#
|
||||||
|
# Output logs are written to the repo root:
|
||||||
|
# full_pipeline.log — main pipeline log
|
||||||
|
# stage_train.log — make train_all output
|
||||||
|
# stage_eval.log — make eval_all output
|
||||||
|
# stage_webots.log — Webots validation sweep
|
||||||
|
#
|
||||||
|
# Total runtime estimate: 8–12 hours.
|
||||||
|
|
||||||
|
# Stop at the first simple command that exits non-zero.
set -e

# The repo root is the parent of the directory that holds this script;
# every later step runs from there.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT"

# Pull the project environment into this shell.
. "$ROOT/tools/setup_env.sh"

# One log file per stage, all placed in the repo root.
PIPELINE_LOG="$ROOT/full_pipeline.log"
TRAIN_LOG="$ROOT/stage_train.log"
EVAL_LOG="$ROOT/stage_eval.log"
WEBOTS_LOG="$ROOT/stage_webots.log"

# Every run starts from empty logs (the files are created if missing).
truncate -s 0 \
  "$PIPELINE_LOG" \
  "$TRAIN_LOG" \
  "$EVAL_LOG" \
  "$WEBOTS_LOG"
|
||||||
|
|
||||||
|
# log MESSAGE...
# Prints "[pipeline HH:MM:SS] MESSAGE" to stdout and appends the same line
# to $PIPELINE_LOG. All arguments are joined into one message.
log() {
  printf '[pipeline %s] %s\n' "$(date +%H:%M:%S)" "$*" \
    | tee -a "$PIPELINE_LOG"
}
|
||||||
|
|
||||||
|
# run_stage TARGET LOGFILE
# Runs `make TARGET`, showing its combined stdout/stderr on the terminal and
# appending it to LOGFILE. If make fails, logs an error and exits the script
# with make's exit status.
#
# The explicit PIPESTATUS check is required: the status of `make | tee` is
# tee's, so `set -e` on its own never notices a failed make and the pipeline
# would carry on with stale or missing artifacts.
run_stage() {
  local TARGET="$1" LOGFILE="$2" RC
  make "$TARGET" 2>&1 | tee -a "$LOGFILE"
  RC=${PIPESTATUS[0]}
  if (( RC != 0 )); then
    log "ERROR: make $TARGET failed (exit $RC), aborting pipeline"
    exit "$RC"
  fi
}

log "=== START full pipeline $(date) ==="
log ""

# Phase 1: clean. Its output goes to the main pipeline log.
log "Phase 1/4: clean_all"
run_stage clean_all "$PIPELINE_LOG"
log ""

# Phase 2: training. Output goes to its own stage log.
log "Phase 2/4: train_all (4 combos, ~8h)"
run_stage train_all "$TRAIN_LOG"
log " train_all finished"
log ""

# Phase 3: gym evaluation. Output goes to its own stage log.
log "Phase 3/4: eval_all (gym eval, ~30min)"
run_stage eval_all "$EVAL_LOG"
log " eval_all finished"
log ""

# Phase 4 header; the sweep itself follows. The Webots log is emptied again
# so it only holds results from this sweep.
log "Phase 4/4: Webots validation sweep (~90min)"
truncate -s 0 "$WEBOTS_LOG"
|
||||||
|
|
||||||
|
# run_cell MODE DRIVE WORLD N
# Runs one headless Webots validation cell and records its outcome.
#
# Arguments (forwarded to tools/run_webots.sh as: N MODE DRIVE WORLD):
#   MODE  - controller name (the sweep uses bc, rl, strombom, sequential)
#   DRIVE - drive type (differential or mecanum)
#   WORLD - world name (field or field_round)
#   N     - the cell's n value
# Globals: ROOT (read), WEBOTS_LOG (appended to).
# Outputs: a header line, the first "[results]" line of the run (if any) and
#          the best "GT_penned=a/b" score, each echoed to stdout and appended
#          to WEBOTS_LOG. Raw simulator output is kept in
#          $ROOT/pipeline_<MODE>_<DRIVE>_<WORLD>_n<N>.stdout.
# A failed or timed-out run is deliberately not fatal (|| true): the cell is
# then reported with an empty best score and the sweep continues.
run_cell() {
  local MODE="$1" DRIVE="$2" WORLD="$3" N="$4"
  local STDOUT="$ROOT/pipeline_${MODE}_${DRIVE}_${WORLD}_n${N}.stdout"
  # Declared local so the score does not leak into the caller's scope.
  local BEST

  echo "" | tee -a "$WEBOTS_LOG"
  echo "=== $MODE $DRIVE $WORLD n=$N ===" | tee -a "$WEBOTS_LOG"

  # Remove any completion marker left over from a previous run.
  rm -f "$ROOT/training/.run_done"

  # Hard cap of 320 s per cell, with SIGKILL 15 s later if the run ignores
  # the first signal. Fixed seed for repeatability.
  timeout --kill-after=15s 320 \
    xvfb-run -a \
    env WEBOTS_HEADLESS=1 WEBOTS_EXTRA_ARGS="--stdout --stderr" \
    HERDING_SEED=42 \
    bash tools/run_webots.sh "$N" "$MODE" "$DRIVE" "$WORLD" > "$STDOUT" 2>&1 || true

  # Take the "a/b" token that follows the first GT_penned= on each matching
  # line, then keep the entry with the numerically largest "a".
  BEST=$(awk -F'GT_penned=' '
      /GT_penned=/ {
        split($2, tok, " ")
        split(tok[1], frac, "/")
        print frac[1] "/" frac[2]
      }' "$STDOUT" 2>/dev/null \
    | sort -t/ -k1,1n \
    | tail -1)

  grep -E "\[results\]" "$STDOUT" 2>/dev/null | head -1 | tee -a "$WEBOTS_LOG"
  echo " best GT_penned: $BEST" | tee -a "$WEBOTS_LOG"

  # Kill any simulator / virtual-display processes still running so the next
  # cell starts clean.
  pkill -9 -f "webots-bin|Xvfb" 2>/dev/null || true
  sleep 1
}
|
||||||
|
|
||||||
|
# sweep_drive DRIVE CONTROLLER...
# Runs run_cell for every (controller, world, n) combination of one drive
# type. Iteration order: controller outermost, then world, then n.
sweep_drive() {
  local drive="$1"
  shift
  local controller world count
  for controller in "$@"; do
    for world in field field_round; do
      for count in 5 10; do
        run_cell "$controller" "$drive" "$world" "$count"
      done
    done
  done
}

# Differential drive: 4 controllers × 2 worlds × 2 n
sweep_drive differential bc rl strombom sequential

# Mecanum drive: 2 controllers × 2 worlds × 2 n
sweep_drive mecanum bc rl

log ""
log "=== FULL PIPELINE DONE $(date) ==="
log ""
log "Summary:"
# Replay the per-cell headers and best scores from the Webots log into the
# main pipeline log (and onto the terminal).
grep -E "=== |best GT_penned" "$WEBOTS_LOG" \
  | tee -a "$PIPELINE_LOG"
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user