diff --git a/tools/full_pipeline.sh b/tools/full_pipeline.sh
new file mode 100755
index 0000000..fb83ab9
--- /dev/null
+++ b/tools/full_pipeline.sh
@@ -0,0 +1,136 @@
+#!/usr/bin/env bash
+# Full retrain + eval + Webots-validate pipeline.
+#
+# Usage: bash tools/full_pipeline.sh
+#
+# Output logs are written to the repo root:
+#   full_pipeline.log  — main pipeline log
+#   stage_train.log    — make train_all output
+#   stage_eval.log     — make eval_all output
+#   stage_webots.log   — Webots validation sweep
+#   pipeline_<mode>_<drive>_<world>_n<N>.stdout — raw output of one Webots cell
+#
+# Any failing make stage aborts the pipeline; a failing Webots cell does not.
+#
+# Total runtime estimate: 8–12 hours.
+
+# pipefail is required: every stage is piped through tee, so without it the
+# pipeline status would be tee's and a failed make would go unnoticed.
+# NOTE(review): -u is left off because setup_env.sh is sourced below and its
+# contents are not visible here; confirm it is -u clean before enabling.
+set -eo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$ROOT"
+source "$ROOT/tools/setup_env.sh"
+
+PIPELINE_LOG="$ROOT/full_pipeline.log"
+TRAIN_LOG="$ROOT/stage_train.log"
+EVAL_LOG="$ROOT/stage_eval.log"
+WEBOTS_LOG="$ROOT/stage_webots.log"
+# Every run starts from empty logs.
+truncate -s 0 "$PIPELINE_LOG" "$TRAIN_LOG" "$EVAL_LOG" "$WEBOTS_LOG"
+
+# Print a timestamped message and append it to the main pipeline log.
+log() { echo "[pipeline $(date +%H:%M:%S)] $*" | tee -a "$PIPELINE_LOG"; }
+
+# Run one stage command, mirroring its stdout+stderr into a log file.
+#   $1    stage name, used only in the failure message
+#   $2    log file to append to
+#   $3... command and arguments to run
+# On failure, logs an error and exits the script with status 1.
+run_stage() {
+  local name="$1" logfile="$2"
+  shift 2
+  if ! "$@" 2>&1 | tee -a "$logfile"; then
+    log "ERROR: $name failed (see $logfile); aborting pipeline"
+    exit 1
+  fi
+}
+
+log "=== START full pipeline $(date) ==="
+log ""
+log "Phase 1/4: clean_all"
+run_stage clean_all "$PIPELINE_LOG" make clean_all
+log ""
+
+log "Phase 2/4: train_all (4 combos, ~8h)"
+run_stage train_all "$TRAIN_LOG" make train_all
+log " train_all finished"
+log ""
+
+log "Phase 3/4: eval_all (gym eval, ~30min)"
+run_stage eval_all "$EVAL_LOG" make eval_all
+log " eval_all finished"
+log ""
+
+log "Phase 4/4: Webots validation sweep (~90min)"
+truncate -s 0 "$WEBOTS_LOG"
+
+# Run one headless Webots validation cell and record its result.
+#   $1 controller mode   $2 drive type   $3 world name   $4 n
+# Raw output goes to pipeline_<mode>_<drive>_<world>_n<N>.stdout in the repo
+# root; the first "[results]" line and the best GT_penned value are appended
+# to $WEBOTS_LOG. A failed or timed-out run never aborts the sweep.
+run_cell() {
+  local mode="$1" drive="$2" world="$3" n="$4"
+  local stdout_file="$ROOT/pipeline_${mode}_${drive}_${world}_n${n}.stdout"
+  local rc=0 best=""
+
+  echo "" | tee -a "$WEBOTS_LOG"
+  echo "=== $mode $drive $world n=$n ===" | tee -a "$WEBOTS_LOG"
+  rm -f "$ROOT/training/.run_done"
+
+  # Cap each cell at 320 s; timeout escalates to KILL 15 s after that.
+  timeout --kill-after=15s 320 \
+    xvfb-run -a \
+    env WEBOTS_HEADLESS=1 WEBOTS_EXTRA_ARGS="--stdout --stderr" \
+    HERDING_SEED=42 \
+    bash tools/run_webots.sh "$n" "$mode" "$drive" "$world" \
+    >"$stdout_file" 2>&1 || rc=$?
+  if ((rc != 0)); then
+    # GNU timeout exits 124 on timeout, or 137 when it had to send KILL.
+    echo " run exit status: $rc" | tee -a "$WEBOTS_LOG"
+  fi
+
+  # Pick the "a/b" value following GT_penned= whose a is numerically largest.
+  # "|| true" keeps pipefail from aborting the sweep when nothing matches.
+  best=$(grep "GT_penned=" "$stdout_file" 2>/dev/null \
+    | awk -F'GT_penned=' '{print $2}' \
+    | awk '{split($1,a,"/"); print a[1]"/"a[2]}' \
+    | sort -t/ -k1,1n \
+    | tail -1) || true
+  grep -E "\[results\]" "$stdout_file" 2>/dev/null \
+    | head -n 1 \
+    | tee -a "$WEBOTS_LOG" || true
+  echo " best GT_penned: $best" | tee -a "$WEBOTS_LOG"
+
+  # NOTE: pkill -f matches on the full command line, so this kills every
+  # webots-bin/Xvfb process it is allowed to signal, not just this cell's.
+  pkill -9 -f "webots-bin|Xvfb" 2>/dev/null || true
+  sleep 1
+}
+
+# Differential drive: 4 controllers × 2 worlds × 2 n
+for M in bc rl strombom sequential; do
+  for W in field field_round; do
+    for N in 5 10; do
+      run_cell "$M" differential "$W" "$N"
+    done
+  done
+done
+
+# Mecanum drive: 2 controllers × 2 worlds × 2 n
+for M in bc rl; do
+  for W in field field_round; do
+    for N in 5 10; do
+      run_cell "$M" mecanum "$W" "$N"
+    done
+  done
+done
+
+log ""
+log "=== FULL PIPELINE DONE $(date) ==="
+log ""
+log "Summary:"
+grep -E "=== |best GT_penned" "$WEBOTS_LOG" | tee -a "$PIPELINE_LOG" || true
diff --git a/training/runs/bc_differential_field/policy.zip b/training/runs/bc_differential_field/policy.zip
index 47faee9..486eb7c 100644
Binary files a/training/runs/bc_differential_field/policy.zip and b/training/runs/bc_differential_field/policy.zip differ
diff --git a/training/runs/bc_differential_field_round/policy.zip b/training/runs/bc_differential_field_round/policy.zip
index cdf547c..4d4af7f 100644
Binary files a/training/runs/bc_differential_field_round/policy.zip and b/training/runs/bc_differential_field_round/policy.zip differ
diff --git a/training/runs/rl_differential_field/policy.zip b/training/runs/rl_differential_field/policy.zip
index b1f82b4..ddc7948 100644
Binary files a/training/runs/rl_differential_field/policy.zip and b/training/runs/rl_differential_field/policy.zip differ
diff --git a/training/runs/rl_differential_field_round/policy.zip b/training/runs/rl_differential_field_round/policy.zip
index 76ba579..87cf3d1 100644
Binary files a/training/runs/rl_differential_field_round/policy.zip and b/training/runs/rl_differential_field_round/policy.zip differ