#!/usr/bin/env bash
# Headless Webots sweep across modes, drives, worlds, and flock sizes.
# Runs sequentially; each trial is sent SIGTERM after TIMEOUT_S (120 s) of
# wall-clock time and SIGKILL 15 s later if it is still alive.
# Results are written to webots_sweep.log as fixed-width, space-padded columns
# and echoed to stdout.
#
# Usage: bash tools/webots_sweep.sh [output_log]
#
# Environment:
#   SWEEP_PYTHON_BIN  directory prepended to PATH so Webots picks up the
#                     intended python3 (default: the "tir" conda env below).

set -euo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
readonly ROOT
readonly OUT="${1:-$ROOT/webots_sweep.log}"

# ~80k steps in fast headless mode — covers slow-converging physics
readonly TIMEOUT_S=120

# One format for the header, the separator and every result row so the
# columns always line up.
# Columns: mode drive world n_sheep success steps
readonly ROW_FMT='%-12s %-14s %-12s %7s %7s %s\n'

# Webots uses its own python3; put the conda env first so all deps resolve.
export PATH="${SWEEP_PYTHON_BIN:-/home/jalf/miniconda3/envs/tir/bin}:$PATH"

#######################################
# Print an error message to stderr and abort the sweep.
# Arguments: $* - message
#######################################
die() {
  printf 'webots_sweep: %s\n' "$*" >&2
  exit 1
}

#######################################
# Extract the step count from the first "[dog] all N sheep penned at step S"
# line of a captured trial log.
# Arguments: $1 - captured stdout+stderr of one trial
# Outputs:   the step number, or "unknown" if the line carries no number
# Returns:   0 if a penned line exists, 1 otherwise
#######################################
penned_step() {
  local log="$1"
  local line
  local step_re='penned at step ([0-9]+)'

  # Feed grep through a here-string rather than `echo "$log" | grep -q`:
  # with pipefail, grep exiting at the first match can kill the upstream echo
  # with SIGPIPE on a large log, turning a real success into a failed pipeline.
  # -m1 keeps only the first match so the result is always a single line.
  line=$(grep -m1 -- '\[dog\] all .* sheep penned at step' <<<"$log") || return 1

  if [[ "$line" =~ $step_re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    # The success line was logged but without a parsable number; report that
    # instead of letting a failed parse abort the whole sweep under set -e.
    printf '%s\n' "unknown"
  fi
}

#######################################
# Run one headless Webots trial and append its result row to $OUT.
# Globals:   ROOT, OUT, TIMEOUT_S, ROW_FMT (read)
# Arguments: $1 - mode, $2 - drive, $3 - world, $4 - number of sheep,
#            $5 - optional policy directory, relative to ROOT
# Outputs:   one formatted result row on stdout and in $OUT
#######################################
run_trial() {
  local mode="$1" drive="$2" world="$3" n="$4" policy_dir="${5:-}"

  # Remove the marker file before starting.
  # NOTE(review): presumably left behind by a previous run — confirm.
  local done_file="$ROOT/training/.run_done"
  rm -f -- "$done_file"

  local extra_env=(
    WEBOTS_HEADLESS=1
    WEBOTS_EXTRA_ARGS="--stdout --stderr"
    HERDING_USE_GT=0
  )
  if [[ -n "$policy_dir" ]]; then
    extra_env+=(HERDING_POLICY_DIR="$ROOT/$policy_dir")
  fi

  # A timed-out or crashed trial is an expected outcome of the sweep, so its
  # exit status is deliberately ignored; the verdict comes from the log text.
  local raw
  raw=$(
    timeout --kill-after=15s "$TIMEOUT_S" \
      xvfb-run -a \
      env "${extra_env[@]}" \
      bash "$ROOT/tools/run_webots.sh" "$n" "$mode" "$drive" "$world" \
      2>&1
  ) || true

  # Webots-bin and Xvfb can survive the timeout; kill any orphans now.
  # SIGKILL is intentional: these processes already outlived timeout's signals.
  # Note that this matches every webots-bin/Xvfb process visible to this user.
  pkill -9 -f "webots-bin|Xvfb" 2>/dev/null || true
  sleep 1

  # Any trial that never logged the penned line is recorded as FAIL/timeout.
  local success="FAIL"
  local steps
  if steps=$(penned_step "$raw"); then
    success="OK"
  else
    steps="timeout"
  fi

  # shellcheck disable=SC2059 — ROW_FMT is a fixed, trusted format string.
  printf "$ROW_FMT" \
    "$mode" "$drive" "$world" "$n" "$success" "$steps" | tee -a "$OUT"
}

#######################################
# Verify prerequisites, write the table header, and run the full sweep.
# Globals: ROOT, OUT, ROW_FMT (read)
#######################################
main() {
  local tool mode drive world n

  # Without these every trial would silently be logged as FAIL/timeout.
  for tool in timeout xvfb-run pkill grep; do
    command -v "$tool" >/dev/null || die "required command not found: $tool"
  done
  [[ -f "$ROOT/tools/run_webots.sh" ]] \
    || die "missing launcher: $ROOT/tools/run_webots.sh"

  # shellcheck disable=SC2059
  printf "$ROW_FMT" \
    "mode" "drive" "world" "n_sheep" "success" "steps" | tee "$OUT"
  # shellcheck disable=SC2059
  printf "$ROW_FMT" \
    '----' '-----' '-----' '-------' '-------' '-----' | tee -a "$OUT"

  # -------------------------------------------------------------------------
  # Analytic baselines (differential only — that's the story context)
  #   strombom / sequential: canonical baselines
  #   universal: the actual teacher used to collect BC demos
  # -------------------------------------------------------------------------
  for mode in strombom sequential universal; do
    for world in field field_round; do
      for n in 5 10; do
        run_trial "$mode" differential "$world" "$n"
      done
    done
  done

  # -------------------------------------------------------------------------
  # BC — world-specific policies
  # -------------------------------------------------------------------------
  for drive in differential mecanum; do
    for world in field field_round; do
      for n in 5 10; do
        run_trial bc "$drive" "$world" "$n" \
          "training/runs/bc_${drive}_${world}"
      done
    done
  done

  # -------------------------------------------------------------------------
  # RL_FAST — MODE=rl with explicit HERDING_POLICY_DIR pointing to rl_fast dirs
  # (run_webots.sh rejects "rl_fast" as a mode; "rl" + policy override is
  # correct)
  # -------------------------------------------------------------------------
  for drive in differential mecanum; do
    for world in field field_round; do
      for n in 5 10; do
        run_trial rl "$drive" "$world" "$n" \
          "training/runs/rl_fast_${drive}_${world}"
      done
    done
  done

  printf '\n'
  printf 'Sweep complete. Results saved to: %s\n' "$OUT"
}

main "$@"