#!/bin/bash
# tools/auto_dagger.sh — automated DAgger collection across many headless
# Webots runs.
#
# For each (flock_size, run_index) combination, generates a world with N
# active sheep at randomised positions, launches Webots in fast/headless
# mode, lets the controller log (lidar_obs, teacher_action) pairs for up
# to RUN_SEC seconds, kills the run, and moves on. The dog controller's
# 500-step periodic flush means each run produces a complete .npz even
# when killed by timeout.
#
# Usage:
#   tools/auto_dagger.sh [RUNS_PER_FLOCK] [SECONDS_PER_RUN]
#     RUNS_PER_FLOCK : how many randomised runs per flock size (default 3)
#     SECONDS_PER_RUN: wall-clock cap per Webots run (default 60)
#
# Env-var overrides:
#   HERDING_POLICY_DIR    : policy the controller loads (only used when
#                           HERDING_DAGGER_DRIVER=student).
#                           Default <repo root>/training/runs/bc.
#   HERDING_DAGGER_DRIVER : "teacher" (default) or "student".
#   HEADLESS=1            : force --no-rendering (default on).
#   FLOCKS="1 3 5 8 10"   : space-separated flock sizes to iterate over.
#
# Output:
#   training/dagger/dagger_*.npz — one per Webots run.
#
# After collection, run:
#   python -m tools.dagger_merge_train --out training/runs/bc_dagger

set -e

RUNS_PER_FLOCK=${1:-3}
RUN_SEC=${2:-60}
FLOCKS=${FLOCKS:-"1 3 5 8 10"}
HEADLESS=${HEADLESS:-1}

ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
SRC="$ROOT/worlds/field.wbt"
DST="$ROOT/worlds/field_test.wbt"
POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc}"
DRIVER="${HERDING_DAGGER_DRIVER:-teacher}"
DAGGER_DIR="$ROOT/training/dagger"
DONE_FILE="$DAGGER_DIR/.DONE"
RUN_LOG=/tmp/webots_dagger_run.log

# Highest sheep index that gets disabled when trimming the flock.
# NOTE(review): presumably field.wbt defines sheep1..sheep10 — confirm.
MAX_SHEEP=10
# Seconds to wait after launch before honouring the .DONE sentinel.
GRACE_SEC=10
# Seconds Webots gets to exit on SIGTERM before SIGKILL is sent.
KILL_GRACE_SEC=5

# PID of the Webots run currently in flight; empty when none is running.
WEBOTS_PID=""

# Print an error to stderr and abort.
die() {
  printf 'auto_dagger: %s\n' "$*" >&2
  exit 1
}

# Succeed iff $1 is a non-negative decimal integer without leading zeros
# (leading zeros would be read as octal inside $(( )) / (( ))).
is_uint() {
  [[ "$1" =~ ^(0|[1-9][0-9]*)$ ]]
}

# Stop the Webots process $1: SIGTERM first, SIGKILL if it is still alive
# after KILL_GRACE_SEC seconds, then reap it. Never fails, so it is safe
# under `set -e` even when the process has already exited on its own.
stop_webots() {
  local pid=$1
  local waited=0
  [[ -n "$pid" ]] || return 0
  kill "$pid" 2>/dev/null || true
  while kill -0 "$pid" 2>/dev/null && (( waited < KILL_GRACE_SEC )); do
    sleep 1
    waited=$((waited + 1))
  done
  if kill -0 "$pid" 2>/dev/null; then
    kill -KILL "$pid" 2>/dev/null || true
  fi
  wait "$pid" 2>/dev/null || true
}

# INT/TERM handler: stop the in-flight run (if any) and exit non-zero.
cleanup() {
  echo "Caught interrupt — killing Webots (pid=$WEBOTS_PID) and exiting."
  stop_webots "$WEBOTS_PID"
  WEBOTS_PID=""
  exit 1
}

# EXIT handler: make sure no Webots process outlives the script, e.g. when
# `set -e` aborts in the middle of a run. A no-op when WEBOTS_PID is empty.
on_exit() {
  stop_webots "$WEBOTS_PID"
}

trap cleanup INT TERM
trap on_exit EXIT

# Print the number of dagger_*.npz files currently in DAGGER_DIR
# (0 when there are none or the directory does not exist).
count_dagger_files() {
  local -a files
  shopt -s nullglob
  files=("$DAGGER_DIR"/dagger_*.npz)
  shopt -u nullglob
  echo "${#files[@]}"
}

# Build DST from SRC for one run.
#   $1 - flock size: sheep with index above it (up to MAX_SHEEP) are
#        commented out of the world file
#   $2 - RNG seed for the position jitter
generate_world() {
  local flock=$1
  local seed=$2
  local i

  cp "$SRC" "$DST"
  for (( i = flock + 1; i <= MAX_SHEEP; i++ )); do
    sed -i "s|^Sheep .* \"sheep${i}\".*|# &|" "$DST"
  done

  # Inline Python: jitter sheep1..flock translations.
  python3 - "$DST" "$flock" "$seed" <<'PYEOF'
import random
import re
import sys

path, n_str, seed = sys.argv[1], sys.argv[2], sys.argv[3]
n = int(n_str)
random.seed(int(seed))

with open(path) as f:
    txt = f.read()


def rand_pos():
    while True:
        x = random.uniform(-12.0, 12.0)
        y = random.uniform(-10.0, 12.0)  # avoid the gate strip
        if x * x + y * y > 9.0:          # at least 3 m from dog spawn
            return x, y


for i in range(1, n + 1):
    x, y = rand_pos()
    pat = re.compile(
        r'Sheep \{ translation\s+\S+\s+\S+\s+(\S+)\s+name "sheep' + str(i) + r'"'
    )
    # The third translation component is kept as-is via the capture group.
    txt, hits = pat.subn(
        rf'Sheep {{ translation {x:.2f} {y:.2f} \g<1> name "sheep{i}"',
        txt, count=1)
    if hits == 0:
        print(f"warning: sheep{i} not found in {path}; position not jittered",
              file=sys.stderr)

with open(path, "w") as f:
    f.write(txt)
PYEOF
}

# Launch Webots on DST in the background and poll every 2 s until it exits
# by itself, the .DONE sentinel appears (after GRACE_SEC), or RUN_SEC
# elapses — whichever comes first.
run_webots() {
  local elapsed=0

  rm -f "$DONE_FILE"
  webots "${webots_args[@]}" "$DST" > "$RUN_LOG" 2>&1 &
  WEBOTS_PID=$!

  # The sentinel is ignored for the first GRACE_SEC seconds, otherwise a
  # sheep that spawns already penned triggers an instant false-positive
  # kill.
  while kill -0 "$WEBOTS_PID" 2>/dev/null; do
    if (( elapsed >= GRACE_SEC )) && [[ -f "$DONE_FILE" ]]; then
      echo "  sentinel .DONE detected — killing Webots early"
      stop_webots "$WEBOTS_PID"
      break
    fi
    if (( elapsed >= RUN_SEC )); then
      echo "  timeout ($RUN_SEC s) — killing Webots"
      stop_webots "$WEBOTS_PID"
      break
    fi
    sleep 2
    elapsed=$((elapsed + 2))
  done
  WEBOTS_PID=""
}

# Quick sanity check from the run log: did the controller actually run?
report_run() {
  local new_pairs
  if grep -q "running in mode=dagger" "$RUN_LOG"; then
    new_pairs=$(tail -50 "$RUN_LOG" | grep -oE 'logged=[0-9]+' | tail -1)
    echo "  controller ran (${new_pairs:-logged=?})"
  else
    echo "  WARNING: controller may not have started (see $RUN_LOG)"
  fi
}

# --- argument / environment validation -----------------------------------

is_uint "$RUNS_PER_FLOCK" \
  || die "RUNS_PER_FLOCK must be a non-negative integer, got '$RUNS_PER_FLOCK'"
is_uint "$RUN_SEC" \
  || die "SECONDS_PER_RUN must be a non-negative integer, got '$RUN_SEC'"

# Split FLOCKS on whitespace without glob expansion. `read -d ''` returns
# non-zero at end of input even though the array was filled, hence `|| true`.
flock_sizes=()
read -r -d '' -a flock_sizes <<< "$FLOCKS" || true
for flock in "${flock_sizes[@]}"; do
  is_uint "$flock" || die "FLOCKS entries must be integers, got '$flock'"
done

command -v webots >/dev/null || die "webots not found on PATH"
command -v python3 >/dev/null || die "python3 not found on PATH"
[[ -f "$SRC" ]] || die "base world not found: $SRC"

webots_args=(--mode=fast --batch --minimize)
if [[ "$HEADLESS" == "1" ]]; then
  webots_args+=(--no-rendering)
fi

echo "Auto-dagger collection"
echo "  flock sizes     : $FLOCKS"
echo "  runs per size   : $RUNS_PER_FLOCK"
echo "  seconds per run : $RUN_SEC"
echo "  policy dir      : $POLICY_DIR (used only when driver=student)"
echo "  driver          : $DRIVER"
echo "  webots flags    : ${webots_args[*]}"
echo

# Runtime config — written up front so that a manual webots launch at the
# same time would also pick it up.
# NOTE(review): the body of this here-document was not recoverable from the
# reviewed copy of this script. The three keys below are a best-effort
# reconstruction from the variables this script prepares and the
# "running in mode=dagger" log line it greps for — restore the original
# keys / confirm against the controller's config parser before merging.
cat > "$ROOT/herding_runtime.cfg" <<EOF
HERDING_MODE=dagger
HERDING_POLICY_DIR=$POLICY_DIR
HERDING_DAGGER_DRIVER=$DRIVER
EOF

mkdir -p "$DAGGER_DIR"
before_count=$(count_dagger_files)

total_runs=$(( ${#flock_sizes[@]} * RUNS_PER_FLOCK ))
run_idx=0

for flock in "${flock_sizes[@]}"; do
  for (( run = 1; run <= RUNS_PER_FLOCK; run++ )); do
    run_idx=$((run_idx + 1))
    # Deterministic per-(flock, run) seed so a run can be reproduced.
    seed=$((1000 * flock + run))
    echo "=== [$run_idx/$total_runs] flock=$flock run=$run seed=$seed ==="

    generate_world "$flock" "$seed"
    run_webots
    report_run
  done
done

after_count=$(count_dagger_files)
new_files=$((after_count - before_count))

echo
echo "Done."
echo "  new dagger files : $new_files"
echo "  total in dir     : $after_count"
echo
echo "Next:"
echo "  python -m tools.dagger_merge_train --out training/runs/bc_dagger"