#!/bin/bash
# tools/auto_dagger.sh — automated DAgger collection across many headless
# Webots runs.
#
# For each (flock_size, run_index) combination, generates a world with N
# active sheep at randomised positions, launches Webots in fast/headless
# mode, lets the controller log (lidar_obs, teacher_action) pairs for up
# to RUN_SEC seconds, kills the run, and moves on. The dog controller's
# 500-step periodic flush means each run produces a complete .npz even
# when killed by timeout.
#
# Usage:
#   tools/auto_dagger.sh [RUNS_PER_FLOCK] [SECONDS_PER_RUN]
#     RUNS_PER_FLOCK : how many randomised runs per flock size (default 3)
#     SECONDS_PER_RUN: wall-clock cap per Webots run (default 60)
#
# Env-var overrides:
#   HERDING_POLICY_DIR    : policy the controller loads (only used when
#                           HERDING_DAGGER_DRIVER=student).
#                           Default: <repo root>/training/runs/bc_v3.
#   HERDING_DAGGER_DRIVER : "teacher" (default) or "student".
#   HEADLESS=1            : force --no-rendering (default on).
#   FLOCKS="1 3 5 8 10"   : space-separated flock sizes to iterate over.
#
# Output:
#   training/dagger/dagger_<ts>.npz — one per Webots run.
#
# After collection, run:
#   python -m tools.dagger_merge_train --out training/runs/bc_dagger

set -e

# Abort (exit status 2) unless the given value consists solely of decimal
# digits.
#   $1 - name shown in the error message
#   $2 - value to validate
require_uint() {
  local digits_only='^[0-9]+$'
  if [[ ! "$2" =~ $digits_only ]]; then
    printf 'auto_dagger: %s must be a non-negative integer (got "%s")\n' \
      "$1" "$2" >&2
    exit 2
  fi
}

# Positional arguments, with their documented defaults.
RUNS_PER_FLOCK=${1:-3}
RUN_SEC=${2:-60}
require_uint RUNS_PER_FLOCK "$RUNS_PER_FLOCK"
require_uint SECONDS_PER_RUN "$RUN_SEC"
# Force base 10 so a zero-padded value such as "08" is not parsed as
# (invalid) octal by the shell arithmetic that uses these values later.
RUNS_PER_FLOCK=$((10#$RUNS_PER_FLOCK))
RUN_SEC=$((10#$RUN_SEC))

# Environment overrides.
HEADLESS=${HEADLESS:-1}
FLOCKS=${FLOCKS:-"1 3 5 8 10"}
flock_sizes=()
for size in $FLOCKS; do   # unquoted on purpose: split the list on whitespace
  require_uint "FLOCKS entry" "$size"
  flock_sizes+=("$((10#$size))")
done
FLOCKS="${flock_sizes[*]}"
unset flock_sizes size

# Repository root = parent of the directory that holds this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
SRC="$ROOT/worlds/field.wbt"        # template world, copied for every run
DST="$ROOT/worlds/field_test.wbt"   # generated world that Webots is launched on
POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_v3}"
DRIVER="${HERDING_DAGGER_DRIVER:-teacher}"
DONE_FILE="$ROOT/training/dagger/.DONE"   # sentinel polled while a run is live
WEBOTS_PID=""                             # PID of the live Webots run; empty between runs

cleanup() {
|
|
echo "Caught interrupt — killing Webots (pid=$WEBOTS_PID) and exiting."
|
|
[[ -n "$WEBOTS_PID" ]] && kill "$WEBOTS_PID" 2>/dev/null
|
|
wait "$WEBOTS_PID" 2>/dev/null || true
|
|
exit 1
|
|
}
|
|
trap cleanup INT TERM
|
|
|
|
# Flags passed to every Webots launch. Rendering is additionally switched
# off when HEADLESS is exactly "1".
case "$HEADLESS" in
  1) webots_args=(--mode=fast --batch --minimize --no-rendering) ;;
  *) webots_args=(--mode=fast --batch --minimize) ;;
esac

# Start-up banner: echo the effective settings, followed by a blank line.
printf '%s\n' \
  "Auto-dagger collection" \
  " flock sizes : $FLOCKS" \
  " runs per size : $RUNS_PER_FLOCK" \
  " seconds per run : $RUN_SEC" \
  " policy dir : $POLICY_DIR (used only when driver=student)" \
  " driver : $DRIVER" \
  " webots flags : ${webots_args[*]}" \
  ""

# Runtime config — written once, before the first run. Nothing in this
# script's run loop rewrites it, so every launch below (and a manual Webots
# launch made while the script is running) finds the same three settings.
# NOTE(review): presumably read by the dog controller at start-up — confirm.
printf '%s\n' \
  "HERDING_MODE=dagger" \
  "HERDING_POLICY_DIR=$POLICY_DIR" \
  "HERDING_DAGGER_DRIVER=$DRIVER" \
  > "$ROOT/herding_runtime.cfg"

# Count the dagger files that already exist, so the final summary can report
# how many this invocation added.
mkdir -p "$ROOT/training/dagger"
before_count=0
for npz in "$ROOT/training/dagger"/dagger_*.npz; do
  # An unmatched glob is left as the literal pattern; -e filters that out.
  if [[ -e "$npz" ]]; then
    before_count=$((before_count + 1))
  fi
done
unset npz

# Progress counters: run_idx numbers the runs as they start; total_runs is
# the planned total, i.e. RUNS_PER_FLOCK for each entry in FLOCKS.
run_idx=0
planned_sizes=($FLOCKS)   # unquoted on purpose: one element per flock size
total_runs=$((${#planned_sizes[@]} * RUNS_PER_FLOCK))

# Main collection loop: RUNS_PER_FLOCK randomised Webots runs for every
# flock size in FLOCKS.
#   Globals read   : FLOCKS, RUNS_PER_FLOCK, RUN_SEC, SRC, DST, DONE_FILE,
#                    webots_args, total_runs
#   Globals written: run_idx, WEBOTS_PID (non-empty only while a run is live,
#                    which is what the INT/TERM handler keys on)
run_log=/tmp/webots_dagger_run.log   # overwritten by every run
max_sheep=10   # highest sheepN index that gets disabled for small flocks
grace=10       # seconds before the .DONE sentinel is honoured
poll=2         # seconds between liveness checks

for flock in $FLOCKS; do
  # C-style loops rather than seq: BSD seq counts *down* when first > last,
  # which would turn RUNS_PER_FLOCK=0 into two runs and flock=10 into
  # "disable sheep 11 and 10".
  for ((run = 1; run <= RUNS_PER_FLOCK; run++)); do
    run_idx=$((run_idx + 1))
    seed=$((1000 * flock + run))
    echo "=== [$run_idx/$total_runs] flock=$flock run=$run seed=$seed ==="

    # Generate randomised world: start from the template and comment out
    # every sheep beyond the requested flock size.
    cp "$SRC" "$DST"
    disable_exprs=()
    for ((i = flock + 1; i <= max_sheep; i++)); do
      disable_exprs+=(-e "s|^Sheep .* \"sheep${i}\".*|# &|")
    done
    if ((${#disable_exprs[@]} > 0)); then
      # One sed pass for all sheep. "-i.bak" (suffix attached) is accepted
      # by both GNU and BSD sed; the backup copy itself is not wanted.
      sed -i.bak "${disable_exprs[@]}" "$DST"
      rm -f "$DST.bak"
    fi

    # Inline Python: jitter sheep1..flock translations. The heredoc body is
    # kept at column 0 because Python is indentation-sensitive.
    python3 - "$DST" "$flock" "$seed" <<'PYEOF'
import re, random, sys

path, n_str, seed = sys.argv[1], sys.argv[2], sys.argv[3]
n = int(n_str)
random.seed(int(seed))
with open(path) as f:
    txt = f.read()


def rand_pos():
    """Rejection-sample an (x, y) spawn point."""
    while True:
        x = random.uniform(-12.0, 12.0)
        y = random.uniform(-10.0, 12.0)  # avoid the gate strip
        if x * x + y * y > 9.0:  # at least 3 m from dog spawn
            return x, y


for i in range(1, n + 1):
    x, y = rand_pos()
    # Group 1 keeps the third translation component untouched.
    pat = re.compile(
        r'Sheep \{ translation\s+\S+\s+\S+\s+(\S+)\s+name "sheep' + str(i) + r'"'
    )
    txt, hits = pat.subn(
        rf'Sheep {{ translation {x:.2f} {y:.2f} \g<1> name "sheep{i}"',
        txt,
        count=1,
    )
    if hits == 0:
        # Without this the run would silently use the template position.
        print(
            f" WARNING: sheep{i} not found in {path}; position not randomised",
            file=sys.stderr,
        )

with open(path, "w") as f:
    f.write(txt)
PYEOF

    # Run Webots in the background; poll for the .DONE sentinel or the
    # wall-clock timeout, whichever comes first.
    rm -f "$DONE_FILE"
    webots "${webots_args[@]}" "$DST" > "$run_log" 2>&1 &
    WEBOTS_PID=$!

    # Give the controller $grace seconds to start before polling the
    # sentinel, otherwise a sheep that spawns already penned triggers an
    # instant false-positive kill.
    elapsed=0
    while kill -0 "$WEBOTS_PID" 2>/dev/null; do
      stop_msg=""
      if ((elapsed >= grace)) && [[ -f "$DONE_FILE" ]]; then
        stop_msg="sentinel .DONE detected — killing Webots early"
      elif ((elapsed >= RUN_SEC)); then
        stop_msg="timeout ($RUN_SEC s) — killing Webots"
      fi
      if [[ -n "$stop_msg" ]]; then
        echo " $stop_msg"
        # Webots can exit between the liveness check and this kill; under
        # `set -e` an unguarded failure here would abort the whole
        # collection, so both calls are allowed to fail.
        kill "$WEBOTS_PID" 2>/dev/null || true
        wait "$WEBOTS_PID" 2>/dev/null || true
        break
      fi
      sleep "$poll"
      elapsed=$((elapsed + poll))
    done
    WEBOTS_PID=""

    # Quick sanity from the log: did the controller actually run?
    if grep -q "running in mode=dagger" "$run_log"; then
      # Last "logged=N" progress marker near the end of the log.
      new_pairs=$(tail -50 "$run_log" | grep -oE 'logged=[0-9]+' | tail -1)
      echo " controller ran ($new_pairs)"
    else
      echo " WARNING: controller may not have started (see $run_log)"
    fi
  done
done

# Final summary: count the dagger files again and report how many this
# invocation added on top of before_count.
after_count=0
for npz in "$ROOT/training/dagger"/dagger_*.npz; do
  # An unmatched glob is left as the literal pattern; -e filters that out.
  if [[ -e "$npz" ]]; then
    after_count=$((after_count + 1))
  fi
done
unset npz
new_files=$((after_count - before_count))

echo
echo "Done."
echo " new dagger files : $new_files"
echo " total in dir : $after_count"
echo
echo "Next:"
echo " python -m tools.dagger_merge_train --out training/runs/bc_dagger"