Checkpoint 5 - incomplete

This commit is contained in:
Johnny Fernandes
2026-05-11 10:35:39 +01:00
parent 6688325d89
commit b457155538
13 changed files with 174 additions and 74 deletions
+2 -2
View File
@@ -16,7 +16,7 @@
#
# Env-var overrides:
# HERDING_POLICY_DIR : policy the controller loads (only used when
# HERDING_DAGGER_DRIVER=student). Default bc_v3.
# HERDING_DAGGER_DRIVER=student). Default bc.
# HERDING_DAGGER_DRIVER : "teacher" (default) or "student".
# HEADLESS=1 : force --no-rendering (default on).
# FLOCKS="1 3 5 8 10" : space-separated flock sizes to iterate over.
@@ -37,7 +37,7 @@ HEADLESS=${HEADLESS:-1}
ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
SRC="$ROOT/worlds/field.wbt"
DST="$ROOT/worlds/field_test.wbt"
POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_v3}"
POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc}"
DRIVER="${HERDING_DAGGER_DRIVER:-teacher}"
DONE_FILE="$ROOT/training/dagger/.DONE"
WEBOTS_PID=""
+3 -3
View File
@@ -10,7 +10,7 @@ where:
* ``actions`` is the **active-scan-teacher action computed from
ground-truth sheep positions** (read off the sheep emitter).
Combined with the existing sim demos (``training/demos_v3.npz`` by
Combined with the existing sim demos (``training/demos.npz`` by
default), this gives the BC student a training set that includes the
real Webots false-positive distribution — closing the sim-to-real
perception gap that the all-sim pipeline couldn't bridge.
@@ -19,7 +19,7 @@ Usage::
# Iteration 1 — merge all dagger files with sim demos, retrain
python -m tools.dagger_merge_train \\
--sim training/demos_v3.npz \\
--sim training/demos.npz \\
--out training/runs/bc_dagger1
# Iteration 2 — drop the sim baseline, train only on Webots data
@@ -48,7 +48,7 @@ import numpy as np
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--sim", default="training/demos_v3.npz",
parser.add_argument("--sim", default="training/demos.npz",
help="Sim demo file to mix with the Webots data. "
"Pass --no-sim to train only on dagger data.")
parser.add_argument("--no-sim", action="store_true",
+32 -4
View File
@@ -17,7 +17,7 @@
#
# Notes:
# * The RL mode loads the latest BC policy by default — priority
# bc_dagger_v2 → bc_dagger → bc_c2v3 (the controller resolves it).
# goes to the single BC run, bc/policy.zip (the controller resolves it).
# (LiDAR-perception, frame-stack K=4). Override via
# HERDING_POLICY_DIR=/path/to/run env var.
# * Conda env "tir" must be active (provides stable-baselines3 + torch).
@@ -50,12 +50,12 @@ echo "------------------------------------------------------------"
echo "World : $DST"
echo "Mode : $MODE"
echo "Sheep : $active active"
echo "Policy dir : ${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_v3}"
echo "Policy dir : ${HERDING_POLICY_DIR:-$ROOT/training/runs/bc}"
echo "------------------------------------------------------------"
# Webots strips HERDING_* env vars from controller subprocesses in some
# setups, so we also write a runtime config file the controller reads.
RESOLVED_POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_v3}"
RESOLVED_POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc}"
cat > "$ROOT/herding_runtime.cfg" <<EOF
HERDING_MODE=$MODE
HERDING_POLICY_DIR=$RESOLVED_POLICY_DIR
@@ -65,4 +65,32 @@ EOF
export HERDING_MODE="$MODE"
export HERDING_POLICY_DIR="$RESOLVED_POLICY_DIR"
exec webots "$DST"
# Sentinel handshake: the controller creates this file once every GT
# sheep is penned. The script watches for it and shuts Webots down, so
# a finished run ends promptly rather than idling for minutes.
DONE_FILE="${ROOT}/training/dagger/.DONE"
# Make sure the sentinel's directory exists, and drop any sentinel that
# is already on disk before Webots starts.
mkdir -p "${DONE_FILE%/*}"
rm -f "$DONE_FILE"
# Run Webots in the background and remember its PID for later
# kill/wait calls.
webots "$DST" & WEBOTS_PID=$!
# Signal handler: stop the background Webots process, reap it, and end
# the script with status 0.
# Globals: WEBOTS_PID (read)
cleanup() {
  # Best-effort teardown: both steps always run, their stderr is
  # discarded, and a failure of either (e.g. Webots already gone) is
  # ignored.
  {
    kill "$WEBOTS_PID"
    wait "$WEBOTS_PID"
  } 2>/dev/null || true
  exit 0
}
# Route INT/TERM through cleanup.
trap cleanup INT TERM

# Check once per second while Webots is still alive. The loop ends when
# Webots exits on its own, when the user closes the window, or when the
# sentinel file shows up.
while kill -0 "$WEBOTS_PID" 2>/dev/null; do
  if [[ ! -f "$DONE_FILE" ]]; then
    sleep 1
    continue
  fi
  echo "[run_webots] all sheep penned — closing Webots"
  sleep 1  # let the controller print its line
  kill "$WEBOTS_PID" 2>/dev/null || true
  break
done

# Reap the Webots process; a failing wait is ignored.
wait "$WEBOTS_PID" 2>/dev/null || true