Checkpoint 5 - incomplete
This commit is contained in:
@@ -16,7 +16,7 @@
|
||||
#
|
||||
# Env-var overrides:
|
||||
# HERDING_POLICY_DIR : policy the controller loads (only used when
|
||||
# HERDING_DAGGER_DRIVER=student). Default bc_v3.
|
||||
# HERDING_DAGGER_DRIVER=student). Default bc.
|
||||
# HERDING_DAGGER_DRIVER : "teacher" (default) or "student".
|
||||
# HEADLESS=1 : force --no-rendering (default on).
|
||||
# FLOCKS="1 3 5 8 10" : space-separated flock sizes to iterate over.
|
||||
@@ -37,7 +37,7 @@ HEADLESS=${HEADLESS:-1}
|
||||
ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
|
||||
SRC="$ROOT/worlds/field.wbt"
|
||||
DST="$ROOT/worlds/field_test.wbt"
|
||||
POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_v3}"
|
||||
POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc}"
|
||||
DRIVER="${HERDING_DAGGER_DRIVER:-teacher}"
|
||||
DONE_FILE="$ROOT/training/dagger/.DONE"
|
||||
WEBOTS_PID=""
|
||||
|
||||
@@ -10,7 +10,7 @@ where:
|
||||
* ``actions`` is the **active-scan-teacher action computed from
|
||||
ground-truth sheep positions** (read off the sheep emitter).
|
||||
|
||||
Combined with the existing sim demos (``training/demos_v3.npz`` by
|
||||
Combined with the existing sim demos (``training/demos.npz`` by
|
||||
default), this gives the BC student a training set that includes the
|
||||
real Webots false-positive distribution — closing the sim-to-real
|
||||
perception gap that the all-sim pipeline couldn't bridge.
|
||||
@@ -19,7 +19,7 @@ Usage::
|
||||
|
||||
# Iteration 1 — merge all dagger files with sim demos, retrain
|
||||
python -m tools.dagger_merge_train \\
|
||||
--sim training/demos_v3.npz \\
|
||||
--sim training/demos.npz \\
|
||||
--out training/runs/bc_dagger1
|
||||
|
||||
# Iteration 2 — drop the sim baseline, train only on Webots data
|
||||
@@ -48,7 +48,7 @@ import numpy as np
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--sim", default="training/demos_v3.npz",
|
||||
parser.add_argument("--sim", default="training/demos.npz",
|
||||
help="Sim demo file to mix with the Webots data. "
|
||||
"Pass --no-sim to train only on dagger data.")
|
||||
parser.add_argument("--no-sim", action="store_true",
|
||||
|
||||
+32
-4
@@ -17,7 +17,7 @@
|
||||
#
|
||||
# Notes:
|
||||
# * The RL mode loads the latest BC policy by default — priority
|
||||
# bc_dagger_v2 → bc_dagger → bc_c2v3 (the controller resolves it).
|
||||
# the BC policy (bc/policy.zip) (the controller resolves it).
|
||||
# (LiDAR-perception, frame-stack K=4). Override via
|
||||
# HERDING_POLICY_DIR=/path/to/run env var.
|
||||
# * Conda env "tir" must be active (provides stable-baselines3 + torch).
|
||||
@@ -50,12 +50,12 @@ echo "------------------------------------------------------------"
|
||||
echo "World : $DST"
|
||||
echo "Mode : $MODE"
|
||||
echo "Sheep : $active active"
|
||||
echo "Policy dir : ${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_v3}"
|
||||
echo "Policy dir : ${HERDING_POLICY_DIR:-$ROOT/training/runs/bc}"
|
||||
echo "------------------------------------------------------------"
|
||||
|
||||
# Webots strips HERDING_* env vars from controller subprocesses in some
|
||||
# setups, so we also write a runtime config file the controller reads.
|
||||
RESOLVED_POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_v3}"
|
||||
RESOLVED_POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc}"
|
||||
cat > "$ROOT/herding_runtime.cfg" <<EOF
|
||||
HERDING_MODE=$MODE
|
||||
HERDING_POLICY_DIR=$RESOLVED_POLICY_DIR
|
||||
@@ -65,4 +65,32 @@ EOF
|
||||
export HERDING_MODE="$MODE"
|
||||
export HERDING_POLICY_DIR="$RESOLVED_POLICY_DIR"
|
||||
|
||||
exec webots "$DST"
|
||||
# The controller writes this sentinel when all GT sheep are penned. We
|
||||
# poll for it and kill Webots so the run finishes cleanly instead of
|
||||
# idling for minutes after the task is done.
|
||||
DONE_FILE="$ROOT/training/dagger/.DONE"
|
||||
mkdir -p "$(dirname "$DONE_FILE")"
|
||||
rm -f "$DONE_FILE"
|
||||
|
||||
webots "$DST" &
|
||||
WEBOTS_PID=$!
|
||||
|
||||
cleanup() {
|
||||
kill "$WEBOTS_PID" 2>/dev/null || true
|
||||
wait "$WEBOTS_PID" 2>/dev/null || true
|
||||
exit 0
|
||||
}
|
||||
trap cleanup INT TERM
|
||||
|
||||
# Poll for the sentinel; bail when Webots exits on its own or when the
|
||||
# user closes the window.
|
||||
while kill -0 "$WEBOTS_PID" 2>/dev/null; do
|
||||
if [[ -f "$DONE_FILE" ]]; then
|
||||
echo "[run_webots] all sheep penned — closing Webots"
|
||||
sleep 1 # let the controller print its line
|
||||
kill "$WEBOTS_PID" 2>/dev/null || true
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
wait "$WEBOTS_PID" 2>/dev/null || true
|
||||
|
||||
Reference in New Issue
Block a user