Checkpoint 6

2026-05-11 10:35:48 +01:00
parent b457155538
commit fce0e0c786
27 changed files with 194 additions and 704 deletions
@@ -7,19 +7,17 @@
 # Usage:
 #   tools/run_webots.sh [N] [MODE]
 #     N    : number of active sheep (1..10), default 10
-#     MODE : "bc" | "rl" | "strombom" | "sequential" | "dagger", default "bc"
+#     MODE : "bc" | "rl" | "strombom" | "sequential", default "bc"
 #
 # Examples:
-#   tools/run_webots.sh 10 bc         # BC-trained policy, 10 sheep
+#   tools/run_webots.sh 10 bc         # behaviour-cloned MLP, 10 sheep
 #   tools/run_webots.sh 10 rl         # KL-PPO fine-tune of bc, 10 sheep
-#   tools/run_webots.sh 5 sequential  # the analytic teacher, 5 sheep
-#   tools/run_webots.sh 3 strombom    # canonical baseline, 3 sheep
+#   tools/run_webots.sh 5 sequential  # single-target analytic baseline, 5 sheep
+#   tools/run_webots.sh 3 strombom    # canonical Strömbom analytic baseline, 3 sheep
 #
 # Notes:
-# * The RL mode loads the latest BC policy by default — priority
-#   the BC policy (bc/policy.zip) (the controller resolves it).
-#   (LiDAR-perception, frame-stack K=4). Override via
-#   HERDING_POLICY_DIR=/path/to/run env var.
+# * Mode "bc" loads training/runs/bc/policy.zip; mode "rl" loads training/runs/rl.
+#   Override either via the HERDING_POLICY_DIR=/path/to/run env var.
 # * Conda env "tir" must be active (provides stable-baselines3 + torch).

 set -e
@@ -30,10 +28,9 @@ if (( N < 1 || N > 10 )); then
    echo "N must be 1..10, got $N" >&2; exit 1
 fi
 case "$MODE" in
-    bc|rl|strombom|sequential|dagger) ;;
-    *) echo "MODE must be bc|rl|strombom|sequential|dagger, got '$MODE'" >&2; exit 1 ;;
+    bc|rl|strombom|sequential) ;;
+    *) echo "MODE must be bc|rl|strombom|sequential, got '$MODE'" >&2; exit 1 ;;
 esac
-DAGGER_DRIVER=${HERDING_DAGGER_DRIVER:-teacher}

 ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
 SRC="$ROOT/worlds/field.wbt"
@@ -59,7 +56,6 @@ RESOLVED_POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc}"
 cat > "$ROOT/herding_runtime.cfg" <<EOF
 HERDING_MODE=$MODE
 HERDING_POLICY_DIR=$RESOLVED_POLICY_DIR
-HERDING_DAGGER_DRIVER=$DAGGER_DRIVER
 EOF

 export HERDING_MODE="$MODE"
@@ -68,7 +64,7 @@ export HERDING_POLICY_DIR="$RESOLVED_POLICY_DIR"
 # The controller writes this sentinel when all GT sheep are penned. We
 # poll for it and kill Webots so the run finishes cleanly instead of
 # idling for minutes after the task is done.
-DONE_FILE="$ROOT/training/dagger/.DONE"
+DONE_FILE="$ROOT/training/.run_done"
 mkdir -p "$(dirname "$DONE_FILE")"
 rm -f "$DONE_FILE"