Checkpoint 6

2026-05-11 10:35:48 +01:00
parent b457155538
commit fce0e0c786
27 changed files with 194 additions and 704 deletions
@@ -11,14 +11,14 @@ _PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
 if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)

-from herding.flocking_sim import (  # noqa: F401
+from herding.world.flocking_sim import (  # noqa: F401
    MAX_SPEED, FLEE_SPEED, WANDER_SPEED,
    WALL_MARGIN, WALL_HARD_MARGIN, WALL_HARD_GAIN,
    FLEE_DIST, SEPARATION_DIST, COHESION_DIST,
    PEN_MARGIN,
    compute_heading_speed,
 )
-from herding.geometry import (  # noqa: F401
+from herding.world.geometry import (  # noqa: F401
    FIELD_X, FIELD_Y, PEN_X, PEN_Y,
    in_pen,
 )
@@ -24,9 +24,9 @@ if _PROJECT_ROOT not in sys.path:

 from controller import Supervisor

-from herding.diffdrive import heading_speed_to_wheels
-from herding.flocking_sim import MAX_SPEED, compute_heading_speed
-from herding.geometry import (
+from herding.world.diffdrive import heading_speed_to_wheels
+from herding.world.flocking_sim import MAX_SPEED, compute_heading_speed
+from herding.world.geometry import (
    SHEEP_MAX_WHEEL_OMEGA,
    is_penned_position,
 )
@@ -4,52 +4,39 @@ Mode is selected by ``HERDING_MODE`` (env var, or via the
 ``herding_runtime.cfg`` file the launcher writes since Webots strips
 env vars on some setups):

-    strombom    → canonical Strömbom collect/drive heuristic.
-    sequential  → single-target "pin and push" — drives the sheep
-                  closest to the pen.
-    bc          → behaviour-cloned MLP, trained on Strömbom demos via
-                  sim. Default policy directory: training/runs/bc.
-    rl          → KL-regularised PPO fine-tune of the BC policy. Same
-                  obs/action space as bc; refines time-to-pen via
-                  environment reward while staying anchored to bc.
-                  Default policy directory: training/runs/rl.
-    dagger      → DAgger data collection. Reads sheep ground-truth
-                  via the receiver, computes the active-scan teacher's
-                  recommended action at every step, drives with either
-                  the teacher (HERDING_DAGGER_DRIVER=teacher, default)
-                  or the loaded student (=student), and logs each
-                  (lidar_stacked_obs, teacher_action) pair. On exit
-                  dumps to ``training/dagger/dagger_<ts>.npz`` for
-                  ``tools.dagger_merge_train`` to consume.
+    strombom    → canonical Strömbom (2014) collect/drive heuristic
+                  wrapped in ActiveScanTeacher (opening rotation +
+                  walk-to-centre when the tracker briefly empties).
+    sequential  → single-target "pin-and-push", same wrapper.
+    bc          → behaviour-cloned MLP, trained on Strömbom demos.
+                  Default policy: training/runs/bc/policy.zip.
+    rl          → KL-regularised PPO fine-tune of bc. Same obs/action
+                  space as bc; refines time-to-pen via reward while
+                  staying anchored to bc.
+                  Default policy: training/runs/rl/policy.zip.

 Sheep perception
 ----------------
-The dog now perceives sheep through its **front-mounted 140° LiDAR**
-(``protos/ShepherdDog.proto``: 180 rays, 12 m max range). Each step
-the controller:
+The dog perceives sheep through its **front-mounted 140° LiDAR**
+(``protos/ShepherdDog.proto``: 180 rays, 12 m max range). Each step:

    1. Reads ``lidar.getRangeImage()``.
-    2. Runs ``herding.lidar_perception.detections_from_scan`` to cluster
-       returns into world-frame ``(x, y)`` sheep estimates.
-    3. Folds those into a ``herding.sheep_tracker.SheepTracker`` which
-       maintains last-seen positions for sheep currently out of the
-       FOV and latches "penned" once a track disappears near the gate.
+    2. Runs ``herding.perception.lidar_perception.detections_from_scan``
+       to cluster returns into world-frame ``(x, y)`` sheep estimates.
+    3. Folds those into a ``SheepTracker`` which maintains last-seen
+       positions for sheep currently out of FOV and latches "penned"
+       once a track crosses the gate plane south.

-The output of step 3 is a ``{name: (x, y)}`` dict shaped exactly like
-the receiver-based one we used to consume — so Strömbom, Sequential
-and the BC obs builder run unchanged. The sheep→dog Emitter/Receiver
-link is still up (kept passively for compatibility) but its messages
-are *not* used for control.
+Sheep ``emitter`` messages are read **for diagnostic logging only**
+(GT_penned counter + auto-finish sentinel); they are never used to
+drive the policy. Perception for control comes entirely from LiDAR.

-All modes share the same low-level differential-drive controller
-(``herding.diffdrive.velocity_to_wheels`` with cos(err)-clamped forward
-speed), so switching modes does not retune actuation.
-
-A safety supervisor enforces the "dog stays out of the pen" invariant:
-if the action would push the dog past ``DOG_SOUTH_LIMIT`` it is
-overridden with a north-driving correction. RL fallback: if the policy
-zip can't be loaded (SB3 missing, file missing), the controller drops
-to strombom mode automatically.
+Auto-finish
+-----------
+When the dog observes (via GT, read off the receiver) that all sheep
+are penned, it writes ``training/.run_done`` and the launcher
+(``tools/run_webots.sh``) detects it and closes Webots. This keeps
+batch evaluation runs bounded.
 """

 import math
@@ -62,26 +49,27 @@ _PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
 if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)

+import numpy as np
+
 from controller import Robot

-from herding.active_scan import ActiveScanTeacher
-from herding.control import modulate_speed_near_sheep
-from herding.diffdrive import velocity_to_wheels
-from herding.geometry import (
+from herding.control.active_scan import ActiveScanTeacher
+from herding.control.modulation import modulate_speed_near_sheep
+from herding.control.sequential import compute_action as sequential_action
+from herding.control.strombom import compute_action as strombom_action
+from herding.obs import build_obs
+from herding.perception.lidar_perception import detections_from_scan
+from herding.perception.sheep_tracker import SheepTracker
+from herding.world.diffdrive import velocity_to_wheels
+from herding.world.geometry import (
    DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
    DOG_SOUTH_LIMIT, DOG_WHEEL_RADIUS,
    PEN_ENTRY, is_penned_position,
 )
-from herding.lidar_perception import detections_from_scan
-from herding.obs import OBS_DIM, build_obs
-from herding.sequential import compute_action_debug as sequential_action_debug
-from herding.sheep_tracker import SheepTracker
-from herding.strombom import compute_action as strombom_action
-from herding.strombom import compute_action_debug as strombom_action_debug


 # ---------------------------------------------------------------------------
-# Mode selection
+# Mode + policy resolution
 # ---------------------------------------------------------------------------

 def _load_runtime_config():
@@ -122,8 +110,8 @@ def _resolve_policy_dir(mode: str) -> str:
      1. HERDING_POLICY_DIR env var or runtime-cfg entry, if it points
         to a real directory.
      2. Mode-specific default:
-            bc → training/runs/bc (Strömbom-imitated MLP)
-            rl → training/runs/rl (KL-PPO fine-tune of bc)
+            bc → training/runs/bc  (Strömbom-imitated MLP)
+            rl → training/runs/rl  (KL-PPO fine-tune of bc)
      3. Fall back to bc.
    All checkpoints are frame-stacked K = 4; ``policy_loader`` reads
    the stacking factor from the policy's observation space.
@@ -135,60 +123,41 @@ def _resolve_policy_dir(mode: str) -> str:
    mode_default = {
        "bc": os.path.join(_PROJECT_ROOT, "training", "runs", "bc"),
        "rl": os.path.join(_PROJECT_ROOT, "training", "runs", "rl"),
-        "dagger": os.path.join(_PROJECT_ROOT, "training", "runs", "bc"),
    }
    primary = mode_default.get(mode, mode_default["bc"])
    if os.path.isdir(primary):
        return primary
-    # Fall back to BC if the requested checkpoint isn't there yet
-    # (e.g., user asked for `rl` before training the fine-tune).
    fallback = mode_default["bc"]
    if os.path.isdir(fallback):
        return fallback
    return env_dir or primary


-_VALID_MODES = ("bc", "rl", "strombom", "sequential", "dagger", "diag")
-# Back-compat: an old config saying HERDING_MODE=rl meant "the BC policy".
-# We now use `rl` strictly for the KL-PPO fine-tune. If the rl
-# directory isn't present, _resolve_policy_dir below silently falls
-# back to bc, preserving the old behaviour.
+_VALID_MODES = ("bc", "rl", "strombom", "sequential")
 if MODE not in _VALID_MODES:
    print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
    MODE = "strombom"

-DAGGER_DRIVER = (os.environ.get("HERDING_DAGGER_DRIVER")
-                 or _runtime_cfg.get("HERDING_DAGGER_DRIVER")
-                 or "teacher").lower()
-if DAGGER_DRIVER not in ("teacher", "student"):
-    DAGGER_DRIVER = "teacher"
-
 POLICY_DIR = _resolve_policy_dir(MODE)
 policy_handle = None
-if MODE in ("bc", "rl", "dagger"):
+if MODE in ("bc", "rl"):
    print(f"[dog] resolved POLICY_DIR={POLICY_DIR}  exists={os.path.isdir(POLICY_DIR)}")
    try:
        from policy_loader import load as _load_policy
        policy_handle = _load_policy(POLICY_DIR)
        print(f"[dog] policy loaded from {POLICY_DIR}")
    except Exception as exc:
-        if MODE in ("bc", "rl"):
-            print(f"[dog] policy load failed ({exc!r}); falling back to strombom.")
-            MODE = "strombom"
-        else:
-            # In dagger mode, no policy is fine if driver=teacher.
-            print(f"[dog] policy load failed ({exc!r}); dagger driver forced to teacher.")
-            policy_handle = None
-print(f"[dog] running in mode={MODE}"
-      + (f" driver={DAGGER_DRIVER}" if MODE == "dagger" else ""))
+        print(f"[dog] policy load failed ({exc!r}); falling back to strombom.")
+        MODE = "strombom"
+print(f"[dog] running in mode={MODE}")


 # ---------------------------------------------------------------------------
-# Action smoothing + safety supervisor
+# Control parameters
 # ---------------------------------------------------------------------------

-ACTION_SMOOTH = 0.55  # was 0.35; bumped for less frame-to-frame action jitter
-prev_action = (0.0, 0.0)
+ACTION_SMOOTH = 0.55           # EMA on (vx, vy) — kills frame-to-frame jitter
+RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done")


 def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
@@ -202,10 +171,6 @@ def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
    return (vx, vy)


-# ---------------------------------------------------------------------------
-# Driving
-# ---------------------------------------------------------------------------
-
 def drive(vx: float, vy: float, left_motor, right_motor, compass, motor_max: float):
    if math.hypot(vx, vy) < 1e-3:
        left_motor.setVelocity(0.0)
@@ -245,12 +210,9 @@ receiver = robot.getDevice("receiver"); receiver.enable(timestep)
 emitter = robot.getDevice("emitter")
 lidar = robot.getDevice("lidar");       lidar.enable(timestep)

-# The receiver channel from sheep is no longer consumed for perception
-# (kept enabled in case any peripheral tooling reads it). Sheep
-# positions come exclusively from the LiDAR + tracker pipeline below.
 tracker = SheepTracker()

-# Cosmetic ear motors — ignored by control logic but keep them animated.
+# Cosmetic ear motors — animated; not used by control.
 left_ear = robot.getDevice("left ear motor")
 right_ear = robot.getDevice("right ear motor")
 left_ear.setPosition(float("inf"))
@@ -266,75 +228,26 @@ EAR_RATE = 8.0
 # Main loop
 # ---------------------------------------------------------------------------

-# Active sheep positions come from the LiDAR-fed tracker each step;
-# penned_set is the tracker's ``get_penned_set()`` call. We drain the
-# receiver queue without consuming it, so the small backlog of sheep
-# pings can't grow unbounded.
-step_count = 0
+# Analytic-teacher wrapper (instantiated lazily so RL/BC modes don't pay
+# the import-time cost). Each gets the same ActiveScanTeacher treatment:
+# rotate-on-empty, walk-to-centre, near-sheep speed modulation.
+analytic_teacher = None
+if MODE in ("strombom", "sequential"):
+    base_fn = strombom_action if MODE == "strombom" else sequential_action
+    analytic_teacher = ActiveScanTeacher(base_fn)

-import atexit
-import time
-import numpy as _np
-
-# DAgger state ----------------------------------------------------------
-# Logged each step in dagger mode: (stacked_lidar_obs, teacher_action).
-DAGGER_LOG_OBS: list = []
-DAGGER_LOG_ACT: list = []
-# Diagnostic mode buffer (one dict per step).
-DIAG_BUF: list = []
-# Frame stack buffer the controller maintains itself when dagger mode is
-# active — the stacked obs we log must match what the policy sees so the
-# downstream BC consumes (stacked_obs, teacher_action) pairs cleanly.
-_FRAME_STACK = (policy_handle.frame_stack if policy_handle is not None else 4)
-_dagger_buffer: list = []
-# Active-scan teacher operates on GT (read from receiver).
-_dagger_teacher = ActiveScanTeacher(strombom_action) if MODE == "dagger" else None
-# GT positions accumulated from the receiver (sheep emit their xy each step).
+# GT positions from sheep emitters — used **only** for the auto-finish
+# sentinel and the GT_penned diagnostic line. Never fed into control.
 _gt_sheep: dict = {}
+_run_done = False

-
-_DAGGER_RUN_TS = int(time.time())  # one file per controller run
-_DAGGER_DUMPED = False
-# Sentinel that the auto-collection script polls — empty file written
-# when this controller decides the run is "done" (all sheep penned, by
-# GT). The launcher then kills Webots and moves on without waiting out
-# its timeout. Honoured only in dagger mode.
-_DAGGER_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", "dagger", ".DONE")
-
-
-def _dump_dagger_log():
-    """Save accumulated (obs, teacher_action) pairs to disk on exit.
-
-    Webots may SIGKILL the controller, so the loop also calls this every
-    DAGGER_FLUSH_STEPS so we lose at most a few seconds of data per run.
-    Idempotent — repeated calls overwrite the same file with the latest
-    accumulated buffer.
-    """
-    global _DAGGER_DUMPED
-    if MODE != "dagger" or not DAGGER_LOG_OBS:
-        return
-    out_dir = os.path.join(_PROJECT_ROOT, "training", "dagger")
-    os.makedirs(out_dir, exist_ok=True)
-    out_path = os.path.join(out_dir, f"dagger_{_DAGGER_RUN_TS}.npz")
-    obs_arr = _np.stack(DAGGER_LOG_OBS).astype(_np.float32)
-    act_arr = _np.stack(DAGGER_LOG_ACT).astype(_np.float32)
-    _np.savez(out_path, obs=obs_arr, actions=act_arr)
-    if not _DAGGER_DUMPED:
-        print(f"[dog dagger] wrote {len(DAGGER_LOG_OBS)} pairs → {out_path}")
-        _DAGGER_DUMPED = True
-
-
-DAGGER_FLUSH_STEPS = 500
-
-
-atexit.register(_dump_dagger_log)
-
+prev_action = (0.0, 0.0)
+step_count = 0

 while robot.step(timestep) != -1:
    step_count += 1

-    # Drain receiver. In every mode we capture GT for the diagnostic
-    # log line — perception still comes from LiDAR, the GT is read-only.
+    # Drain sheep emitter messages → GT (diagnostic only).
    while receiver.getQueueLength() > 0:
        msg = receiver.getString()
        receiver.nextPacket()
@@ -350,115 +263,28 @@ while robot.step(timestep) != -1:
    n = compass.getValues()
    dog_heading = math.atan2(n[0], n[1])

-    # ---- LiDAR perception → tracker → sheep_positions dict ----
-    ranges = _np.asarray(lidar.getRangeImage(), dtype=_np.float32)
+    # ---- LiDAR perception → tracker → active sheep positions ----
+    ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32)
    detections = detections_from_scan(ranges, dog_xy[0], dog_xy[1], dog_heading)
    sheep_positions = tracker.update(detections)
-    penned_set = tracker.get_penned_set()

-    # ---- Diagnostic mode: dump the first DIAG_STEPS scans + GT to disk.
-    if MODE == "diag":
-        DIAG_STEPS = 80
-        if step_count <= DIAG_STEPS:
-            DIAG_BUF.append(dict(
-                step=step_count,
-                ranges=ranges.copy(),
-                dog_x=dog_xy[0], dog_y=dog_xy[1], dog_h=dog_heading,
-                gt_sheep=dict(_gt_sheep),
-                detections=list(detections),
-            ))
-            if step_count == DIAG_STEPS:
-                _diag_path = os.path.join(_PROJECT_ROOT, "training", "dagger",
-                                          f"diag_{int(time.time())}.npz")
-                os.makedirs(os.path.dirname(_diag_path), exist_ok=True)
-                _np.savez(
-                    _diag_path,
-                    ranges=_np.stack([d["ranges"] for d in DIAG_BUF]),
-                    dog_xy=_np.array([[d["dog_x"], d["dog_y"]] for d in DIAG_BUF],
-                                     dtype=_np.float32),
-                    dog_h=_np.array([d["dog_h"] for d in DIAG_BUF], dtype=_np.float32),
-                    # Per-step GT serialised: max-pad to 10 sheep.
-                    gt_xy=_np.array([
-                        [list(d["gt_sheep"].get(f"sheep{i}", (1e9, 1e9)))
-                         for i in range(1, 11)]
-                        for d in DIAG_BUF
-                    ], dtype=_np.float32),
-                    detections=_np.array([
-                        len(d["detections"]) for d in DIAG_BUF
-                    ], dtype=_np.int32),
-                )
-                print(f"[dog diag] wrote {DIAG_STEPS} scans → {_diag_path}")
-
-    # Build the single-frame LiDAR obs (matches what the env produces).
    sheep_xy_list = list(sheep_positions.values())
    sheep_penned_list = [False] * len(sheep_xy_list)
    single_obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)
-    # Maintain our own frame stack so logged obs == what policy sees.
-    if not _dagger_buffer:
-        _dagger_buffer = [single_obs.copy() for _ in range(_FRAME_STACK)]
-    else:
-        _dagger_buffer.append(single_obs)
-        if len(_dagger_buffer) > _FRAME_STACK:
-            _dagger_buffer = _dagger_buffer[-_FRAME_STACK:]
-    stacked_obs = _np.concatenate(_dagger_buffer, axis=0).astype(_np.float32)

    # ---- Action selection ----
-    if MODE == "diag":
-        # Diagnostic mode: rotate in place so the captured scans cover
-        # all 360° of view from one position. Target = heading + π →
-        # cos(err) clamps forward to ~0, the dog spins.
-        _t = dog_heading + math.pi
-        vx, vy = math.cos(_t), math.sin(_t)
-    elif MODE == "dagger":
-        # Teacher: active-scan + Strömbom on GT (active sheep only).
-        gt_active = {name: xy for name, xy in _gt_sheep.items()
-                     if not is_penned_position(xy[0], xy[1])}
-        t_vx, t_vy, _mode_str = _dagger_teacher(
-            dog_xy, dog_heading, gt_active, PEN_ENTRY,
-        )
-        # Student (if a policy is loaded).
-        s_vx, s_vy = None, None
-        if policy_handle is not None:
-            action = policy_handle.predict(stacked_obs)
-            s_vx, s_vy = float(action[0]), float(action[1])
-        # Drive selection.
-        if DAGGER_DRIVER == "student" and policy_handle is not None:
-            vx, vy = s_vx, s_vy
-        else:
-            vx, vy = t_vx, t_vy
-        # Always log the teacher action (this is the supervision signal).
-        DAGGER_LOG_OBS.append(stacked_obs.copy())
-        DAGGER_LOG_ACT.append(_np.array([t_vx, t_vy], dtype=_np.float32))
-    elif MODE in ("bc", "rl") and policy_handle is not None:
-        # Pass the single-frame obs; the policy_loader maintains its own
-        # frame stack internally. Both bc and rl use the same control
-        # interface — the only difference is which checkpoint loaded.
+    if MODE in ("bc", "rl") and policy_handle is not None:
        action = policy_handle.predict(single_obs)
        vx, vy = float(action[0]), float(action[1])
-    elif MODE in ("strombom", "sequential"):
-        # Wrap the analytic teacher in ActiveScanTeacher so the dog
-        # rotates / walks-to-centre when the tracker briefly empties,
-        # instead of going idle. Without this wrapper, the first 2 s
-        # of LiDAR-blind operation kills the run because Strömbom and
-        # Sequential both return (0, 0) when there are no positions.
-        if "_analytic_teacher" not in globals():
-            from herding.sequential import compute_action as sequential_action
-            _analytic_teacher = ActiveScanTeacher(
-                strombom_action if MODE == "strombom" else sequential_action
-            )
-        vx, vy, _mode_str = _analytic_teacher(
+    else:
+        vx, vy, _mode_str = analytic_teacher(
            dog_xy, dog_heading, sheep_positions, PEN_ENTRY,
        )

-    # Shared post-process: speed modulation near sheep. Applies to bc,
-    # rl, strombom, sequential — every mode where the action source is
-    # nominally unit-magnitude. In dagger mode the active-scan teacher
-    # has already modulated, and the diag mode action is hand-built for
-    # rotation; both skip.
-    if MODE not in ("dagger", "diag"):
-        vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)
+    # Near-sheep speed modulation (shared by every mode).
+    vx, vy = modulate_speed_near_sheep(vx, vy, dog_xy, sheep_positions)

-    # EMA smoothing — reduces oscillation from policy or Strömbom flips.
+    # EMA smoothing — kills frame-to-frame action jitter.
    vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
    vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy

@@ -469,7 +295,7 @@ while robot.step(timestep) != -1:
    drive(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
    emitter.send(f"dog:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")

-    # Cosmetic ear wiggle — purely visual.
+    # Cosmetic ear wiggle.
    ear_phase += 0.12
    ear_pos = EAR_AMPLITUDE * math.sin(ear_phase)
    left_ear.setVelocity(EAR_RATE)
@@ -477,38 +303,26 @@ while robot.step(timestep) != -1:
    left_ear.setPosition(ear_pos)
    right_ear.setPosition(-ear_pos)

-    # --- Early-stop when all GT sheep are penned (all modes) ---
-    # The dog isn't a Supervisor so it can't call simulationQuit() —
-    # instead we write a sentinel file the launcher polls for and uses
-    # to kill the Webots process. Bounded by `_gt_sheep` so we don't
+    # Auto-finish: when all GT sheep are penned, write the sentinel.
+    # The launcher polls for it and closes Webots so batch evals don't
+    # hang after the task is done. Bounded by `_gt_sheep` so we don't
    # fire during the first few steps while the receiver fills.
-    if _gt_sheep and not os.path.exists(_DAGGER_DONE_FILE):
-        gt_active_count = sum(1 for x, y in _gt_sheep.values()
-                              if not is_penned_position(x, y))
-        if gt_active_count == 0:
-            if MODE == "dagger":
-                _dump_dagger_log()
-            os.makedirs(os.path.dirname(_DAGGER_DONE_FILE), exist_ok=True)
-            open(_DAGGER_DONE_FILE, "w").close()
+    if _gt_sheep and not _run_done:
+        gt_active = sum(1 for x, y in _gt_sheep.values()
+                        if not is_penned_position(x, y))
+        if gt_active == 0:
+            os.makedirs(os.path.dirname(RUN_DONE_FILE), exist_ok=True)
+            open(RUN_DONE_FILE, "w").close()
+            _run_done = True
            print(f"[dog] all {len(_gt_sheep)} sheep penned at step "
-                  f"{step_count} — wrote {_DAGGER_DONE_FILE}, "
-                  f"launcher will close Webots")
-
-    if MODE == "dagger" and step_count % DAGGER_FLUSH_STEPS == 0 and DAGGER_LOG_OBS:
-        _dump_dagger_log()
+                  f"{step_count} — wrote sentinel, launcher will close Webots")

    if step_count % 200 == 0:
        gt_penned = sum(1 for x, y in _gt_sheep.values()
                        if is_penned_position(x, y))
        gt_total = len(_gt_sheep)
-        extra = ""
-        if MODE == "dagger":
-            extra = f" logged={len(DAGGER_LOG_OBS)}"
        print(f"[dog mode={MODE}] step={step_count} "
              f"GT_penned={gt_penned}/{gt_total} "
              f"tracks_active={tracker.n_active()} "
              f"tracks_penned={tracker.n_penned()} "
-              f"detections={len(detections)} action=({vx:+.2f}, {vy:+.2f}){extra}")
-
-# Loop ended (Webots told us to quit). Flush any remaining DAgger log.
-_dump_dagger_log()
+              f"detections={len(detections)} action=({vx:+.2f}, {vy:+.2f})")