From 2d23289052a43ce00d2c9888064780cc163a3ddf Mon Sep 17 00:00:00 2001
From: Johnny Fernandes <up202402612@up.pt>
Date: Sat, 16 May 2026 20:19:11 +0000
Subject: [PATCH] =?UTF-8?q?Consensus=20tracker=20+=20active=20scan=20close?=
 =?UTF-8?q?=20Webots=20140=C2=B0=20LiDAR=20gap?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two deploy-time fixes that take v1 360°-trained BC/RL from 0/n to n/n
penned on the canonical 140° LiDAR proto for diff/field:

* SheepTracker now supports a consensus stage: unmatched detections
  spawn candidate tracks that are invisible to get_positions(). A
  candidate is promoted once it has absorbed consensus_k detections
  (the spawning one included), each within consensus_radius_m of the
  candidate; it expires if it goes unmatched for more than
  consensus_max_age steps. Real sheep self-confirm within 3 frames
  (≪0.05 m/step); wall-return cluster centroids jitter beyond 0.3 m as
  the dog moves and never promote. consensus_k=1 (default) is a no-op
  so unconfigured callers and HERDING_DEFAULT keep prior behaviour.
* HERDING_WEBOTS preset gets consensus_k=3, radius=0.3, max_age=20,
  plus longer forget_steps=300 and predict_steps=180 so confirmed
  sheep persist through long FOV-occlusion gaps a narrow 140° cone
  produces. max_new_tracks_per_step=1 still rate-caps spawn bursts.
* shepherd_dog.py BC/RL empty-obs fallback now rotates the desired
  heading with step_count so the cone actively sweeps the field
  instead of driving due north into the wall.

Verified in headless Webots (HERDING_USE_GT=0, LiDAR only):
  BC diff/field:        5/5 @ 11698, 10/10 @ 15079
  RL diff/field:        5/5 @ 10039, 9/10 @ 18200 (timeout)
  Strömbom diff/field:  5/5 @ 7528
All previously 0/n. 120 unit tests pass; 9 new consensus tests cover
the candidate stage, promotion radius, and one-shot phantom rejection.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 controllers/shepherd_dog/shepherd_dog.py |  32 ++---
 herding/config.py                        |  54 +++++++-
 herding/perception/sheep_tracker.py      | 162 +++++++++++++++++++----
 tests/test_config.py                     |  23 +++-
 tests/test_perception.py                 |  85 ++++++++++++
 5 files changed, 312 insertions(+), 44 deletions(-)

diff --git a/controllers/shepherd_dog/shepherd_dog.py b/controllers/shepherd_dog/shepherd_dog.py
index 745d419..22cfe98 100644
--- a/controllers/shepherd_dog/shepherd_dog.py
+++ b/controllers/shepherd_dog/shepherd_dog.py
@@ -430,9 +430,13 @@ while robot.step(timestep) != -1:
         if not sheep_positions:
             # BC/RL never saw "empty obs during operation" in training (empty
             # obs only happened at episode end), so the policy outputs ~zero
-            # and the dog gets stuck. Fall back to a fixed scan rotation
-            # until tracker recovers some sheep.
-            vx, vy = 0.0, 0.6
+            # and the dog gets stuck. Fall back to an *active scan*: rotate
+            # the desired heading slowly so the narrow 140° FOV sweeps the
+            # field instead of charging in one fixed direction (which
+            # otherwise drives the dog into the north wall and ends the run).
+            scan_h = (step_count * 0.015) % (2.0 * math.pi)
+            vx = 0.5 * math.cos(scan_h)
+            vy = 0.5 * math.sin(scan_h)
             omega = 0.5 if DRIVE_MODE == "mecanum" else 0.0
         else:
             action = policy_handle.predict(single_obs)
@@ -498,15 +502,13 @@ while robot.step(timestep) != -1:
         gt_penned = sum(1 for x, y in _gt_sheep.values()
                         if is_penned_position(x, y))
         gt_total = len(_gt_sheep)
-        print(f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
-              f"GT_penned={gt_penned}/{gt_total} "
-              f"tracks_active={tracker.n_active()} "
-              f"tracks_penned={tracker.n_penned()} "
-              f"detections={len(detections)} "
-              f"action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})"
-              if DRIVE_MODE == "mecanum" else
-              f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
-              f"GT_penned={gt_penned}/{gt_total} "
-              f"tracks_active={tracker.n_active()} "
-              f"tracks_penned={tracker.n_penned()} "
-              f"detections={len(detections)} action=({vx:+.2f}, {vy:+.2f})")
+        common = (f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
+                  f"GT_penned={gt_penned}/{gt_total} "
+                  f"tracks_active={tracker.n_active()} "
+                  f"tracks_cand={tracker.n_candidate()} "
+                  f"tracks_penned={tracker.n_penned()} "
+                  f"detections={len(detections)}")
+        if DRIVE_MODE == "mecanum":
+            print(f"{common} action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})")
+        else:
+            print(f"{common} action=({vx:+.2f}, {vy:+.2f})")
diff --git a/herding/config.py b/herding/config.py
index 7544a5d..103907e 100644
--- a/herding/config.py
+++ b/herding/config.py
@@ -175,6 +175,26 @@ class TrackerConfig:
     from permanently consuming tracker slots as false "penned" sheep.
     """
 
+    consensus_k: int = 1
+    """Detections a new track must absorb (the one that spawned it counts
+    as the first) before it appears in ``get_positions``. ``1`` (default)
+    disables the candidate stage — behaviour-identical to the original
+    tracker. ``3-4`` filters one-shot LiDAR phantoms in Webots while a
+    real sheep promotes within ``consensus_k * timestep`` ≈ 50-65 ms.
+    """
+
+    consensus_radius_m: float = 0.5
+    """Maximum distance (metres) between a candidate and a detection for
+    the detection to count toward promotion. Tighter than ``gate_m`` so
+    wall-cluster centroid jitter cannot keep a phantom alive. Real sheep
+    move ≪ 0.05 m / step at max speed so this gate is very loose for them.
+    """
+
+    consensus_max_age: int = 8
+    """A candidate left unmatched for more than this many steps is dropped.
+    Short — phantoms get one window to confirm or die.
+    """
+
     def __post_init__(self) -> None:
         if self.forget_steps < 1:
             raise ValueError(f"forget_steps must be ≥ 1, got {self.forget_steps}")
@@ -182,6 +202,16 @@ class TrackerConfig:
             raise ValueError(
                 f"max_new_tracks_per_step must be ≥ 1, got {self.max_new_tracks_per_step}"
             )
+        if self.consensus_k < 1:
+            raise ValueError(f"consensus_k must be ≥ 1, got {self.consensus_k}")
+        if self.consensus_radius_m <= 0.0:
+            raise ValueError(
+                f"consensus_radius_m must be > 0, got {self.consensus_radius_m}"
+            )
+        if self.consensus_max_age < 1:
+            raise ValueError(
+                f"consensus_max_age must be ≥ 1, got {self.consensus_max_age}"
+            )
 
 
 # ---------------------------------------------------------------------------
@@ -317,9 +347,13 @@ HERDING_WEBOTS = HerdingConfig(
     lidar=LIDAR_WEBOTS,
     detection=DetectionConfig(wall_reject=0.5, static_reject=1.2),
     tracker=TrackerConfig(
-        forget_steps=120,
+        forget_steps=300,
         max_new_tracks_per_step=1,
         pen_latch_depth=2.0,
+        predict_steps=180,
+        consensus_k=3,
+        consensus_radius_m=0.3,
+        consensus_max_age=20,
     ),
     robot=RobotConfig(action_smooth=0.55),
 )
@@ -329,7 +363,21 @@ Changes vs HERDING_DEFAULT:
 * LiDAR: 180 rays / 140° FOV matching ShepherdDog.proto hardware
 * Detection: wall_reject kept at 0.5 m (original default; static_reject
              handles post FPs; 1.0 m was too aggressive near the south gate)
-* Tracker: forget_steps 200 → 60 (~1 s ghost-track lifetime)
-           max_new_tracks_per_step 10 → 3 (rate-caps FP flooding)
+* Tracker:
+    - consensus_k=3, radius=0.3 m, max_age=20 (~320 ms window): a new
+      detection must be confirmed by two more nearby detections within
+      a tight 0.3 m radius to promote. Real sheep barely move
+      frame-to-frame (≪0.05 m/step) so they easily self-confirm while
+      the dog is rotating across them; wall-return phantoms whose
+      cluster centroid jitters by more than 0.3 m as the dog moves
+      can't accumulate three nearby hits and decay as separate
+      candidates.
+    - forget_steps=300 (~4.8 s) + predict_steps=180 (~2.9 s): once a
+      real sheep is confirmed, it lives in tracker memory long enough
+      for the policy — trained on 360° full-visibility obs — to plan
+      while the dog sweeps a sparse cone across the field. Set short
+      enough that any phantom that does leak through promotion dies
+      after the dog walks away from the wall that created it.
+    - max_new_tracks_per_step=1 still rate-caps spawn bursts.
 * Robot: action_smooth 0.0 → 0.55 (matches Webots controller EMA)
 """
diff --git a/herding/perception/sheep_tracker.py b/herding/perception/sheep_tracker.py
index a791a8f..15f5793 100644
--- a/herding/perception/sheep_tracker.py
+++ b/herding/perception/sheep_tracker.py
@@ -17,6 +17,16 @@ until ``FORGET_STEPS`` deletes it entirely.
 A track is marked penned once its estimated position crosses the gate
 plane south (``is_penned_position``). Penned tracks are excluded from
 ``get_positions`` and kept indefinitely.
+
+**Consensus promotion** (``consensus_k > 1``): an unmatched detection
+spawns a *candidate* track that is invisible to ``get_positions``. It is
+promoted to a regular track once it has absorbed ``consensus_k``
+detections (the spawning one included), each within the tight
+``consensus_radius_m``. Candidates left unmatched for more than
+``consensus_max_age`` steps are deleted. The cost is a small acquisition
+latency (``consensus_k * timestep`` ≈ 65 ms) in exchange for
+rejecting the one-shot LiDAR phantom returns Webots produces from
+real-world 3D geometry. ``consensus_k=1`` disables the stage (default).
 """
 
 from __future__ import annotations
@@ -43,17 +53,39 @@ VELOCITY_CLAMP = 1.0      # m/s — max predicted speed (sheep max is ~0.78 m/s)
 
 
 class Track:
-    """Single track with position, velocity, and age."""
+    """Single track with position, velocity, and age.
 
-    __slots__ = ("x", "y", "vx", "vy", "last_seen", "penned")
+    Attributes
+    ----------
+    candidate
+        ``True`` while the track has not yet accumulated enough
+        consensus matches to be visible (``hit_count < consensus_k``).
+        Candidates are excluded from :meth:`SheepTracker.get_positions`
+        and from the active/penned counters.
+    hit_count
+        Number of detections this track has absorbed since spawn,
+        used by the consensus filter.
+    """
 
-    def __init__(self, x: float, y: float, step: int, penned: bool = False):
+    __slots__ = ("x", "y", "vx", "vy", "last_seen", "penned",
+                 "candidate", "hit_count")
+
+    def __init__(
+        self,
+        x: float,
+        y: float,
+        step: int,
+        penned: bool = False,
+        candidate: bool = False,
+    ):
         self.x = x
         self.y = y
         self.vx = 0.0
         self.vy = 0.0
         self.last_seen = step
         self.penned = penned
+        self.candidate = candidate
+        self.hit_count = 1
 
     @property
     def age(self) -> int:
@@ -122,6 +154,9 @@ class SheepTracker:
             self._velocity_clamp = tracker_cfg.velocity_clamp
             self._max_new_per_step = tracker_cfg.max_new_tracks_per_step
             self._pen_latch_depth = tracker_cfg.pen_latch_depth
+            self._consensus_k = tracker_cfg.consensus_k
+            self._consensus_radius = tracker_cfg.consensus_radius_m
+            self._consensus_max_age = tracker_cfg.consensus_max_age
         else:
             self.gate = gate
             self._reacquire_gate = REACQUIRE_GATE_M
@@ -132,6 +167,9 @@ class SheepTracker:
             self._velocity_clamp = VELOCITY_CLAMP
             self._max_new_per_step = MAX_ACTIVE_TRACKS
             self._pen_latch_depth = 0.0
+            self._consensus_k = 1
+            self._consensus_radius = 0.5
+            self._consensus_max_age = 8
         self._tracks: dict[int, Track] = {}
         self._next_id = 0
         self.step = 0
@@ -148,9 +186,12 @@ class SheepTracker:
         det_used: set[int] = set()
         updated_tids: set[int] = set()
 
-        # Pass 1 — match active tracks within the primary gate.
-        # Use predicted positions for matching, oldest-first.
-        active_tids = [tid for tid, t in self._tracks.items() if not t.penned]
+        # Pass 1 — match promoted active tracks within the primary gate.
+        # Use predicted positions for matching, oldest-first. Candidates
+        # are excluded; they get their own (tighter) pass below so a
+        # stray detection cannot rescue an already-stale candidate.
+        active_tids = [tid for tid, t in self._tracks.items()
+                       if not t.penned and not t.candidate]
         active_tids.sort(key=lambda tid: self._tracks[tid].last_seen)
         for tid in active_tids:
             track = self._tracks[tid]
@@ -167,6 +208,7 @@ class SheepTracker:
             if best_j >= 0:
                 dx, dy = detections[best_j]
                 track.update(dx, dy, self.step)
+                track.hit_count += 1
                 det_used.add(best_j)
                 updated_tids.add(tid)
 
@@ -190,9 +232,31 @@ class SheepTracker:
             if best_j >= 0:
                 dx, dy = detections[best_j]
                 track.update(dx, dy, self.step)
+                track.hit_count += 1
                 det_used.add(best_j)
                 updated_tids.add(tid)
 
+        # Pass 1c — match remaining detections to candidate tracks within
+        # the tight consensus radius. Each hit bumps hit_count; once it
+        # reaches consensus_k the candidate is promoted (handled below).
+        candidate_tids = [tid for tid, t in self._tracks.items() if t.candidate]
+        candidate_tids.sort(key=lambda tid: self._tracks[tid].last_seen)
+        for tid in candidate_tids:
+            track = self._tracks[tid]
+            best_j, best_d = -1, self._consensus_radius
+            for j, (dx, dy) in enumerate(detections):
+                if j in det_used:
+                    continue
+                d = math.hypot(dx - track.x, dy - track.y)
+                if d < best_d:
+                    best_d = d
+                    best_j = j
+            if best_j >= 0:
+                dx, dy = detections[best_j]
+                track.update(dx, dy, self.step)
+                track.hit_count += 1
+                det_used.add(best_j)
+
         # Pass 2 — match remaining detections to penned tracks.
         penned_tids = [tid for tid, t in self._tracks.items() if t.penned]
         for tid in penned_tids:
@@ -208,43 +272,80 @@ class SheepTracker:
             if best_j >= 0:
                 dx, dy = detections[best_j]
                 track.update(dx, dy, self.step)
+                track.hit_count += 1
                 det_used.add(best_j)
 
-        # Spawn new tracks for unmatched detections — rate-capped.
+        # Spawn tracks for still-unmatched detections.
+        #
+        # When ``consensus_k > 1`` every new track starts as a candidate
+        # and remains invisible to ``get_positions`` until it accumulates
+        # the required matches. Penned latching is deferred to after
+        # promotion — otherwise gate-area phantoms could still skip the
+        # consensus filter by landing inside the pen column and being
+        # latched forever, which is exactly the failure mode the filter
+        # is meant to eliminate. ``max_new_tracks_per_step`` continues
+        # to rate-cap spawns.
         spawned = 0
+        spawn_candidates = self._consensus_k > 1
         for j, (dx, dy) in enumerate(detections):
             if j in det_used:
                 continue
             if spawned >= self._max_new_per_step:
                 break
-            penned = self._is_penned(dx, dy)
-            self._tracks[self._next_id] = Track(dx, dy, self.step, penned)
+            if spawn_candidates:
+                self._tracks[self._next_id] = Track(
+                    dx, dy, self.step, penned=False, candidate=True)
+            else:
+                penned = self._is_penned(dx, dy)
+                self._tracks[self._next_id] = Track(
+                    dx, dy, self.step, penned=penned, candidate=False)
             self._next_id += 1
             spawned += 1
 
-        # Promote active tracks whose current estimate crosses the gate.
+        # Promote candidates that have accumulated enough matches.
         for track in self._tracks.values():
-            if track.penned:
+            if track.candidate and track.hit_count >= self._consensus_k:
+                track.candidate = False
+
+        # Promote active tracks whose current estimate crosses the gate.
+        # Candidates are deliberately excluded — a track that hasn't yet
+        # earned visibility shouldn't be allowed to latch as penned
+        # either (that path is exactly how south-wall FPs persisted
+        # forever before the consensus filter existed).
+        for track in self._tracks.values():
+            if track.penned or track.candidate:
                 continue
             px, py = track.predicted_position(
                 self.step, self._predict_steps, self._velocity_clamp)
             if self._is_penned(px, py):
                 track.penned = True
 
-        # Forget stale active tracks; penned tracks decay too but at a
-        # longer horizon (real penned sheep are still observed occasionally
-        # when the dog faces south; pure FPs at gate posts stop being
-        # detected once the dog drives away).
+        # Forget stale tracks. Candidates have their own short timeout
+        # (one window to confirm or die); promoted active tracks decay at
+        # forget_steps; penned tracks decay 8× slower because real penned
+        # sheep are still observed when the dog faces the pen.
         penned_forget = self._forget_steps * 8
-        stale = [tid for tid, t in self._tracks.items()
-                 if (not t.penned and (self.step - t.last_seen) > self._forget_steps)
-                 or (t.penned and (self.step - t.last_seen) > penned_forget)]
+        stale: list[int] = []
+        for tid, t in self._tracks.items():
+            age = self.step - t.last_seen
+            if t.candidate:
+                if age > self._consensus_max_age:
+                    stale.append(tid)
+            elif t.penned:
+                if age > penned_forget:
+                    stale.append(tid)
+            else:
+                if age > self._forget_steps:
+                    stale.append(tid)
         for tid in stale:
             del self._tracks[tid]
 
-        # Hard cap on the active set — drop the oldest-seen overflow.
+        # Hard cap on the visible (promoted, not penned) active set —
+        # drop the oldest-seen overflow. Candidates are not counted here:
+        # they don't compete for slots until they earn promotion, and
+        # rate-limiting their spawn is the job of ``max_new_per_step``.
         active = [(tid, t.last_seen) for tid, t in self._tracks.items()
-                  if not t.penned]
+                  if not t.penned and not t.candidate]
         if len(active) > MAX_ACTIVE_TRACKS:
             active.sort(key=lambda kv: kv[1])
             for tid, _ in active[: len(active) - MAX_ACTIVE_TRACKS]:
@@ -267,12 +368,16 @@ class SheepTracker:
         return (in_pen(x, y) or is_penned_position(x, y)) and y <= threshold
 
     def get_positions(self, min_freshness: int | None = None) -> dict[str, tuple[float, float]]:
-        """Active (not-penned) tracks as a ``{name: (x, y)}`` dict.
+        """Promoted (non-candidate, non-penned) tracks as ``{name: (x, y)}``.
 
         For tracks currently being predicted (occluded but within
         predict_steps), returns the extrapolated position so the teacher
         sees a smooth estimate.
 
+        Candidate tracks — those that have not yet accumulated
+        ``consensus_k`` matches — are excluded so a one-shot phantom
+        detection never reaches the policy/teacher.
+
         ``min_freshness`` (optional, deploy-only): drop tracks whose
         last_seen is older than ``step - min_freshness``. Real sheep in
         FOV are detected nearly every step; phantom tracks from sporadic
@@ -281,7 +386,7 @@ class SheepTracker:
         """
         result = {}
         for tid, track in self._tracks.items():
-            if track.penned:
+            if track.penned or track.candidate:
                 continue
             if (min_freshness is not None
                     and self.step - track.last_seen > min_freshness):
@@ -295,13 +400,20 @@ class SheepTracker:
         return {f"t{tid}" for tid, t in self._tracks.items() if t.penned}
 
     def n_active(self) -> int:
-        return sum(1 for t in self._tracks.values() if not t.penned)
+        """Number of promoted (non-candidate, non-penned) tracks."""
+        return sum(1 for t in self._tracks.values()
+                   if not t.penned and not t.candidate)
 
     def n_penned(self) -> int:
         return sum(1 for t in self._tracks.values() if t.penned)
 
+    def n_candidate(self) -> int:
+        """Number of unpromoted candidate tracks awaiting consensus."""
+        return sum(1 for t in self._tracks.values() if t.candidate)
+
     def n_predicted(self) -> int:
-        """Number of active tracks currently being extrapolated (not directly observed)."""
+        """Number of promoted active tracks currently being extrapolated (not directly observed)."""
         return sum(1 for t in self._tracks.values()
-                   if not t.penned and (self.step - t.last_seen) > 0
+                   if not t.penned and not t.candidate
+                   and (self.step - t.last_seen) > 0
                    and (self.step - t.last_seen) <= self._predict_steps)
diff --git a/tests/test_config.py b/tests/test_config.py
index a4bbac8..c97794c 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -61,10 +61,23 @@ class TestTrackerConfig:
 
     def test_webots_preset_tighter(self):
         cfg = HERDING_WEBOTS.tracker
-        assert cfg.forget_steps == 120
+        # forget_steps was extended so confirmed sheep tracks survive
+        # sparse 140° FOV re-sightings; consensus blocks phantoms from
+        # reaching this lifetime.
+        assert cfg.forget_steps >= 200
         assert cfg.max_new_tracks_per_step == 1
         assert cfg.pen_latch_depth == 2.0
 
+    def test_default_consensus_disabled(self):
+        cfg = TrackerConfig()
+        assert cfg.consensus_k == 1
+
+    def test_webots_preset_enables_consensus(self):
+        cfg = HERDING_WEBOTS.tracker
+        assert cfg.consensus_k > 1
+        assert cfg.consensus_radius_m > 0.0
+        assert cfg.consensus_max_age >= cfg.consensus_k
+
     def test_invalid_forget_steps(self):
         with pytest.raises(ValueError):
             TrackerConfig(forget_steps=0)
@@ -73,6 +86,14 @@ class TestTrackerConfig:
         with pytest.raises(ValueError):
             TrackerConfig(max_new_tracks_per_step=0)
 
+    def test_invalid_consensus_params(self):
+        with pytest.raises(ValueError):
+            TrackerConfig(consensus_k=0)
+        with pytest.raises(ValueError):
+            TrackerConfig(consensus_radius_m=0.0)
+        with pytest.raises(ValueError):
+            TrackerConfig(consensus_max_age=0)
+
 
 # ---------------------------------------------------------------------------
 # DetectionConfig
diff --git a/tests/test_perception.py b/tests/test_perception.py
index 01ca990..fc137c7 100644
--- a/tests/test_perception.py
+++ b/tests/test_perception.py
@@ -164,3 +164,88 @@ def test_tracker_reset_clears_state():
     t.reset()
     assert t.n_active() == 0
     assert t.step == 0
+
+
+# ---------------------------------------------------------------------------
+# Consensus promotion
+# ---------------------------------------------------------------------------
+
+def _tracker_with_consensus(k: int = 3, radius: float = 0.5, max_age: int = 8):
+    from herding.config import TrackerConfig
+    return SheepTracker(tracker_cfg=TrackerConfig(
+        consensus_k=k, consensus_radius_m=radius, consensus_max_age=max_age,
+    ))
+
+
+def test_consensus_default_disabled():
+    """With consensus_k=1 (default) the first detection is immediately visible."""
+    t = SheepTracker()
+    t.update([(5.0, 0.0)])
+    assert t.n_active() == 1
+    assert len(t.get_positions()) == 1
+
+
+def test_consensus_hides_one_shot_detection():
+    """K>=2: a single detection that never reappears is filtered out."""
+    t = _tracker_with_consensus(k=3)
+    t.update([(5.0, 0.0)])
+    assert t.n_active() == 0           # candidate, not promoted
+    assert t.n_candidate() == 1
+    assert t.get_positions() == {}
+
+
+def test_consensus_promotes_after_k_matches():
+    """A real sheep visible for K frames promotes and appears in get_positions."""
+    t = _tracker_with_consensus(k=3)
+    for _ in range(3):
+        t.update([(5.0, 0.0)])
+    assert t.n_active() == 1
+    assert t.n_candidate() == 0
+    assert len(t.get_positions()) == 1
+
+
+def test_consensus_candidate_expires_quickly():
+    """A candidate that fails to re-confirm within consensus_max_age dies."""
+    t = _tracker_with_consensus(k=3, max_age=5)
+    t.update([(5.0, 0.0)])
+    assert t.n_candidate() == 1
+    for _ in range(6):                  # > max_age empty frames
+        t.update([])
+    assert t.n_candidate() == 0
+    assert t.n_active() == 0
+
+
+def test_consensus_tracker_does_not_promote_phantom_pen():
+    """A one-shot detection inside the pen column must not latch as penned
+    while it is still a candidate."""
+    t = _tracker_with_consensus(k=3)
+    t.update([(11.5, -16.0)])           # gate-area FP, inside the pen column
+    # Not promoted, not penned — just a candidate.
+    assert t.n_penned() == 0
+    assert t.n_candidate() == 1
+    # And after one expiry window it disappears entirely.
+    for _ in range(10):
+        t.update([])
+    assert t.n_penned() == 0
+    assert t.n_candidate() == 0
+
+
+def test_consensus_distinguishes_real_sheep_from_phantom():
+    """Real sheep (continuous detections) promote; phantom (intermittent
+    detections at jittered positions outside consensus_radius) does not
+    appear in get_positions even while individual candidates are still
+    within the max-age window."""
+    t = _tracker_with_consensus(k=3, radius=0.4, max_age=4)
+    # Real sheep visible at (5, 0) every frame; phantom jitters > radius.
+    phantom_positions = [(10.0, 5.0), (10.5, 5.6), (11.1, 5.0), (10.0, 5.7)]
+    for k in range(4):
+        t.update([(5.0, 0.0), phantom_positions[k]])
+    positions = t.get_positions()
+    assert len(positions) == 1
+    real_xy = next(iter(positions.values()))
+    assert math.hypot(real_xy[0] - 5.0, real_xy[1]) < 0.5
+    # And once the candidate window has elapsed, every phantom has died.
+    for _ in range(8):
+        t.update([(5.0, 0.0)])
+    assert t.n_candidate() == 0
+    assert len(t.get_positions()) == 1