Checkpoint 9

2026-05-13 13:46:50 +01:00
parent be58ad2054
commit 683de740af
2 changed files with 24 additions and 9 deletions
@@ -71,12 +71,14 @@ BC_NET_ARCH      ?= 512,512
 ifeq ($(WORLD),field_round)
 PPO_STEPS        ?= 4000000
 KL               ?= 0.02
-TIME_W           ?= 0.0
 else
 PPO_STEPS        ?= 2000000
 KL               ?= 0.05
-TIME_W           ?= -0.05
 endif
+# Time penalty defaults to 0 for every WORLD until success rate is high.
+# Earlier runs showed TIME_W=-0.05 traded ~10 pts of success for speed on
+# hard combos — learn to succeed first, optimize speed in a later pass.
+TIME_W           ?= 0.0
 IMITATE          ?= 0.0
 # PPO rollouts at full difficulty so the training distribution matches
 # eval (deployment).  Anything lower causes a train/eval mismatch that
@@ -43,9 +43,14 @@ DELTA_DRIVE = 2.0        # standoff behind flock CoM
 # Omega gain for mecanum (how strongly the dog turns to face target)
 OMEGA_GAIN = 0.6

-# Recovery: push the last straggler straight through the gate.
-RECOVERY_GATE_DIST = 6.0  # only when straggler is this close to gate centre
-RECOVERY_PUSH_DIST = 1.2   # stand-off behind straggler, away from gate
+# Recovery: push small flocks (≤ RECOVERY_MAX_N) through the gate one
+# sheep at a time. n=1 alone is not enough: at n=2..3 on the round
+# field the flock is too small to self-cohere through the 3 m gate, and
+# the standard collect/drive standoff just orbits them. Push the sheep
+# nearest the gate first; once it pens, the rule re-applies to the next.
+RECOVERY_MAX_N = 3
+RECOVERY_GATE_DIST = 8.0   # only when target sheep is this close to gate centre
+RECOVERY_PUSH_DIST = 1.2   # stand-off behind sheep, away from gate


 # ---------------------------------------------------------------------------
@@ -114,11 +119,19 @@ def compute_action(dog_xy, dog_heading, sheep_positions,
    dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
    radius = max(dists)

-    # ---- Last-straggler recovery (single sheep circling near gate) ----
+    # ---- Small-flock recovery (push sheep through the gate one by one) ----
+    # Triggers when the active flock is small (≤ RECOVERY_MAX_N) and the
+    # sheep nearest the gate is close enough that direct pushing works.
+    # For larger flocks the standard collect/drive logic handles them.
    gc = _gate_center()
-    if n == 1:
-        sx, sy = active[0]
-        d_to_gate = math.hypot(sx - gc[0], sy - gc[1])
+    if n <= RECOVERY_MAX_N:
+        # Pick the sheep closest to the gate as the recovery target —
+        # finishing that one first reduces the active count and lets the
+        # remaining sheep get their own recovery turn.
+        gate_dists = [math.hypot(p[0] - gc[0], p[1] - gc[1]) for p in active]
+        target_idx = min(range(n), key=lambda i: gate_dists[i])
+        sx, sy = active[target_idx]
+        d_to_gate = gate_dists[target_idx]
        if d_to_gate < RECOVERY_GATE_DIST:
            dx_g = sx - gc[0]
            dy_g = sy - gc[1]