diff --git a/Makefile b/Makefile index 308d72d..07cc553 100644 --- a/Makefile +++ b/Makefile @@ -71,12 +71,14 @@ BC_NET_ARCH ?= 512,512 ifeq ($(WORLD),field_round) PPO_STEPS ?= 4000000 KL ?= 0.02 -TIME_W ?= 0.0 else PPO_STEPS ?= 2000000 KL ?= 0.05 -TIME_W ?= -0.05 endif +# Time penalty is 0 until success rate is high. Earlier runs showed +# TIME_W=-0.05 traded ~10 pts of success for speed on hard combos — +# learn to succeed first, optimize speed in a later pass. +TIME_W ?= 0.0 IMITATE ?= 0.0 # PPO rollouts at full difficulty so the training distribution matches # eval (deployment). Anything lower causes a train/eval mismatch that diff --git a/herding/control/universal.py b/herding/control/universal.py index de945db..12d054b 100644 --- a/herding/control/universal.py +++ b/herding/control/universal.py @@ -43,9 +43,14 @@ DELTA_DRIVE = 2.0 # standoff behind flock CoM # Omega gain for mecanum (how strongly the dog turns to face target) OMEGA_GAIN = 0.6 -# Recovery: push the last straggler straight through the gate. -RECOVERY_GATE_DIST = 6.0 # only when straggler is this close to gate centre -RECOVERY_PUSH_DIST = 1.2 # stand-off behind straggler, away from gate +# Recovery: push small flocks (≤ RECOVERY_MAX_N) through the gate one +# sheep at a time. n=1 alone is not enough — at n=2..3 on the round +# field the flock is too small to self-cohere through the 3 m gate but +# the standard collect/drive standoff just orbits them. Push the sheep +# nearest the gate first; once it pens, the rule re-applies to the next. 
+RECOVERY_MAX_N = 3 +RECOVERY_GATE_DIST = 8.0 # only when target sheep is this close to gate centre +RECOVERY_PUSH_DIST = 1.2 # stand-off behind sheep, away from gate # --------------------------------------------------------------------------- @@ -114,11 +119,19 @@ def compute_action(dog_xy, dog_heading, sheep_positions, dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active] radius = max(dists) - # ---- Last-straggler recovery (single sheep circling near gate) ---- + # ---- Small-flock recovery (push sheep through the gate one by one) ---- + # Triggers when the active flock is small (≤ RECOVERY_MAX_N) and the + # sheep nearest the gate is close enough that direct pushing works. + # For larger flocks the standard collect/drive logic handles them. gc = _gate_center() - if n == 1: - sx, sy = active[0] - d_to_gate = math.hypot(sx - gc[0], sy - gc[1]) + if n <= RECOVERY_MAX_N: + # Pick the sheep closest to the gate as the recovery target — + # finishing that one first reduces the active count and lets the + # remaining sheep get their own recovery turn. + gate_dists = [math.hypot(p[0] - gc[0], p[1] - gc[1]) for p in active] + target_idx = min(range(n), key=lambda i: gate_dists[i]) + sx, sy = active[target_idx] + d_to_gate = gate_dists[target_idx] if d_to_gate < RECOVERY_GATE_DIST: dx_g = sx - gc[0] dy_g = sy - gc[1]