Checkpoint 9
This commit is contained in:
@@ -71,12 +71,14 @@ BC_NET_ARCH ?= 512,512
|
||||
ifeq ($(WORLD),field_round)
|
||||
PPO_STEPS ?= 4000000
|
||||
KL ?= 0.02
|
||||
TIME_W ?= 0.0
|
||||
else
|
||||
PPO_STEPS ?= 2000000
|
||||
KL ?= 0.05
|
||||
TIME_W ?= -0.05
|
||||
endif
|
||||
# Time penalty stays at 0 until the success rate is high. Earlier runs showed
|
||||
# TIME_W=-0.05 traded ~10 pts of success for speed on hard combos —
|
||||
# learn to succeed first, optimize speed in a later pass.
|
||||
TIME_W ?= 0.0
|
||||
IMITATE ?= 0.0
|
||||
# PPO rollouts at full difficulty so the training distribution matches
|
||||
# eval (deployment). Anything lower causes a train/eval mismatch that
|
||||
|
||||
@@ -43,9 +43,14 @@ DELTA_DRIVE = 2.0 # standoff behind flock CoM
|
||||
# Omega gain for mecanum (how strongly the dog turns to face target)
|
||||
OMEGA_GAIN = 0.6
|
||||
|
||||
# Recovery: push the last straggler straight through the gate.
|
||||
RECOVERY_GATE_DIST = 6.0 # only when straggler is this close to gate centre
|
||||
RECOVERY_PUSH_DIST = 1.2 # stand-off behind straggler, away from gate
|
||||
# Recovery: push small flocks (≤ RECOVERY_MAX_N) through the gate one
|
||||
# sheep at a time. Handling only n=1 is not enough — at n=2..3 on the round
|
||||
# field the flock is too small to self-cohere through the 3 m gate but
|
||||
# the standard collect/drive standoff just orbits them. Push the sheep
|
||||
# nearest the gate first; once it pens, the rule re-applies to the next.
|
||||
RECOVERY_MAX_N = 3
|
||||
RECOVERY_GATE_DIST = 8.0 # only when target sheep is this close to gate
|
||||
RECOVERY_PUSH_DIST = 1.2 # stand-off behind sheep, away from gate
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -114,11 +119,19 @@ def compute_action(dog_xy, dog_heading, sheep_positions,
|
||||
dists = [math.hypot(p[0] - com_x, p[1] - com_y) for p in active]
|
||||
radius = max(dists)
|
||||
|
||||
# ---- Last-straggler recovery (single sheep circling near gate) ----
|
||||
# ---- Small-flock recovery (push sheep through the gate one by one) ----
|
||||
# Triggers when the active flock is small (≤ RECOVERY_MAX_N) and the
|
||||
# sheep nearest the gate is close enough that direct pushing works.
|
||||
# For larger flocks the standard collect/drive logic handles them.
|
||||
gc = _gate_center()
|
||||
if n == 1:
|
||||
sx, sy = active[0]
|
||||
d_to_gate = math.hypot(sx - gc[0], sy - gc[1])
|
||||
if n <= RECOVERY_MAX_N:
|
||||
# Pick the sheep closest to the gate as the recovery target —
|
||||
# finishing that one first reduces the active count and lets the
|
||||
# remaining sheep get their own recovery turn.
|
||||
gate_dists = [math.hypot(p[0] - gc[0], p[1] - gc[1]) for p in active]
|
||||
target_idx = min(range(n), key=lambda i: gate_dists[i])
|
||||
sx, sy = active[target_idx]
|
||||
d_to_gate = gate_dists[target_idx]
|
||||
if d_to_gate < RECOVERY_GATE_DIST:
|
||||
dx_g = sx - gc[0]
|
||||
dy_g = sy - gc[1]
|
||||
|
||||
Reference in New Issue
Block a user