Sheep training flock _ improver

2026-04-24 21:29:44 +01:00
parent 8b54b2a934
commit d599181d22
2 changed files with 37 additions and 5 deletions
@@ -133,6 +133,19 @@ while robot.step(timestep) != -1:
    fx, fy = 0.0, 0.0
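    # Outside the pen: repel from the exterior of the side and back walls so
    # sheep don't get pinned against them when fleeing from the dog.
    # The pen entrance is open on the north (y > PEN_Y_MAX), so only push away
    # from the west (x≈PEN_X_MIN), east (x≈PEN_X_MAX), and south (y≈PEN_Y_MIN)
    # exteriors.
    # NOTE(review): the range tests below have no outer bound, so the push is
    # applied at any distance beyond the wall and its magnitude grows with
    # that distance instead of fading out at PEN_EXT_MARGIN. The south branch
    # also adds +y, which for a sheep south of the back wall points toward the
    # wall rather than away from it. Confirm both are intended.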
    PEN_EXT_MARGIN = 1.2
    if not penned:
        if PEN_Y_MIN - PEN_EXT_MARGIN < y < PEN_Y_MAX and x < PEN_X_MIN + PEN_EXT_MARGIN:
            fx -= ((PEN_X_MIN + PEN_EXT_MARGIN - x) / PEN_EXT_MARGIN) * 8.0
        if PEN_Y_MIN - PEN_EXT_MARGIN < y < PEN_Y_MAX and x > PEN_X_MAX - PEN_EXT_MARGIN:
            fx += ((x - (PEN_X_MAX - PEN_EXT_MARGIN)) / PEN_EXT_MARGIN) * 8.0
        if y < PEN_Y_MIN + PEN_EXT_MARGIN and PEN_X_MIN < x < PEN_X_MAX:
            fy += ((PEN_Y_MIN + PEN_EXT_MARGIN - y) / PEN_EXT_MARGIN) * 8.0
    if penned:
        # Inside pen: wander freely, strong boundary forces prevent exit,
        # separation still active to avoid collisions with other penned sheep.
@@ -54,6 +54,8 @@ class HerdingEnv(gym.Env):
    # Reward weights  (simple per-sheep progress — no phases, no gating)
    # -----------------------------------------------------------------------
    W_PER_SHEEP   = 2.0   # progress: sum of per-sheep distance-to-pen reductions
    W_SCATTER_PEN = 0.5   # penalty per metre the active flock radius exceeds threshold
    SCATTER_THRESH = 8.0  # metres — allow natural spread, penalise excessive scatter
    W_PEN_BONUS   = 10.0  # per sheep penned
    W_COMPLETE    = 100.0 # all sheep penned
    W_STEP_COST   = 0.02  # time penalty — strong enough to punish doing nothing
@@ -312,7 +314,12 @@ class HerdingEnv(gym.Env):
        else:
            r_progress = 0.0
-        reward  = r_progress
+        # Soft scatter penalty: discourages abandoning the remaining active flock.
        # Only fires when radius exceeds threshold so normal spread isn't punished.
        _, radius, _ = self._flock_stats()
        r_scatter = -max(0.0, radius - self.SCATTER_THRESH) * self.W_SCATTER_PEN
        reward  = r_progress + r_scatter
        reward += newly_penned * self.W_PEN_BONUS
        reward -= self.W_STEP_COST
        if n_penned == self.n_sheep:
@@ -362,6 +369,18 @@ class HerdingEnv(gym.Env):
        if pos[1] < -F + m: fy += ((-F + m - pos[1]) / m) * 6.0
        if pos[1] >  F - m: fy -= ((pos[1] - (F - m)) / m) * 6.0
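        # Pen exterior wall avoidance — mirrors sheep.py addition.
        # Prevents sheep getting pinned against the pen side/back walls when fleeing.
        # NOTE(review): as in sheep.py, the range tests below have no outer
        # bound (the force grows with distance from the wall), and the last
        # branch adds +y for positions below py0 + EM, which points toward the
        # back wall for a sheep south of it. Confirm this matches the intent.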
        EM = 1.2
        px0, px1 = self.PEN_X[0], self.PEN_X[1]
        py0, py1 = self.PEN_Y[0], self.PEN_Y[1]
        if py0 - EM < pos[1] < py1 and pos[0] < px0 + EM:
            fx -= ((px0 + EM - pos[0]) / EM) * 8.0
        if py0 - EM < pos[1] < py1 and pos[0] > px1 - EM:
            fx += ((pos[0] - (px1 - EM)) / EM) * 8.0
        if pos[1] < py0 + EM and px0 < pos[0] < px1:
            fy += ((py0 + EM - pos[1]) / EM) * 8.0
        # Hard-stop clamp: mirrors sheep.py — zero any force driving further
        # into the wall within 0.5 m so the flee force cannot pin the sheep.
        HS = 0.5