From b3251fcca3e6324bde044ed196be7d47aabe59e6 Mon Sep 17 00:00:00 2001 From: Johnny Fernandes Date: Fri, 24 Apr 2026 22:46:51 +0100 Subject: [PATCH] Sheep training flock _ improver --- controllers/sheep/sheep.py | 22 ++++++++++------------ training/herding_env.py | 35 ++++++++++++++--------------------- 2 files changed, 24 insertions(+), 33 deletions(-) diff --git a/controllers/sheep/sheep.py b/controllers/sheep/sheep.py index 010b28e..7b2f1c8 100644 --- a/controllers/sheep/sheep.py +++ b/controllers/sheep/sheep.py @@ -133,18 +133,16 @@ while robot.step(timestep) != -1: fx, fy = 0.0, 0.0 - # Outside the pen: repel from the exterior of the side and back walls so - # sheep don't get pinned against them when fleeing from the dog. - # The pen entrance is open on the north (y > PEN_Y_MAX), so only push away - # from the west (x≈PEN_X_MIN), east (x≈PEN_X_MAX), and south (y≈PEN_Y_MIN) exteriors. - PEN_EXT_MARGIN = 1.2 - if not penned: - if PEN_Y_MIN - PEN_EXT_MARGIN < y < PEN_Y_MAX and x < PEN_X_MIN + PEN_EXT_MARGIN: - fx -= ((PEN_X_MIN + PEN_EXT_MARGIN - x) / PEN_EXT_MARGIN) * 8.0 - if PEN_Y_MIN - PEN_EXT_MARGIN < y < PEN_Y_MAX and x > PEN_X_MAX - PEN_EXT_MARGIN: - fx += ((x - (PEN_X_MAX - PEN_EXT_MARGIN)) / PEN_EXT_MARGIN) * 8.0 - if y < PEN_Y_MIN + PEN_EXT_MARGIN and PEN_X_MIN < x < PEN_X_MAX: - fy += ((PEN_Y_MIN + PEN_EXT_MARGIN - y) / PEN_EXT_MARGIN) * 8.0 + # Repel unpenned sheep from the exterior of the pen's side walls so they + # don't get pinned by flee forces. Only fires when strictly outside the pen + # (x < PEN_X_MIN or x > PEN_X_MAX) at pen height (y in pen y-range). + # Entrance is open on the north (y > PEN_Y_MAX) — no force there. + PEN_EXT_MARGIN = 0.8 + if not penned and PEN_Y_MIN < y < PEN_Y_MAX: + if PEN_X_MIN - PEN_EXT_MARGIN < x < PEN_X_MIN: + fx -= ((x - (PEN_X_MIN - PEN_EXT_MARGIN)) / PEN_EXT_MARGIN) * 6.0 + if PEN_X_MAX < x < PEN_X_MAX + PEN_EXT_MARGIN: + fx += ((PEN_X_MAX + PEN_EXT_MARGIN - x) / PEN_EXT_MARGIN) * 6.0 if penned: # Inside pen: wander freely, strong boundary forces prevent exit, diff --git a/training/herding_env.py b/training/herding_env.py index 0d56527..84f7519 100644 --- a/training/herding_env.py +++ b/training/herding_env.py @@ -53,12 +53,10 @@ class HerdingEnv(gym.Env): # ----------------------------------------------------------------------- # Reward weights (simple per-sheep progress — no phases, no gating) # ----------------------------------------------------------------------- - W_PER_SHEEP = 2.0 # progress: sum of per-sheep distance-to-pen reductions - W_SCATTER_PEN = 0.5 # penalty per metre the active flock radius exceeds threshold - SCATTER_THRESH = 8.0 # metres — allow natural spread, penalise excessive scatter - W_PEN_BONUS = 10.0 # per sheep penned - W_COMPLETE = 100.0 # all sheep penned - W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing + W_PER_SHEEP = 2.0 # progress: sum of per-sheep distance-to-pen reductions + W_PEN_BONUS = 10.0 # per sheep penned + W_COMPLETE = 100.0 # all sheep penned + W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing def __init__(self, n_sheep: int = 1, max_steps: int = 2000, render_mode: str = None, random_n_sheep: bool = False): @@ -314,12 +312,7 @@ class HerdingEnv(gym.Env): else: r_progress = 0.0 - # Soft scatter penalty: discourages abandoning the remaining active flock. - # Only fires when radius exceeds threshold so normal spread isn't punished. - _, radius, _ = self._flock_stats() - r_scatter = -max(0.0, radius - self.SCATTER_THRESH) * self.W_SCATTER_PEN - - reward = r_progress + r_scatter + reward = r_progress reward += newly_penned * self.W_PEN_BONUS reward -= self.W_STEP_COST if n_penned == self.n_sheep: @@ -369,17 +362,17 @@ class HerdingEnv(gym.Env): if pos[1] < -F + m: fy += ((-F + m - pos[1]) / m) * 6.0 if pos[1] > F - m: fy -= ((pos[1] - (F - m)) / m) * 6.0 - # Pen exterior wall avoidance — mirrors sheep.py addition. - # Prevents sheep getting pinned against the pen side/back walls when fleeing. - EM = 1.2 + + # Pen exterior wall avoidance: mirrors sheep.py. + # Only fires when strictly outside the pen at pen height. + EM = 0.8 px0, px1 = self.PEN_X[0], self.PEN_X[1] py0, py1 = self.PEN_Y[0], self.PEN_Y[1] - if py0 - EM < pos[1] < py1 and pos[0] < px0 + EM: - fx -= ((px0 + EM - pos[0]) / EM) * 8.0 - if py0 - EM < pos[1] < py1 and pos[0] > px1 - EM: - fx += ((pos[0] - (px1 - EM)) / EM) * 8.0 - if pos[1] < py0 + EM and px0 < pos[0] < px1: - fy += ((py0 + EM - pos[1]) / EM) * 8.0 + if py0 < pos[1] < py1: + if px0 - EM < pos[0] < px0: + fx -= ((pos[0] - (px0 - EM)) / EM) * 6.0 + if px1 < pos[0] < px1 + EM: + fx += ((px1 + EM - pos[0]) / EM) * 6.0 # Hard-stop clamp: mirrors sheep.py — zero any force driving further # into the wall within 0.5 m so the flee force cannot pin the sheep.