Sheep training flock _ improver

This commit is contained in:
Johnny Fernandes
2026-04-24 22:46:51 +01:00
parent d599181d22
commit b3251fcca3
2 changed files with 24 additions and 33 deletions
+10 -12
View File
@@ -133,18 +133,16 @@ while robot.step(timestep) != -1:
fx, fy = 0.0, 0.0 fx, fy = 0.0, 0.0
# Outside the pen: repel from the exterior of the side and back walls so # Repel unpenned sheep from the exterior of the pen's side walls so they
# sheep don't get pinned against them when fleeing from the dog. # don't get pinned by flee forces. Only fires when strictly outside the pen
# The pen entrance is open on the north (y > PEN_Y_MAX), so only push away # (x < PEN_X_MIN or x > PEN_X_MAX) at pen height (y in pen y-range).
# from the west (x≈PEN_X_MIN), east (x≈PEN_X_MAX), and south (y≈PEN_Y_MIN) exteriors. # Entrance is open on the north (y > PEN_Y_MAX) — no force there.
PEN_EXT_MARGIN = 1.2 PEN_EXT_MARGIN = 0.8
if not penned: if not penned and PEN_Y_MIN < y < PEN_Y_MAX:
if PEN_Y_MIN - PEN_EXT_MARGIN < y < PEN_Y_MAX and x < PEN_X_MIN + PEN_EXT_MARGIN: if PEN_X_MIN - PEN_EXT_MARGIN < x < PEN_X_MIN:
fx -= ((PEN_X_MIN + PEN_EXT_MARGIN - x) / PEN_EXT_MARGIN) * 8.0 fx -= ((x - (PEN_X_MIN - PEN_EXT_MARGIN)) / PEN_EXT_MARGIN) * 6.0
if PEN_Y_MIN - PEN_EXT_MARGIN < y < PEN_Y_MAX and x > PEN_X_MAX - PEN_EXT_MARGIN: if PEN_X_MAX < x < PEN_X_MAX + PEN_EXT_MARGIN:
fx += ((x - (PEN_X_MAX - PEN_EXT_MARGIN)) / PEN_EXT_MARGIN) * 8.0 fx += ((PEN_X_MAX + PEN_EXT_MARGIN - x) / PEN_EXT_MARGIN) * 6.0
if y < PEN_Y_MIN + PEN_EXT_MARGIN and PEN_X_MIN < x < PEN_X_MAX:
fy += ((PEN_Y_MIN + PEN_EXT_MARGIN - y) / PEN_EXT_MARGIN) * 8.0
if penned: if penned:
# Inside pen: wander freely, strong boundary forces prevent exit, # Inside pen: wander freely, strong boundary forces prevent exit,
+10 -17
View File
@@ -54,8 +54,6 @@ class HerdingEnv(gym.Env):
# Reward weights (simple per-sheep progress — no phases, no gating) # Reward weights (simple per-sheep progress — no phases, no gating)
# ----------------------------------------------------------------------- # -----------------------------------------------------------------------
W_PER_SHEEP = 2.0 # progress: sum of per-sheep distance-to-pen reductions W_PER_SHEEP = 2.0 # progress: sum of per-sheep distance-to-pen reductions
W_SCATTER_PEN = 0.5 # penalty per metre the active flock radius exceeds threshold
SCATTER_THRESH = 8.0 # metres — allow natural spread, penalise excessive scatter
W_PEN_BONUS = 10.0 # per sheep penned W_PEN_BONUS = 10.0 # per sheep penned
W_COMPLETE = 100.0 # all sheep penned W_COMPLETE = 100.0 # all sheep penned
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
@@ -314,12 +312,7 @@ class HerdingEnv(gym.Env):
else: else:
r_progress = 0.0 r_progress = 0.0
# Soft scatter penalty: discourages abandoning the remaining active flock. reward = r_progress
# Only fires when radius exceeds threshold so normal spread isn't punished.
_, radius, _ = self._flock_stats()
r_scatter = -max(0.0, radius - self.SCATTER_THRESH) * self.W_SCATTER_PEN
reward = r_progress + r_scatter
reward += newly_penned * self.W_PEN_BONUS reward += newly_penned * self.W_PEN_BONUS
reward -= self.W_STEP_COST reward -= self.W_STEP_COST
if n_penned == self.n_sheep: if n_penned == self.n_sheep:
@@ -369,17 +362,17 @@ class HerdingEnv(gym.Env):
if pos[1] < -F + m: fy += ((-F + m - pos[1]) / m) * 6.0 if pos[1] < -F + m: fy += ((-F + m - pos[1]) / m) * 6.0
if pos[1] > F - m: fy -= ((pos[1] - (F - m)) / m) * 6.0 if pos[1] > F - m: fy -= ((pos[1] - (F - m)) / m) * 6.0
# Pen exterior wall avoidance — mirrors sheep.py addition.
# Prevents sheep getting pinned against the pen side/back walls when fleeing. # Pen exterior wall avoidance: mirrors sheep.py.
EM = 1.2 # Only fires when strictly outside the pen at pen height.
EM = 0.8
px0, px1 = self.PEN_X[0], self.PEN_X[1] px0, px1 = self.PEN_X[0], self.PEN_X[1]
py0, py1 = self.PEN_Y[0], self.PEN_Y[1] py0, py1 = self.PEN_Y[0], self.PEN_Y[1]
if py0 - EM < pos[1] < py1 and pos[0] < px0 + EM: if py0 < pos[1] < py1:
fx -= ((px0 + EM - pos[0]) / EM) * 8.0 if px0 - EM < pos[0] < px0:
if py0 - EM < pos[1] < py1 and pos[0] > px1 - EM: fx -= ((pos[0] - (px0 - EM)) / EM) * 6.0
fx += ((pos[0] - (px1 - EM)) / EM) * 8.0 if px1 < pos[0] < px1 + EM:
if pos[1] < py0 + EM and px0 < pos[0] < px1: fx += ((px1 + EM - pos[0]) / EM) * 6.0
fy += ((py0 + EM - pos[1]) / EM) * 8.0
# Hard-stop clamp: mirrors sheep.py — zero any force driving further # Hard-stop clamp: mirrors sheep.py — zero any force driving further
# into the wall within 0.5 m so the flee force cannot pin the sheep. # into the wall within 0.5 m so the flee force cannot pin the sheep.