Sheep training flock _ improver
This commit is contained in:
+14
-21
@@ -53,12 +53,10 @@ class HerdingEnv(gym.Env):
|
||||
# -----------------------------------------------------------------------
|
||||
# Reward weights (simple per-sheep progress — no phases, no gating)
|
||||
# -----------------------------------------------------------------------
|
||||
- W_PER_SHEEP = 2.0 # progress: sum of per-sheep distance-to-pen reductions
|
||||
- W_SCATTER_PEN = 0.5 # penalty per metre the active flock radius exceeds threshold
|
||||
- SCATTER_THRESH = 8.0 # metres — allow natural spread, penalise excessive scatter
|
||||
- W_PEN_BONUS = 10.0 # per sheep penned
|
||||
- W_COMPLETE = 100.0 # all sheep penned
|
||||
- W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
||||
+ W_PER_SHEEP = 2.0 # progress: sum of per-sheep distance-to-pen reductions
|
||||
+ W_PEN_BONUS = 10.0 # per sheep penned
|
||||
+ W_COMPLETE = 100.0 # all sheep penned
|
||||
+ W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
||||
|
||||
def __init__(self, n_sheep: int = 1, max_steps: int = 2000,
|
||||
render_mode: str = None, random_n_sheep: bool = False):
|
||||
@@ -314,12 +312,7 @@ class HerdingEnv(gym.Env):
|
||||
else:
|
||||
r_progress = 0.0
|
||||
|
||||
- # Soft scatter penalty: discourages abandoning the remaining active flock.
|
||||
- # Only fires when radius exceeds threshold so normal spread isn't punished.
|
||||
- _, radius, _ = self._flock_stats()
|
||||
- r_scatter = -max(0.0, radius - self.SCATTER_THRESH) * self.W_SCATTER_PEN
|
||||
-
|
||||
- reward = r_progress + r_scatter
|
||||
+ reward = r_progress
|
||||
reward += newly_penned * self.W_PEN_BONUS
|
||||
reward -= self.W_STEP_COST
|
||||
if n_penned == self.n_sheep:
|
||||
@@ -369,17 +362,17 @@ class HerdingEnv(gym.Env):
|
||||
if pos[1] < -F + m: fy += ((-F + m - pos[1]) / m) * 6.0
|
||||
if pos[1] > F - m: fy -= ((pos[1] - (F - m)) / m) * 6.0
|
||||
|
||||
- # Pen exterior wall avoidance — mirrors sheep.py addition.
|
||||
- # Prevents sheep getting pinned against the pen side/back walls when fleeing.
|
||||
- EM = 1.2
|
||||
+
|
||||
+ # Pen exterior wall avoidance: mirrors sheep.py.
|
||||
+ # Only fires when strictly outside the pen at pen height.
|
||||
+ EM = 0.8
|
||||
px0, px1 = self.PEN_X[0], self.PEN_X[1]
|
||||
py0, py1 = self.PEN_Y[0], self.PEN_Y[1]
|
||||
- if py0 - EM < pos[1] < py1 and pos[0] < px0 + EM:
|
||||
- fx -= ((px0 + EM - pos[0]) / EM) * 8.0
|
||||
- if py0 - EM < pos[1] < py1 and pos[0] > px1 - EM:
|
||||
- fx += ((pos[0] - (px1 - EM)) / EM) * 8.0
|
||||
- if pos[1] < py0 + EM and px0 < pos[0] < px1:
|
||||
- fy += ((py0 + EM - pos[1]) / EM) * 8.0
|
||||
+ if py0 < pos[1] < py1:
|
||||
+ if px0 - EM < pos[0] < px0:
|
||||
+ fx -= ((pos[0] - (px0 - EM)) / EM) * 6.0
|
||||
+ if px1 < pos[0] < px1 + EM:
|
||||
+ fx += ((px1 + EM - pos[0]) / EM) * 6.0
|
||||
|
||||
# Hard-stop clamp: mirrors sheep.py — zero any force driving further
|
||||
# into the wall within 0.5 m so the flee force cannot pin the sheep.
|
||||
|
||||
Reference in New Issue
Block a user