Sheep training flock _ improver
This commit is contained in:
+15
-12
@@ -54,6 +54,8 @@ class HerdingEnv(gym.Env):
|
|||||||
# Reward weights (simple per-sheep progress — no phases, no gating)
|
# Reward weights (simple per-sheep progress — no phases, no gating)
|
||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
W_PER_SHEEP = 2.0 # progress: sum of per-sheep distance-to-pen reductions
|
W_PER_SHEEP = 2.0 # progress: sum of per-sheep distance-to-pen reductions
|
||||||
|
W_ALIGN = 0.05 # dog on anti-pen side of COM — directional hint only,
|
||||||
|
# kept tiny so sit-still is never profitable vs completion
|
||||||
W_PEN_BONUS = 10.0 # per sheep penned
|
W_PEN_BONUS = 10.0 # per sheep penned
|
||||||
W_COMPLETE = 100.0 # all sheep penned
|
W_COMPLETE = 100.0 # all sheep penned
|
||||||
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
||||||
@@ -312,7 +314,19 @@ class HerdingEnv(gym.Env):
|
|||||||
else:
|
else:
|
||||||
r_progress = 0.0
|
r_progress = 0.0
|
||||||
|
|
||||||
reward = r_progress
|
com, _, _ = self._flock_stats()
|
||||||
|
com_dist = float(np.linalg.norm(com - self.PEN_CENTER))
|
||||||
|
d_dog_com = float(np.linalg.norm(self.dog_pos - com))
|
||||||
|
if d_dog_com > 0.1 and com_dist > 0.1:
|
||||||
|
pen_dir = (self.PEN_CENTER - com) / com_dist
|
||||||
|
dog_dir = (self.dog_pos - com) / d_dog_com
|
||||||
|
cosine = -float(np.dot(pen_dir, dog_dir))
|
||||||
|
proximity = max(0.0, 1.0 - d_dog_com / self.FLEE_DIST)
|
||||||
|
alignment = cosine * proximity * self.W_ALIGN
|
||||||
|
else:
|
||||||
|
alignment = 0.0
|
||||||
|
|
||||||
|
reward = r_progress + alignment
|
||||||
reward += newly_penned * self.W_PEN_BONUS
|
reward += newly_penned * self.W_PEN_BONUS
|
||||||
reward -= self.W_STEP_COST
|
reward -= self.W_STEP_COST
|
||||||
if n_penned == self.n_sheep:
|
if n_penned == self.n_sheep:
|
||||||
@@ -363,17 +377,6 @@ class HerdingEnv(gym.Env):
|
|||||||
if pos[1] > F - m: fy -= ((pos[1] - (F - m)) / m) * 6.0
|
if pos[1] > F - m: fy -= ((pos[1] - (F - m)) / m) * 6.0
|
||||||
|
|
||||||
|
|
||||||
# Pen exterior wall avoidance: mirrors sheep.py.
|
|
||||||
# Only fires when strictly outside the pen at pen height.
|
|
||||||
EM = 0.8
|
|
||||||
px0, px1 = self.PEN_X[0], self.PEN_X[1]
|
|
||||||
py0, py1 = self.PEN_Y[0], self.PEN_Y[1]
|
|
||||||
if py0 < pos[1] < py1:
|
|
||||||
if px0 - EM < pos[0] < px0:
|
|
||||||
fx -= ((pos[0] - (px0 - EM)) / EM) * 6.0
|
|
||||||
if px1 < pos[0] < px1 + EM:
|
|
||||||
fx += ((px1 + EM - pos[0]) / EM) * 6.0
|
|
||||||
|
|
||||||
# Hard-stop clamp: mirrors sheep.py — zero any force driving further
|
# Hard-stop clamp: mirrors sheep.py — zero any force driving further
|
||||||
# into the wall within 0.5 m so the flee force cannot pin the sheep.
|
# into the wall within 0.5 m so the flee force cannot pin the sheep.
|
||||||
HS = 0.5
|
HS = 0.5
|
||||||
|
|||||||
Reference in New Issue
Block a user