Sheep training flock _ improver
This commit is contained in:
@@ -54,8 +54,9 @@ class HerdingEnv(gym.Env):
|
|||||||
# Reward weights (simple per-sheep progress — no phases, no gating)
|
# Reward weights (simple per-sheep progress — no phases, no gating)
|
||||||
# -----------------------------------------------------------------------
|
# -----------------------------------------------------------------------
|
||||||
W_PER_SHEEP = 2.0 # progress: sum of per-sheep distance-to-pen reductions
|
W_PER_SHEEP = 2.0 # progress: sum of per-sheep distance-to-pen reductions
|
||||||
W_ALIGN = 0.05 # dog on anti-pen side of COM — directional hint only,
|
W_ALIGN = 0.0 # disabled: created a sit-still trap when n_sheep ≥ 2.
|
||||||
# kept tiny so sit-still is never profitable vs completion
|
# Progress reward already encodes "be on anti-pen side"
|
||||||
|
# implicitly (sheep flee toward pen → positive progress).
|
||||||
W_PEN_BONUS = 10.0 # per sheep penned
|
W_PEN_BONUS = 10.0 # per sheep penned
|
||||||
W_COMPLETE = 100.0 # all sheep penned
|
W_COMPLETE = 100.0 # all sheep penned
|
||||||
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
||||||
|
|||||||
Reference in New Issue
Block a user