Approach refinement

This commit is contained in:
Johnny Fernandes
2026-04-26 02:02:25 +01:00
parent 61f8a7db15
commit 287743709a
2 changed files with 7 additions and 8 deletions
+5 -6
View File
@@ -61,12 +61,11 @@ class HerdingEnv(gym.Env):
# Reward-shaping weights and behaviour flags shown in the HerdingEnv hunk.
# NOTE(review): this view is a rendered diff, so the pre-change and
# post-change W_WALL_TOUCH / WALL_TOUCH_BUFFER lines both appear below.
# Read as plain Python, the later assignments (0.04 and 0.3) are the ones
# that take effect — confirm against the actual file before relying on this.
W_COMPLETE = 100.0 # all sheep penned
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
# Earlier wall-touch tuning (0.15 over a 0.8 m buffer); both names are
# rebound a few lines further down.
W_WALL_TOUCH = 0.15 # per-sheep max penalty at wall surface. Linear ramp
# within WALL_TOUCH_BUFFER gives the RL agent a gradient
# signal to avoid pinning sheep against pen walls.
# 0.15 ≈ 7.5× step_cost — strong enough to shape behavior
# without overwhelming progress reward.
WALL_TOUCH_BUFFER = 0.8 # metres from wall where penalty starts ramping
# Current wall-touch tuning: smaller penalty (0.04 = 2 × W_STEP_COST) applied
# over a narrower 0.3 m band.
W_WALL_TOUCH = 0.04 # per-sheep max penalty at wall surface. Linear ramp
# within WALL_TOUCH_BUFFER nudges the agent to avoid
# pinning sheep against pen walls. 0.04 ≈ 2× step_cost
# — noticeable but never dominates progress reward.
WALL_TOUCH_BUFFER = 0.3 # metres from wall where penalty starts ramping
ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
ALIGN_GATED = True # gate alignment on action magnitude
ENTRY_AWARE = False # When True, targets PEN_ENTRY (entrance face) instead