Approach refinement
This commit is contained in:
@@ -5,8 +5,8 @@
|
|||||||
"W_COMPLETE": 100.0,
|
"W_COMPLETE": 100.0,
|
||||||
"W_STEP_COST": 0.02,
|
"W_STEP_COST": 0.02,
|
||||||
"W_COMPACT": 0.0,
|
"W_COMPACT": 0.0,
|
||||||
"W_WALL_TOUCH": 0.15,
|
"W_WALL_TOUCH": 0.04,
|
||||||
"WALL_TOUCH_BUFFER": 0.8,
|
"WALL_TOUCH_BUFFER": 0.3,
|
||||||
"ALIGN_SHAPE": "standoff",
|
"ALIGN_SHAPE": "standoff",
|
||||||
"ALIGN_GATED": true,
|
"ALIGN_GATED": true,
|
||||||
"ENTRY_AWARE": false,
|
"ENTRY_AWARE": false,
|
||||||
|
|||||||
@@ -61,12 +61,11 @@ class HerdingEnv(gym.Env):
|
|||||||
W_COMPLETE = 100.0 # all sheep penned
|
W_COMPLETE = 100.0 # all sheep penned
|
||||||
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
|
||||||
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
|
W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
|
||||||
W_WALL_TOUCH = 0.15 # per-sheep max penalty at wall surface. Linear ramp
|
W_WALL_TOUCH = 0.04 # per-sheep max penalty at wall surface. Linear ramp
|
||||||
# within WALL_TOUCH_BUFFER gives the RL agent a gradient
|
# within WALL_TOUCH_BUFFER nudges the agent to avoid
|
||||||
# signal to avoid pinning sheep against pen walls.
|
# pinning sheep against pen walls. 0.04 ≈ 2× step_cost
|
||||||
# 0.15 ≈ 7.5× step_cost — strong enough to shape behavior
|
# — noticeable but never dominates progress reward.
|
||||||
# without overwhelming progress reward.
|
WALL_TOUCH_BUFFER = 0.3 # metres from wall where penalty starts ramping
|
||||||
WALL_TOUCH_BUFFER = 0.8 # metres from wall where penalty starts ramping
|
|
||||||
ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
|
ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
|
||||||
ALIGN_GATED = True # gate alignment on action magnitude
|
ALIGN_GATED = True # gate alignment on action magnitude
|
||||||
ENTRY_AWARE = False # When True, targets PEN_ENTRY (entrance face) instead
|
ENTRY_AWARE = False # When True, targets PEN_ENTRY (entrance face) instead
|
||||||
|
|||||||
Reference in New Issue
Block a user