Behaviour refinement - fence penalty

This commit is contained in:
Johnny Fernandes
2026-04-26 01:09:50 +01:00
parent 6253850620
commit b031473758
+5 -5
View File
@@ -61,11 +61,11 @@ class HerdingEnv(gym.Env):
     W_COMPLETE = 100.0 # all sheep penned
     W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing
     W_COMPACT = 0.0 # reward for flock-radius reduction (off by default)
-    W_WALL_TOUCH = 0.05 # per-sheep, per-step penalty when an active sheep is
-    # pinned against the outside of a pen W/E wall. Direct
-    # signal against the wall-corraling exploit so the
-    # policy generalises better to Webots pillared walls.
-    WALL_TOUCH_BUFFER = 0.5 # metres outside the wall counted as "touching"
+    W_WALL_TOUCH = 0.01 # per-sheep, per-step penalty when an active sheep is
+    # pinned against the outside of a pen W/E wall. Kept
+    # small (<step_cost) so the dog isn't incentivised to
+    # hover above the entrance to avoid the penalty.
+    WALL_TOUCH_BUFFER = 0.3 # metres outside the wall counted as "touching"
     ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0)
     ALIGN_GATED = True # gate alignment on action magnitude
     ENTRY_AWARE = True # progress reward targets PEN_ENTRY (entrance face), not