From 287743709a6b3064f692d928e45a47efda9f3847 Mon Sep 17 00:00:00 2001 From: Johnny Fernandes Date: Sun, 26 Apr 2026 02:02:25 +0100 Subject: [PATCH] Approach refinement --- training/config.json | 4 ++-- training/herding_env.py | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/training/config.json b/training/config.json index 3c0774e..d29068c 100644 --- a/training/config.json +++ b/training/config.json @@ -5,8 +5,8 @@ "W_COMPLETE": 100.0, "W_STEP_COST": 0.02, "W_COMPACT": 0.0, - "W_WALL_TOUCH": 0.15, - "WALL_TOUCH_BUFFER": 0.8, + "W_WALL_TOUCH": 0.04, + "WALL_TOUCH_BUFFER": 0.3, "ALIGN_SHAPE": "standoff", "ALIGN_GATED": true, "ENTRY_AWARE": false, diff --git a/training/herding_env.py b/training/herding_env.py index 6665906..dd647e7 100644 --- a/training/herding_env.py +++ b/training/herding_env.py @@ -61,12 +61,11 @@ class HerdingEnv(gym.Env): W_COMPLETE = 100.0 # all sheep penned W_STEP_COST = 0.02 # time penalty — strong enough to punish doing nothing W_COMPACT = 0.0 # reward for flock-radius reduction (off by default) - W_WALL_TOUCH = 0.15 # per-sheep max penalty at wall surface. Linear ramp - # within WALL_TOUCH_BUFFER gives the RL agent a gradient - # signal to avoid pinning sheep against pen walls. - # 0.15 ≈ 7.5× step_cost — strong enough to shape behavior - # without overwhelming progress reward. - WALL_TOUCH_BUFFER = 0.8 # metres from wall where penalty starts ramping + W_WALL_TOUCH = 0.04 # per-sheep max penalty at wall surface. Linear ramp + # within WALL_TOUCH_BUFFER nudges the agent to avoid + # pinning sheep against pen walls. 0.04 ≈ 2× step_cost + # — noticeable but never dominates progress reward. + WALL_TOUCH_BUFFER = 0.3 # metres from wall where penalty starts ramping ALIGN_SHAPE = "standoff" # "standoff" (peaks at IDEAL) | "near" (peaks at 0) ALIGN_GATED = True # gate alignment on action magnitude ENTRY_AWARE = False # When True, targets PEN_ENTRY (entrance face) instead