Approach refinement

This commit is contained in:
Johnny Fernandes
2026-04-26 12:59:04 +01:00
parent acf0810425
commit a44ddb7b08
17 changed files with 10593 additions and 194 deletions
+43 -5
View File
@@ -4,7 +4,7 @@ Shepherd Dog RL controller — runs a trained SB3 PPO policy inside Webots.
Setup
-----
1. Copy your trained files into this directory:
controllers/shepherd_dog_rl/best_model.zip
controllers/shepherd_dog_rl/final_model.zip
controllers/shepherd_dog_rl/vecnorm.pkl
2. In field.wbt, set the ShepherdDog robot's controller field to
@@ -15,9 +15,15 @@ Setup
from the default of 5.
The controller reads GPS (dog position) and Receiver (sheep broadcasts),
builds the same 13-dim flock observation the training env used, normalises
builds the same 16-dim flock observation the training env used, normalises
it with the saved VecNormalize stats, and converts the (vx, vy) policy
output into differential wheel speeds.
Debug logging
-------------
Set DEBUG_ENABLED = True (module-level constant below) to write a per-step CSV
(dog pos, heading, sheep positions, active/penned counts, raw obs, normalised
obs, action) to debug.csv alongside this script. Use plot_debug.py to render
trajectories from it.
"""
import sys
@@ -48,8 +54,10 @@ EAR_AMPLITUDE = 0.35
EAR_RATE = 8.0
# ── model paths ─────────────────────────────────────────────────────────────
MODEL_PATH = os.path.join(_HERE, "best_model.zip")
MODEL_PATH = os.path.join(_HERE, "final_model.zip")
VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
DEBUG_CSV = os.path.join(_HERE, "debug.csv")
DEBUG_ENABLED = True # set False to disable debug.csv logging
def norm_angle(a: float) -> float:
@@ -148,13 +156,26 @@ vecnorm = VecNormalize.load(VECNORM_PATH, dummy_env)
vecnorm.training = False
vecnorm.norm_reward = False
model = PPO.load(MODEL_PATH)
model = PPO.load(MODEL_PATH, device="cpu")
print(f"[RL dog] Model loaded — running with n_sheep={n_sheep}")
# ── Runtime state ─────────────────────────────────────────────────────────────
sheep_positions: dict = {} # {name: (x, y)} — updated every step from receiver
step_count = 0
# Debug CSV — one row per control step, written only when DEBUG_ENABLED is True.
# debug_file stays None when logging is off; the main loop checks it before
# touching debug_writer, which is only defined in the enabled case.
debug_file = None
if DEBUG_ENABLED:
    import atexit
    import csv

    # newline="" is what the csv module expects so it can manage line endings
    # itself; the explicit encoding keeps the file identical across platforms.
    debug_file = open(DEBUG_CSV, "w", newline="", encoding="utf-8")
    # The main loop only flushes every 200 steps, so close (and thereby flush)
    # the file at interpreter exit to avoid losing the final buffered rows.
    atexit.register(debug_file.close)
    debug_writer = csv.writer(debug_file)
    # Header row: list-valued fields (sheep_xs, sheep_ys, raw_obs, norm_obs)
    # are stored as ';'-joined strings inside a single CSV cell.
    debug_writer.writerow([
        "step", "dog_x", "dog_y", "heading",
        "sheep_xs", "sheep_ys", "n_active", "n_penned",
        "raw_obs", "norm_obs", "vx", "vy",
    ])
    print(f"[RL dog] DEBUG logging to {DEBUG_CSV}")
def bearing() -> float:
"""Current robot heading in world frame (radians)."""
@@ -226,5 +247,22 @@ while robot.step(timestep) != -1:
if step_count % 100 == 0:
n_in_pen = sum(1 for x, y in sheep_positions.values() if in_pen(x, y))
print(f"[RL dog] step={step_count} known_sheep={len(sheep_positions)}"
f" penned={n_in_pen}/{n_sheep}"
f" penned={n_in_pen}/{n_sheep} dog=({dog_pos[0]:.2f},{dog_pos[1]:.2f})"
f" action=({vx:.2f}, {vy:.2f})")
# Append this step's record to the debug CSV (skipped when logging is off,
# in which case debug_file is None).
if debug_file is not None:
    # Sheep still outside the pen; the remainder of the known sheep are penned.
    n_active = sum(not in_pen(sx, sy) for sx, sy in sheep_positions.values())
    n_in_pen = len(sheep_positions) - n_active
    debug_writer.writerow((
        step_count,
        format(dog_pos[0], ".4f"),
        format(dog_pos[1], ".4f"),
        format(bearing(), ".4f"),
        # Variable-length fields are packed into one cell, ';'-separated.
        ";".join([format(pos[0], ".3f") for pos in sheep_positions.values()]),
        ";".join([format(pos[1], ".3f") for pos in sheep_positions.values()]),
        n_active,
        n_in_pen,
        ";".join([format(val, ".4f") for val in raw_obs]),
        ";".join([format(val, ".4f") for val in obs_norm[0]]),
        format(vx, ".4f"),
        format(vy, ".4f"),
    ))
    # Push buffered rows to disk periodically rather than on every step.
    if not step_count % 200:
        debug_file.flush()