Approach refinement
This commit is contained in:
@@ -4,7 +4,7 @@ Shepherd Dog RL controller — runs a trained SB3 PPO policy inside Webots.
|
||||
Setup
|
||||
-----
|
||||
1. Copy your trained files into this directory:
|
||||
controllers/shepherd_dog_rl/best_model.zip
|
||||
controllers/shepherd_dog_rl/final_model.zip
|
||||
controllers/shepherd_dog_rl/vecnorm.pkl
|
||||
|
||||
2. In field.wbt, set the ShepherdDog robot's controller field to
|
||||
@@ -15,9 +15,15 @@ Setup
|
||||
from the default of 5.
|
||||
|
||||
The controller reads GPS (dog position) and Receiver (sheep broadcasts),
|
||||
builds the same 13-dim flock observation the training env used, normalises
|
||||
builds the same 16-dim flock observation the training env used, normalises
|
||||
it with the saved VecNormalize stats, and converts the (vx, vy) policy
|
||||
output into differential wheel speeds.
|
||||
|
||||
Debug logging
|
||||
-------------
|
||||
Set DEBUG_ENABLED = True in this script to write a per-step CSV (dog pos, sheep positions,
|
||||
raw obs, normalised obs, action) to debug.csv alongside this script. Use
|
||||
plot_debug.py to render trajectories from it.
|
||||
"""
|
||||
|
||||
import sys
|
||||
@@ -48,8 +54,10 @@ EAR_AMPLITUDE = 0.35
|
||||
EAR_RATE = 8.0
|
||||
|
||||
# ── model paths ─────────────────────────────────────────────────────────────
|
||||
MODEL_PATH = os.path.join(_HERE, "best_model.zip")
|
||||
MODEL_PATH = os.path.join(_HERE, "final_model.zip")
|
||||
VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
|
||||
DEBUG_CSV = os.path.join(_HERE, "debug.csv")
|
||||
DEBUG_ENABLED = True # set False to disable debug.csv logging
|
||||
|
||||
|
||||
def norm_angle(a: float) -> float:
|
||||
@@ -148,13 +156,26 @@ vecnorm = VecNormalize.load(VECNORM_PATH, dummy_env)
|
||||
vecnorm.training = False
|
||||
vecnorm.norm_reward = False
|
||||
|
||||
model = PPO.load(MODEL_PATH)
|
||||
model = PPO.load(MODEL_PATH, device="cpu")
|
||||
print(f"[RL dog] Model loaded — running with n_sheep={n_sheep}")
|
||||
|
||||
# ── Runtime state ─────────────────────────────────────────────────────────────
|
||||
sheep_positions: dict = {} # {name: (x, y)} — updated every step from receiver
|
||||
step_count = 0
|
||||
|
||||
# Debug CSV — written every step when DEBUG_ENABLED is True
|
||||
debug_file = None
|
||||
if DEBUG_ENABLED:
|
||||
import csv
|
||||
debug_file = open(DEBUG_CSV, "w", newline="")
|
||||
debug_writer = csv.writer(debug_file)
|
||||
debug_writer.writerow([
|
||||
"step", "dog_x", "dog_y", "heading",
|
||||
"sheep_xs", "sheep_ys", "n_active", "n_penned",
|
||||
"raw_obs", "norm_obs", "vx", "vy",
|
||||
])
|
||||
print(f"[RL dog] DEBUG logging to {DEBUG_CSV}")
|
||||
|
||||
|
||||
def bearing() -> float:
|
||||
"""Current robot heading in world frame (radians)."""
|
||||
@@ -226,5 +247,22 @@ while robot.step(timestep) != -1:
|
||||
if step_count % 100 == 0:
|
||||
n_in_pen = sum(1 for x, y in sheep_positions.values() if in_pen(x, y))
|
||||
print(f"[RL dog] step={step_count} known_sheep={len(sheep_positions)}"
|
||||
f" penned={n_in_pen}/{n_sheep}"
|
||||
f" penned={n_in_pen}/{n_sheep} dog=({dog_pos[0]:.2f},{dog_pos[1]:.2f})"
|
||||
f" action=({vx:.2f}, {vy:.2f})")
|
||||
|
||||
# Debug CSV row
|
||||
if debug_file is not None:
|
||||
n_active = sum(1 for x, y in sheep_positions.values() if not in_pen(x, y))
|
||||
n_in_pen = len(sheep_positions) - n_active
|
||||
debug_writer.writerow([
|
||||
step_count, f"{dog_pos[0]:.4f}", f"{dog_pos[1]:.4f}",
|
||||
f"{bearing():.4f}",
|
||||
";".join(f"{v[0]:.3f}" for v in sheep_positions.values()),
|
||||
";".join(f"{v[1]:.3f}" for v in sheep_positions.values()),
|
||||
n_active, n_in_pen,
|
||||
";".join(f"{x:.4f}" for x in raw_obs),
|
||||
";".join(f"{x:.4f}" for x in obs_norm[0]),
|
||||
f"{vx:.4f}", f"{vy:.4f}",
|
||||
])
|
||||
if step_count % 200 == 0:
|
||||
debug_file.flush()
|
||||
|
||||
Reference in New Issue
Block a user