Approach refinement

This commit is contained in:
Johnny Fernandes
2026-04-26 12:59:04 +01:00
parent acf0810425
commit a44ddb7b08
17 changed files with 10593 additions and 194 deletions
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.

After

Width:  |  Height:  |  Size: 194 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 233 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 146 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 233 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 74 KiB

File diff suppressed because it is too large Load Diff
Binary file not shown.
+153
View File
@@ -0,0 +1,153 @@
"""
Render Webots-side debug trajectory from debug.csv.
The shepherd_dog_rl controller writes per-step state to debug.csv when
DOG_DEBUG=1. This script reads it and produces:
trajectory.png — dog path + sheep paths overlaid on the field
obs.png — raw vs. normalised observation values over time
actions.png — vx, vy time series
Run:
python plot_debug.py # uses debug.csv next to this file
python plot_debug.py --csv path/to.csv --out-dir somewhere/
"""
import argparse
import csv
import os
import sys
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
def load_csv(path):
    """Read the debug CSV at *path* into a list of row dicts.

    Each row is a ``dict`` keyed by the CSV header, as produced by
    ``csv.DictReader``; all values are left as strings.

    Exits the process through ``sys.exit`` with an "empty CSV" message when
    the file contains no data rows.
    """
    # newline="" hands newline handling to the csv module, so line breaks
    # embedded in quoted fields are preserved instead of being translated.
    with open(path, newline="") as f:
        rows = list(csv.DictReader(f))
    if not rows:
        sys.exit(f"empty CSV: {path}")
    return rows
def parse_floats(s):
    """Parse a ``;``-separated string of numbers into a list of floats.

    Empty and whitespace-only tokens are skipped, so trailing or doubled
    separators are harmless. ``None`` (what ``csv.DictReader`` yields for a
    column missing from a short row) and ``""`` both give an empty list.

    Raises ``ValueError`` if a non-blank token is not a valid float.
    """
    if not s:
        return []
    return [float(token) for token in s.split(";") if token.strip()]
def plot_trajectory(rows, out_path):
    """Draw the dog and sheep paths over the field and save to *out_path*.

    *rows* are the CSV row dicts; the keys read here are ``dog_x``,
    ``dog_y``, ``sheep_xs``, ``sheep_ys`` (``;``-separated lists) and
    ``n_penned`` (taken from the last row for the title).

    The dog path is marked with a square at its first point and a diamond
    at its last; each sheep track gets a circle at its first point and a
    star at its last. Sheep are identified by their position in the
    per-row lists.
    """
    fig, ax = plt.subplots(figsize=(7, 7))
    try:
        ax.set_xlim(-16, 16)
        ax.set_ylim(-16, 16)
        ax.set_aspect("equal")
        ax.set_facecolor("#dcedc8")
        # Outer outline: 30 x 30 square centred on the origin.
        ax.add_patch(mpatches.Rectangle((-15, -15), 30, 30,
                                        fill=False, edgecolor="#795548", lw=2))
        # Filled rectangle labelled "pen".
        ax.add_patch(mpatches.Rectangle((10, -15), 3, 7,
                                        facecolor="#ffe082", edgecolor="#795548", lw=2))
        ax.text(11.5, -11.5, "pen", ha="center", va="center", fontsize=8)

        dog_x = [float(r["dog_x"]) for r in rows]
        dog_y = [float(r["dog_y"]) for r in rows]
        ax.plot(dog_x, dog_y, color="#4e342e", lw=1.5, alpha=0.7, label="dog")
        ax.plot(dog_x[0], dog_y[0], "s", color="#4e342e", ms=10)
        ax.plot(dog_x[-1], dog_y[-1], "D", color="#4e342e", ms=10)

        # Sheep — re-shape into per-sheep tracks. x and y are paired per row
        # so a track's two coordinate lists always have the same length.
        sheep_rows = [
            list(zip(parse_floats(r["sheep_xs"]), parse_floats(r["sheep_ys"])))
            for r in rows
        ]
        # Use the largest count seen in any row so every recorded sheep is
        # drawn even if the final row lists fewer of them.
        n_sheep = max((len(points) for points in sheep_rows), default=0)
        palette = ["#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00",
                   "#a65628", "#f781bf", "#999999", "#66c2a5", "#fc8d62"]
        for i in range(n_sheep):
            track = [points[i] for points in sheep_rows if i < len(points)]
            if not track:
                continue
            xs = [point[0] for point in track]
            ys = [point[1] for point in track]
            c = palette[i % len(palette)]
            ax.plot(xs, ys, color=c, lw=0.8, alpha=0.6, label=f"sheep {i+1}")
            ax.plot(xs[0], ys[0], "o", color=c, ms=6)
            ax.plot(xs[-1], ys[-1], "*", color=c, ms=10)

        n_in_pen = int(rows[-1]["n_penned"])
        ax.set_title(f"Webots trajectory {len(rows)} steps penned={n_in_pen}",
                     fontsize=12)
        ax.legend(loc="upper left", fontsize=7, ncol=2)
        fig.tight_layout()
        fig.savefig(out_path, dpi=120)
    finally:
        # Always release the figure; pyplot keeps it alive otherwise.
        plt.close(fig)
def plot_actions(rows, out_path):
    """Plot the action components over time and save the figure to *out_path*.

    Reads the ``vx`` and ``vy`` columns of *rows* and draws three stacked
    panels sharing the step axis: vx, vy, and the action magnitude with
    dashed reference lines at sqrt(2) and 1.0.
    """
    steps = np.arange(len(rows))
    vx = np.array([float(row["vx"]) for row in rows])
    vy = np.array([float(row["vy"]) for row in rows])
    magnitude = np.sqrt(vx ** 2 + vy ** 2)

    fig, (ax_vx, ax_vy, ax_mag) = plt.subplots(3, 1, figsize=(12, 7), sharex=True)

    # The two component panels share the same layout; only data/colour differ.
    component_panels = (
        (ax_vx, vx, "tab:red", "vx"),
        (ax_vy, vy, "tab:blue", "vy"),
    )
    for panel, series, colour, name in component_panels:
        panel.plot(steps, series, color=colour, lw=0.8)
        panel.set_ylabel(name)
        panel.axhline(0, color="black", lw=0.4)
        panel.set_ylim(-1.1, 1.1)

    ax_mag.plot(steps, magnitude, color="tab:purple", lw=0.8)
    ax_mag.set_ylabel("||action||")
    ax_mag.axhline(np.sqrt(2), color="orange", ls="--", lw=1, label="saturated √2")
    ax_mag.axhline(1.0, color="gray", ls="--", lw=1)
    ax_mag.set_xlabel("step")
    ax_mag.legend(fontsize=8)

    fig.suptitle("Webots action time series")
    plt.tight_layout()
    fig.savefig(out_path, dpi=120)
    plt.close(fig)
def plot_obs(rows, out_path):
    """Plot every observation dimension over time and save to *out_path*.

    Reads the ``norm_obs`` and ``raw_obs`` columns of *rows* (``;``-separated
    float lists) and draws one panel per dimension, with the raw value in
    grey and the normalised value in red. Returns without writing anything
    when there is no normalised observation data.
    """
    norm = np.array([parse_floats(r["norm_obs"]) for r in rows])
    raw = np.array([parse_floats(r["raw_obs"]) for r in rows])
    if norm.size == 0:
        return
    n_dims = norm.shape[1]
    # NOTE(review): these names assume the 16-dim observation layout —
    # confirm against the training env if the observation changes.
    known_labels = [
        "dog_x", "dog_y", "com-dog_x", "com-dog_y",
        "far1-com_x", "far1-com_y", "far2-com_x", "far2-com_y",
        "far3-com_x", "far3-com_y", "pen-com_x", "pen-com_y",
        "pen-far1_x", "pen-far1_y", "radius", "frac_active",
    ]
    # Pad with generic names so an observation wider than the known layout
    # is still plotted instead of raising IndexError.
    labels = known_labels[:n_dims] + [
        f"obs[{i}]" for i in range(len(known_labels), n_dims)
    ]
    t = np.arange(norm.shape[0])
    # squeeze=False always returns a 2-D axes array, so the single-panel
    # case needs no special handling.
    fig, axes_grid = plt.subplots(n_dims, 1, figsize=(11, 1.0 * n_dims),
                                  sharex=True, squeeze=False)
    try:
        axes = axes_grid[:, 0]
        for i in range(n_dims):
            axes[i].plot(t, raw[:, i], color="tab:gray", lw=0.6, alpha=0.6, label="raw")
            axes[i].plot(t, norm[:, i], color="tab:red", lw=0.8, label="normalised")
            axes[i].set_ylabel(labels[i], fontsize=8)
            axes[i].tick_params(labelsize=7)
            if i == 0:
                axes[i].legend(fontsize=7, loc="upper right")
        axes[-1].set_xlabel("step")
        fig.suptitle("Observation values over time (raw vs VecNormalize-normalised)")
        fig.tight_layout()
        fig.savefig(out_path, dpi=110)
    finally:
        # Always release the figure; pyplot keeps it alive otherwise.
        plt.close(fig)
def main():
    """Command-line entry point: load the debug CSV and render all plots.

    ``--csv`` defaults to ``debug.csv`` next to this script and ``--out-dir``
    to a ``debug_out`` directory next to this script; the output directory
    is created if it does not exist.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    parser = argparse.ArgumentParser()
    parser.add_argument("--csv", default=os.path.join(script_dir, "debug.csv"))
    parser.add_argument("--out-dir", default=os.path.join(script_dir, "debug_out"))
    opts = parser.parse_args()

    # Load first: an empty CSV exits before the output directory is created.
    rows = load_csv(opts.csv)
    os.makedirs(opts.out_dir, exist_ok=True)
    print(f"loaded {len(rows)} rows from {opts.csv}")

    renderers = (
        (plot_trajectory, "trajectory.png"),
        (plot_actions, "actions.png"),
        (plot_obs, "obs.png"),
    )
    for render, filename in renderers:
        render(rows, os.path.join(opts.out_dir, filename))
    print(f"saved trajectory.png + actions.png + obs.png to {opts.out_dir}/")


if __name__ == "__main__":
    main()
+43 -5
View File
@@ -4,7 +4,7 @@ Shepherd Dog RL controller — runs a trained SB3 PPO policy inside Webots.
Setup
-----
1. Copy your trained files into this directory:
controllers/shepherd_dog_rl/best_model.zip
controllers/shepherd_dog_rl/final_model.zip
controllers/shepherd_dog_rl/vecnorm.pkl
2. In field.wbt, set the ShepherdDog robot's controller field to
@@ -15,9 +15,15 @@ Setup
from the default of 5.
The controller reads GPS (dog position) and Receiver (sheep broadcasts),
builds the same 13-dim flock observation the training env used, normalises
builds the same 16-dim flock observation the training env used, normalises
it with the saved VecNormalize stats, and converts the (vx, vy) policy
output into differential wheel speeds.
Debug logging
-------------
Set DEBUG_ENABLED = True (defined with the model paths below) to write a
per-step CSV (dog pos, sheep positions, raw obs, normalised obs, action) to
debug.csv alongside this script. Use plot_debug.py to render trajectories
from it.
"""
import sys
@@ -48,8 +54,10 @@ EAR_AMPLITUDE = 0.35
EAR_RATE = 8.0
# ── model paths ─────────────────────────────────────────────────────────────
MODEL_PATH = os.path.join(_HERE, "best_model.zip")
MODEL_PATH = os.path.join(_HERE, "final_model.zip")
VECNORM_PATH = os.path.join(_HERE, "vecnorm.pkl")
DEBUG_CSV = os.path.join(_HERE, "debug.csv")
DEBUG_ENABLED = True # set False to disable debug.csv logging
def norm_angle(a: float) -> float:
@@ -148,13 +156,26 @@ vecnorm = VecNormalize.load(VECNORM_PATH, dummy_env)
vecnorm.training = False
vecnorm.norm_reward = False
model = PPO.load(MODEL_PATH)
model = PPO.load(MODEL_PATH, device="cpu")
print(f"[RL dog] Model loaded — running with n_sheep={n_sheep}")
# ── Runtime state ─────────────────────────────────────────────────────────────
sheep_positions: dict = {} # {name: (x, y)} — updated every step from receiver
step_count = 0
# Debug CSV — written every step when DEBUG_ENABLED is True
debug_file = None
if DEBUG_ENABLED:
import csv
debug_file = open(DEBUG_CSV, "w", newline="")
debug_writer = csv.writer(debug_file)
debug_writer.writerow([
"step", "dog_x", "dog_y", "heading",
"sheep_xs", "sheep_ys", "n_active", "n_penned",
"raw_obs", "norm_obs", "vx", "vy",
])
print(f"[RL dog] DEBUG logging to {DEBUG_CSV}")
def bearing() -> float:
"""Current robot heading in world frame (radians)."""
@@ -226,5 +247,22 @@ while robot.step(timestep) != -1:
if step_count % 100 == 0:
n_in_pen = sum(1 for x, y in sheep_positions.values() if in_pen(x, y))
print(f"[RL dog] step={step_count} known_sheep={len(sheep_positions)}"
f" penned={n_in_pen}/{n_sheep}"
f" penned={n_in_pen}/{n_sheep} dog=({dog_pos[0]:.2f},{dog_pos[1]:.2f})"
f" action=({vx:.2f}, {vy:.2f})")
# Debug CSV row
if debug_file is not None:
n_active = sum(1 for x, y in sheep_positions.values() if not in_pen(x, y))
n_in_pen = len(sheep_positions) - n_active
debug_writer.writerow([
step_count, f"{dog_pos[0]:.4f}", f"{dog_pos[1]:.4f}",
f"{bearing():.4f}",
";".join(f"{v[0]:.3f}" for v in sheep_positions.values()),
";".join(f"{v[1]:.3f}" for v in sheep_positions.values()),
n_active, n_in_pen,
";".join(f"{x:.4f}" for x in raw_obs),
";".join(f"{x:.4f}" for x in obs_norm[0]),
f"{vx:.4f}", f"{vy:.4f}",
])
if step_count % 200 == 0:
debug_file.flush()
Binary file not shown.