Files
TIR_PROJ/training/bc/collect.py
T
Johnny Fernandes 5c2ee4bba5 Checkpoint 8
2026-05-12 22:41:03 +01:00

212 lines
8.5 KiB
Python

"""Collect (obs, action) demonstrations from an analytic teacher.
Runs the chosen teacher across a grid of ``(n_sheep, seed)`` combos at
full difficulty, logs every Nth ``(obs, action)`` pair, and saves
successful trajectories to ``.npz`` for behaviour cloning. The teacher
is wrapped in :class:`ActiveScanTeacher` by default so it operates on
the same partial-obs view the student will have at deployment.
Usage::
python -m training.bc.collect --teacher strombom \\
--out training/bc/demos.npz --frame-stack 4
"""
from __future__ import annotations

import argparse
import inspect
import os
import sys
import time
from pathlib import Path

import numpy as np
# Early CLI pre-pass: the world geometry has to be configured before the
# herding imports below run, so ``--world`` is read straight from the raw
# argument list here. argparse is used again in ``main`` for the full
# parse; this pre-pass only looks at ``--world``.
def _preparse_world(argv):
    """Return the value of the first ``--world`` option in *argv*, or ``None``.

    Both the ``--world VALUE`` and ``--world=VALUE`` spellings are
    recognised. A trailing ``--world`` with no value after it is ignored.
    The scan lives in a function so its loop variables do not end up as
    module-level names.
    """
    for index, arg in enumerate(argv):
        if arg == "--world" and index + 1 < len(argv):
            return argv[index + 1]
        if arg.startswith("--world="):
            return arg.split("=", 1)[1]
    return None


_pre_argv = list(sys.argv[1:])
_pre_world = _preparse_world(_pre_argv)
if _pre_world is not None:
    from herding.world.geometry import configure as _geo_configure
    _geo_configure(_pre_world)
    # Export the choice through the environment as well.
    os.environ["HERDING_WORLD"] = _pre_world
from herding.control.active_scan import ActiveScanTeacher
from herding.world.geometry import PEN_ENTRY, FIELD_SHAPE
from herding.control.sequential import compute_action as sequential_action
from herding.control.strombom import compute_action as strombom_action
from herding.control.universal import compute_action as universal_action
from training.herding_env import HerdingEnv
# Registry of analytic teachers, keyed by the name accepted by
# ``--teacher``. Insertion order is the order of the CLI choices.
TEACHERS = dict(
    sequential=sequential_action,
    strombom=strombom_action,
    universal=universal_action,
)
def _call_teacher(fn, dog_xy, dog_heading, sheep_positions, pen_target,
                  drive_mode="differential"):
    """Call any teacher function and return ``(vx, vy, omega, mode)``.

    Normalizes across 3-tuple teachers ``(vx, vy, mode)`` and the 4-tuple
    universal teacher ``(vx, vy, omega, mode)``. ActiveScanTeacher (when
    invoked with ``drive_mode="mecanum"``) propagates the base teacher's
    omega — see test_active_scan_preserves_mecanum_omega.

    Three call signatures are supported, tried from most to least
    specific:

    * ``fn(dog_xy, dog_heading, sheep_positions, pen_target, drive_mode)``
    * ``fn(dog_xy, dog_heading, sheep_positions, pen_target)``
    * ``fn(dog_xy, sheep_positions, pen_target)``

    The matching signature is chosen by binding the arguments against
    ``inspect.signature(fn)`` without calling ``fn``. The teacher is then
    invoked exactly once, so a ``TypeError`` raised *inside* the teacher
    propagates unchanged instead of being mistaken for an arity mismatch
    (which would re-run the teacher with fewer arguments).

    Returns:
        The teacher's own result when it has four elements; otherwise
        ``(vx, vy, 0.0, mode)`` built from its three-element result.
    """
    candidates = (
        (dog_xy, dog_heading, sheep_positions, pen_target, drive_mode),
        (dog_xy, dog_heading, sheep_positions, pen_target),
        (dog_xy, sheep_positions, pen_target),
    )

    try:
        signature = inspect.signature(fn)
    except (TypeError, ValueError):
        # Some callables (e.g. certain C-implemented ones) cannot be
        # introspected.
        signature = None

    if signature is None:
        # Fallback for non-introspectable callables: detect the arity by
        # trying the calls in order.
        try:
            result = fn(*candidates[0])
        except TypeError:
            try:
                result = fn(*candidates[1])
            except TypeError:
                result = fn(*candidates[2])
    else:
        # Default to the shortest form; if nothing binds, calling it
        # raises the natural arity TypeError.
        chosen = candidates[-1]
        for args in candidates[:-1]:
            try:
                signature.bind(*args)
            except TypeError:
                continue
            chosen = args
            break
        result = fn(*chosen)

    if len(result) == 4:
        return result  # (vx, vy, omega, mode)
    vx, vy, mode = result
    return vx, vy, 0.0, mode
def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int,
                teacher_fn, frame_stack: int = 1, privileged: bool = False,
                drive_mode: str = "differential"):
    """Roll out one teacher-driven episode and log ``(obs, action)`` pairs.

    A fresh ``HerdingEnv`` is built at ``difficulty=1.0`` and reset with
    *seed*. For at most *max_steps* steps the teacher picks an action,
    which is applied with ``env.step``. Every step whose index is a
    multiple of *subsample* has its observation and action recorded.

    Args:
        n_sheep: forwarded to ``HerdingEnv``.
        seed: used for both the env constructor and ``env.reset``.
        max_steps: forwarded to ``HerdingEnv`` and used as the loop bound.
        subsample: keep every Nth step.
        teacher_fn: base teacher callable.
        frame_stack: forwarded to ``HerdingEnv``.
        privileged: when true, ``teacher_fn`` is called directly with the
            env's own positions of the sheep not yet penned, and the
            rollout stops once none remain. Otherwise the
            ``ActiveScanTeacher`` wrapper is called with
            ``env.perceived_positions()``.
        drive_mode: ``"mecanum"`` yields ``[vx, vy, omega]`` actions; any
            other value yields ``[vx, vy]``.

    Returns:
        ``(obs, actions, success, steps)``: two float32 arrays of the
        logged pairs, whether ``env.sheep_penned.all()`` holds when the
        rollout ends, and ``env.steps``.
    """
    env = HerdingEnv(
        n_sheep=n_sheep,
        max_steps=max_steps,
        difficulty=1.0,
        seed=seed,
        frame_stack=frame_stack,
        drive_mode=drive_mode,
    )
    obs, _ = env.reset(seed=seed)

    logged_obs = []
    logged_actions = []
    scan_teacher = ActiveScanTeacher(teacher_fn)
    with_omega = drive_mode == "mecanum"

    for step in range(max_steps):
        if privileged:
            # Positions of the sheep that are not yet penned, read from
            # the env state.
            positions = {}
            for idx in range(env.n_sheep):
                if env.sheep_penned[idx]:
                    continue
                positions[f"s{idx}"] = (
                    float(env.sheep_x[idx]),
                    float(env.sheep_y[idx]),
                )
            if not positions:
                break
            controller = teacher_fn
        else:
            # Perceived positions go through the scan wrapper.
            positions = env.perceived_positions()
            controller = scan_teacher

        vx, vy, omega, _mode = _call_teacher(
            controller,
            (env.dog_x, env.dog_y),
            env.dog_heading,
            positions,
            PEN_ENTRY,
            drive_mode,
        )

        components = [vx, vy, omega] if with_omega else [vx, vy]
        action = np.array(components, dtype=np.float32)

        # Log the observation the action was computed for, before stepping.
        if step % subsample == 0:
            logged_obs.append(obs.copy())
            logged_actions.append(action.copy())

        obs, _r, term, trunc, _info = env.step(action)
        if term or trunc:
            break

    success = bool(env.sheep_penned.all())
    obs_array = np.asarray(logged_obs, dtype=np.float32)
    action_array = np.asarray(logged_actions, dtype=np.float32)
    return obs_array, action_array, success, env.steps
def main():
    """Command-line entry point: collect demonstrations and save them.

    Parses the CLI options, runs :func:`collect_one` for every
    ``(n_sheep, seed)`` pair of the requested grid, concatenates the kept
    trajectories and writes them to an ``.npz`` archive holding:

    * ``obs``: all logged observations, concatenated along axis 0;
    * ``actions``: the matching teacher actions;
    * ``meta``: one int32 row per kept trajectory,
      ``(n_sheep, seed, n_logged, success, total_steps)``.

    Failed trajectories are dropped unless ``--keep-failures`` is given.

    Raises:
        RuntimeError: if no trajectory was kept.
        SystemExit: via ``parser.error`` when ``--subsample`` is below 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", default="training/bc/demos.npz")
    parser.add_argument("--n-sheep-list", default="1,2,3,5,8,10")
    parser.add_argument("--seeds-per-n", type=int, default=15)
    parser.add_argument("--max-steps", type=int, default=30000)
    parser.add_argument("--subsample", type=int, default=5,
                        help="Keep every Nth (obs, action) pair.")
    parser.add_argument("--keep-failures", action="store_true",
                        help="Include partial-success trajectories. Default off.")
    parser.add_argument("--teacher", default="universal",
                        choices=list(TEACHERS.keys()),
                        help="Which analytic teacher to demonstrate.")
    parser.add_argument("--frame-stack", type=int, default=1,
                        help="Concatenate the last K obs into a "
                             "(32·K)-D vector for the policy.")
    parser.add_argument("--privileged", action="store_true",
                        help="Teacher reads ground truth instead of "
                             "tracker output (asymmetric BC).")
    parser.add_argument("--drive-mode", default="differential",
                        choices=["differential", "mecanum"],
                        help="Drive mode for the dog robot.")
    parser.add_argument("--world", default=None,
                        choices=["field", "field_round"],
                        help="World shape. If not set, uses HERDING_WORLD "
                             "env var or defaults to 'field'. Must be set "
                             "before geometry is imported.")
    args = parser.parse_args()

    # ``step % subsample`` is evaluated inside the rollout; reject 0 (and
    # negatives) up front rather than crash with ZeroDivisionError there.
    if args.subsample < 1:
        parser.error("--subsample must be >= 1")

    # Validate --world matches geometry (already configured by the
    # early pre-parse at import time, or by HERDING_WORLD env var).
    if args.world is not None and args.world != FIELD_SHAPE:
        print(f"[demos] WARNING: --world={args.world} but geometry is "
              f"'{FIELD_SHAPE}'. This should not happen — file a bug.")

    teacher_fn = TEACHERS[args.teacher]
    print(f"[demos] teacher: {args.teacher} world: {FIELD_SHAPE}")
    n_sheep_list = [int(x) for x in args.n_sheep_list.split(",")]
    print(f"[demos] grid: n_sheep={n_sheep_list}, seeds={args.seeds_per_n}, "
          f"max_steps={args.max_steps}, subsample={args.subsample}")

    all_obs, all_actions, all_meta = [], [], []
    t_start = time.time()
    n_success = 0
    n_total = 0
    for n in n_sheep_list:
        for seed in range(args.seeds_per_n):
            obs, actions, success, total_steps = collect_one(
                n, seed, args.max_steps, args.subsample, teacher_fn,
                frame_stack=args.frame_stack, privileged=args.privileged,
                drive_mode=args.drive_mode,
            )
            n_total += 1
            if success:
                n_success += 1
            keep = success or args.keep_failures
            if keep and len(obs) > 0:
                all_obs.append(obs)
                all_actions.append(actions)
                all_meta.append((n, seed, len(obs), int(success), total_steps))
            tag = "✓" if success else "✗"
            print(f" [{tag}] n={n:>2d} seed={seed:>2d} steps={total_steps:>6d} "
                  f"logged={len(obs):>5d}")

    if not all_obs:
        raise RuntimeError("No trajectories kept — try --keep-failures.")

    obs = np.concatenate(all_obs, axis=0)
    actions = np.concatenate(all_actions, axis=0)
    meta = np.array(all_meta, dtype=np.int32)

    # np.savez appends ".npz" to a filename lacking it; compute the real
    # path so the directory creation and the report below match the file
    # actually written.
    out_path = args.out if args.out.endswith(".npz") else args.out + ".npz"
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    np.savez(out_path, obs=obs, actions=actions, meta=meta)

    elapsed = time.time() - t_start
    print(f"\n=== {n_success}/{n_total} trajectories successful ({100*n_success/n_total:.0f}%) ===")
    print(f"=== {len(obs)} transitions saved to {out_path} ===")
    print(f"=== obs={obs.shape}, actions={actions.shape}, elapsed={elapsed:.0f}s ===")


# Run the collection CLI only when executed as a script/module.
if __name__ == "__main__":
    main()