Checkpoint 8
This commit is contained in:
+82
-15
@@ -15,51 +15,102 @@ Usage::
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Early CLI pre-pass: the world geometry has to be configured before the
# heavy imports further down, so ``--world`` is read straight from argv
# here. argparse is used again in main() for the full parse; this pass
# only looks at --world and ignores every other option.
import sys  # explicit import instead of relying on the undocumented ``os.sys`` alias


def _find_world_arg(argv):
    """Return the value given to ``--world`` in *argv*, or ``None``.

    Both ``--world VALUE`` and ``--world=VALUE`` are recognised and the
    first occurrence wins. A trailing ``--world`` with no value after it
    is ignored. The value is not validated here; argparse's ``choices``
    check in main() does that later.
    """
    for index, arg in enumerate(argv):
        if arg == "--world" and index + 1 < len(argv):
            return argv[index + 1]
        if arg.startswith("--world="):
            return arg.split("=", 1)[1]
    return None


_pre_argv = list(sys.argv[1:])
_pre_world = _find_world_arg(_pre_argv)
if _pre_world is not None:
    # Imported lazily so geometry is only touched when --world was given.
    from herding.world.geometry import configure as _geo_configure
    _geo_configure(_pre_world)
    # Also exported through the environment after configuring geometry.
    os.environ["HERDING_WORLD"] = _pre_world
|
||||
|
||||
from herding.control.active_scan import ActiveScanTeacher
|
||||
from herding.world.geometry import PEN_ENTRY
|
||||
from herding.world.geometry import PEN_ENTRY, FIELD_SHAPE
|
||||
from herding.control.sequential import compute_action as sequential_action
|
||||
from herding.control.strombom import compute_action as strombom_action
|
||||
from herding.control.universal import compute_action as universal_action
|
||||
from training.herding_env import HerdingEnv
|
||||
|
||||
|
||||
TEACHERS = {
|
||||
"sequential": sequential_action,
|
||||
"strombom": strombom_action,
|
||||
"universal": universal_action,
|
||||
}
|
||||
|
||||
|
||||
def _call_teacher(fn, dog_xy, dog_heading, sheep_positions, pen_target,
                  drive_mode="differential"):
    """Call any teacher callable and return ``(vx, vy, omega, mode)``.

    Normalizes across teachers that return a 3-tuple ``(vx, vy, mode)``
    and teachers that return a 4-item ``(vx, vy, omega, mode)`` result.
    A 3-tuple is padded with ``omega = 0.0``; a 4-item result is returned
    unchanged.

    Three call shapes are supported, tried in this order:

    1. ``fn(dog_xy, dog_heading, sheep_positions, pen_target, drive_mode)``
    2. ``fn(dog_xy, dog_heading, sheep_positions, pen_target)``
    3. ``fn(dog_xy, sheep_positions, pen_target)``

    The shape is chosen by binding the arguments against the callable's
    signature *before* calling it, so the teacher runs exactly once. A
    ``TypeError`` raised inside the teacher therefore propagates as-is
    instead of being mistaken for a signature mismatch and triggering a
    second call with fewer arguments (which would double-step a stateful
    teacher and hide the real error).

    NOTE(review): a teacher that takes the short 3-argument form plus two
    or more optional positional parameters would also bind shape 1 and
    receive ``dog_heading`` in the sheep slot. That matches the previous
    behaviour; confirm no registered teacher has such a signature.

    Args:
        fn: Teacher callable (plain function or callable object).
        dog_xy: Dog position passed through to the teacher.
        dog_heading: Dog heading, passed only to shapes 1 and 2.
        sheep_positions: Sheep positions passed through to the teacher.
        pen_target: Pen target passed through to the teacher.
        drive_mode: Drive mode string, passed only to shape 1.

    Returns:
        The teacher's 4-item result unchanged, or ``(vx, vy, 0.0, mode)``
        built from a 3-item result.

    Raises:
        TypeError: If the teacher accepts none of the call shapes, or if
            the teacher itself raises ``TypeError``.
    """
    # Function-scope import keeps this block self-contained.
    import inspect

    call_shapes = (
        (dog_xy, dog_heading, sheep_positions, pen_target, drive_mode),
        (dog_xy, dog_heading, sheep_positions, pen_target),
        (dog_xy, sheep_positions, pen_target),
    )

    try:
        signature = inspect.signature(fn)
    except (TypeError, ValueError):
        # Some callables (e.g. certain builtins) expose no signature.
        signature = None

    if signature is None:
        # No introspection available: fall back to probing by calling.
        try:
            result = fn(*call_shapes[0])
        except TypeError:
            try:
                result = fn(*call_shapes[1])
            except TypeError:
                result = fn(*call_shapes[2])
    else:
        # Default to the shortest shape so that, when nothing binds, the
        # call below raises Python's own argument-mismatch TypeError.
        chosen = call_shapes[-1]
        for args in call_shapes:
            try:
                signature.bind(*args)
            except TypeError:
                continue
            chosen = args
            break
        result = fn(*chosen)

    if len(result) == 4:
        return result  # (vx, vy, omega, mode)
    vx, vy, mode = result
    return vx, vy, 0.0, mode
|
||||
|
||||
|
||||
def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int,
|
||||
teacher_fn, frame_stack: int = 1, privileged: bool = False):
|
||||
teacher_fn, frame_stack: int = 1, privileged: bool = False,
|
||||
drive_mode: str = "differential"):
|
||||
env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
|
||||
difficulty=1.0, seed=seed, frame_stack=frame_stack)
|
||||
difficulty=1.0, seed=seed, frame_stack=frame_stack,
|
||||
drive_mode=drive_mode)
|
||||
obs, _ = env.reset(seed=seed)
|
||||
obs_list, action_list = [], []
|
||||
# Wrap the base teacher so it opens with a rotation and walks to
|
||||
# centre when the tracker briefly empties — matches the student.
|
||||
scan_teacher = ActiveScanTeacher(teacher_fn)
|
||||
for step in range(max_steps):
|
||||
if privileged:
|
||||
# Asymmetric variant: teacher reads ground truth while the
|
||||
# student keeps the LiDAR obs. Default off.
|
||||
positions = {f"s{i}": (float(env.sheep_x[i]), float(env.sheep_y[i]))
|
||||
for i in range(env.n_sheep) if not env.sheep_penned[i]}
|
||||
if not positions:
|
||||
break
|
||||
vx, vy, _mode = teacher_fn(
|
||||
(env.dog_x, env.dog_y), positions, PEN_ENTRY,
|
||||
vx, vy, omega, _mode = _call_teacher(
|
||||
teacher_fn, (env.dog_x, env.dog_y), env.dog_heading,
|
||||
positions, PEN_ENTRY, drive_mode,
|
||||
)
|
||||
else:
|
||||
positions = env.perceived_positions()
|
||||
vx, vy, _mode = scan_teacher(
|
||||
(env.dog_x, env.dog_y), env.dog_heading,
|
||||
positions, PEN_ENTRY,
|
||||
result = _call_teacher(
|
||||
scan_teacher, (env.dog_x, env.dog_y), env.dog_heading,
|
||||
positions, PEN_ENTRY, drive_mode,
|
||||
)
|
||||
action = np.array([vx, vy], dtype=np.float32)
|
||||
vx, vy, omega, _mode = result
|
||||
if drive_mode == "mecanum":
|
||||
action = np.array([vx, vy, omega], dtype=np.float32)
|
||||
else:
|
||||
action = np.array([vx, vy], dtype=np.float32)
|
||||
if step % subsample == 0:
|
||||
obs_list.append(obs.copy())
|
||||
action_list.append(action.copy())
|
||||
@@ -85,7 +136,7 @@ def main():
|
||||
help="Keep every Nth (obs, action) pair.")
|
||||
parser.add_argument("--keep-failures", action="store_true",
|
||||
help="Include partial-success trajectories. Default off.")
|
||||
parser.add_argument("--teacher", default="sequential",
|
||||
parser.add_argument("--teacher", default="universal",
|
||||
choices=list(TEACHERS.keys()),
|
||||
help="Which analytic teacher to demonstrate.")
|
||||
parser.add_argument("--frame-stack", type=int, default=1,
|
||||
@@ -94,9 +145,24 @@ def main():
|
||||
parser.add_argument("--privileged", action="store_true",
|
||||
help="Teacher reads ground truth instead of "
|
||||
"tracker output (asymmetric BC).")
|
||||
parser.add_argument("--drive-mode", default="differential",
|
||||
choices=["differential", "mecanum"],
|
||||
help="Drive mode for the dog robot.")
|
||||
parser.add_argument("--world", default=None,
|
||||
choices=["field", "field_round"],
|
||||
help="World shape. If not set, uses HERDING_WORLD "
|
||||
"env var or defaults to 'field'. Must be set "
|
||||
"before geometry is imported.")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate --world matches geometry (already configured by the
|
||||
# early pre-parse above, or by HERDING_WORLD env var).
|
||||
if args.world is not None and args.world != FIELD_SHAPE:
|
||||
print(f"[demos] WARNING: --world={args.world} but geometry is "
|
||||
f"'{FIELD_SHAPE}'. This should not happen — file a bug.")
|
||||
|
||||
teacher_fn = TEACHERS[args.teacher]
|
||||
print(f"[demos] teacher: {args.teacher}")
|
||||
print(f"[demos] teacher: {args.teacher} world: {FIELD_SHAPE}")
|
||||
|
||||
n_sheep_list = [int(x) for x in args.n_sheep_list.split(",")]
|
||||
print(f"[demos] grid: n_sheep={n_sheep_list}, seeds={args.seeds_per_n}, "
|
||||
@@ -111,6 +177,7 @@ def main():
|
||||
obs, actions, success, total_steps = collect_one(
|
||||
n, seed, args.max_steps, args.subsample, teacher_fn,
|
||||
frame_stack=args.frame_stack, privileged=args.privileged,
|
||||
drive_mode=args.drive_mode,
|
||||
)
|
||||
n_total += 1
|
||||
if success:
|
||||
|
||||
Binary file not shown.
+19
-3
@@ -35,14 +35,15 @@ from training.herding_env import HerdingEnv
|
||||
|
||||
|
||||
def build_model(net_arch_pi, net_arch_vf, log_std_init: float,
|
||||
frame_stack: int = 1):
|
||||
frame_stack: int = 1, drive_mode: str = "differential"):
|
||||
"""Build a fresh SB3 PPO solely as a vehicle for the policy weights.
|
||||
|
||||
PPO's training-loop plumbing isn't used during BC. ``frame_stack``
|
||||
must match the demo file so the env's obs space agrees with the
|
||||
recorded obs shape.
|
||||
"""
|
||||
env = DummyVecEnv([lambda: HerdingEnv(frame_stack=frame_stack)])
|
||||
env = DummyVecEnv([lambda: HerdingEnv(frame_stack=frame_stack,
|
||||
drive_mode=drive_mode)])
|
||||
model = PPO(
|
||||
"MlpPolicy", env,
|
||||
policy_kwargs=dict(
|
||||
@@ -83,6 +84,10 @@ def main():
|
||||
"term; balances against MSE.")
|
||||
parser.add_argument("--seed", type=int, default=0)
|
||||
parser.add_argument("--device", default="cpu")
|
||||
parser.add_argument("--drive-mode", default=None,
|
||||
choices=["differential", "mecanum"],
|
||||
help="Drive mode. If not set, inferred from "
|
||||
"demo action dimension (2→differential, 3→mecanum).")
|
||||
args = parser.parse_args()
|
||||
|
||||
torch.manual_seed(args.seed)
|
||||
@@ -130,8 +135,19 @@ def main():
|
||||
frame_stack = obs_dim // _SINGLE
|
||||
if frame_stack > 1:
|
||||
print(f"[bc] inferred frame_stack={frame_stack} from demo obs dim {obs_dim}")
|
||||
|
||||
# Infer drive mode from action dimension if not explicitly set.
|
||||
action_dim = actions.shape[1]
|
||||
if args.drive_mode is not None:
|
||||
drive_mode = args.drive_mode
|
||||
elif action_dim == 3:
|
||||
drive_mode = "mecanum"
|
||||
else:
|
||||
drive_mode = "differential"
|
||||
print(f"[bc] drive_mode={drive_mode} (action_dim={action_dim})")
|
||||
|
||||
model, _env = build_model(net_arch_pi, net_arch_vf, args.log_std_init,
|
||||
frame_stack=frame_stack)
|
||||
frame_stack=frame_stack, drive_mode=drive_mode)
|
||||
policy = model.policy.to(args.device)
|
||||
optimizer = optim.Adam(policy.parameters(), lr=args.lr)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user