Checkpoint 4
This commit is contained in:
+37
-9
@@ -26,12 +26,16 @@ if _PROJECT_ROOT not in sys.path:
|
||||
|
||||
import numpy as np
|
||||
|
||||
from herding.active_scan import ActiveScanTeacher
|
||||
from herding.geometry import PEN_ENTRY
|
||||
from herding.sequential import compute_action as sequential_action
|
||||
from herding.strombom import compute_action as strombom_action
|
||||
from training.herding_env import HerdingEnv
|
||||
|
||||
|
||||
# Base analytic teachers (no scanning). The default at demo-collection
|
||||
# time wraps these in ActiveScanTeacher, which under LiDAR makes the
|
||||
# teacher operate on the same partial observations as the student.
|
||||
TEACHERS = {
|
||||
"sequential": sequential_action,
|
||||
"strombom": strombom_action,
|
||||
@@ -39,19 +43,34 @@ TEACHERS = {
|
||||
|
||||
|
||||
def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int,
|
||||
teacher_fn):
|
||||
teacher_fn, frame_stack: int = 1, privileged: bool = False):
|
||||
env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
|
||||
difficulty=1.0, seed=seed)
|
||||
difficulty=1.0, seed=seed, frame_stack=frame_stack)
|
||||
obs, _ = env.reset(seed=seed)
|
||||
obs_list, action_list = [], []
|
||||
# Active-scan wrapper: scan first, then run the base teacher on the
|
||||
# tracker dict. Built fresh per episode so the opening scan kicks in.
|
||||
scan_teacher = ActiveScanTeacher(teacher_fn)
|
||||
for step in range(max_steps):
|
||||
positions = {f"s{i}": (float(env.sheep_x[i]), float(env.sheep_y[i]))
|
||||
for i in range(env.n_sheep) if not env.sheep_penned[i]}
|
||||
if not positions:
|
||||
break
|
||||
vx, vy, _mode = teacher_fn(
|
||||
(env.dog_x, env.dog_y), positions, PEN_ENTRY,
|
||||
)
|
||||
if privileged:
|
||||
# Asymmetric "learning by cheating": teacher reads ground truth, student
|
||||
# gets LiDAR obs. Kept available for ablation; default off.
|
||||
positions = {f"s{i}": (float(env.sheep_x[i]), float(env.sheep_y[i]))
|
||||
for i in range(env.n_sheep) if not env.sheep_penned[i]}
|
||||
if not positions:
|
||||
break
|
||||
vx, vy, _mode = teacher_fn(
|
||||
(env.dog_x, env.dog_y), positions, PEN_ENTRY,
|
||||
)
|
||||
else:
|
||||
# Matched-perception teacher: it sees what the student sees
|
||||
# (the tracker dict), with active scanning to fill the
|
||||
# tracker before driving.
|
||||
positions = env.perceived_positions()
|
||||
vx, vy, _mode = scan_teacher(
|
||||
(env.dog_x, env.dog_y), env.dog_heading,
|
||||
positions, PEN_ENTRY,
|
||||
)
|
||||
action = np.array([vx, vy], dtype=np.float32)
|
||||
if step % subsample == 0:
|
||||
obs_list.append(obs.copy())
|
||||
@@ -81,6 +100,14 @@ def main():
|
||||
parser.add_argument("--teacher", default="sequential",
|
||||
choices=list(TEACHERS.keys()),
|
||||
help="Which analytic teacher to demonstrate.")
|
||||
parser.add_argument("--frame-stack", type=int, default=1,
|
||||
help="K — concatenate the last K env obs into a "
|
||||
"single (32·K)-D vector. Lets a memoryless "
|
||||
"MLP recover temporal info under partial "
|
||||
"LiDAR observability.")
|
||||
parser.add_argument("--privileged", action="store_true",
|
||||
help="Teacher reads ground truth (asymmetric BC). "
|
||||
"Default: matched-perception with active scan.")
|
||||
args = parser.parse_args()
|
||||
teacher_fn = TEACHERS[args.teacher]
|
||||
print(f"[demos] teacher: {args.teacher}")
|
||||
@@ -97,6 +124,7 @@ def main():
|
||||
for seed in range(args.seeds_per_n):
|
||||
obs, actions, success, total_steps = collect_one(
|
||||
n, seed, args.max_steps, args.subsample, teacher_fn,
|
||||
frame_stack=args.frame_stack, privileged=args.privileged,
|
||||
)
|
||||
n_total += 1
|
||||
if success:
|
||||
|
||||
Reference in New Issue
Block a user