Checkpoint 3
This commit is contained in:
@@ -1,22 +0,0 @@
|
||||
"""
|
||||
Viewpoint inspector — prints position, orientation and FOV to the console
|
||||
once per second. Attach as the controller of a dummy supervisor robot to
|
||||
copy-paste exact camera values into field.wbt.
|
||||
"""
|
||||
|
||||
from controller import Supervisor
|
||||
|
||||
robot = Supervisor()
|
||||
timestep = int(robot.getBasicTimeStep())
|
||||
vp = robot.getFromDef("VIEWPOINT")
|
||||
|
||||
step = 0
|
||||
while robot.step(timestep) != -1:
|
||||
if step % 60 == 0:
|
||||
pos = vp.getField("position").getSFVec3f()
|
||||
ori = vp.getField("orientation").getSFRotation()
|
||||
fov = vp.getField("fieldOfView").getSFFloat()
|
||||
print(f"position: {pos[0]:.3f} {pos[1]:.3f} {pos[2]:.3f}")
|
||||
print(f"orientation: {ori[0]:.3f} {ori[1]:.3f} {ori[2]:.3f} {ori[3]:.3f}")
|
||||
print(f"fieldOfView: {fov:.3f}\n")
|
||||
step += 1
|
||||
+17
-4
@@ -27,11 +27,19 @@ if _PROJECT_ROOT not in sys.path:
|
||||
import numpy as np
|
||||
|
||||
from herding.geometry import PEN_ENTRY
|
||||
from herding.sequential import compute_action
|
||||
from herding.sequential import compute_action as sequential_action
|
||||
from herding.strombom import compute_action as strombom_action
|
||||
from training.herding_env import HerdingEnv
|
||||
|
||||
|
||||
def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int):
|
||||
# Registry of analytic teacher policies, keyed by the name accepted on the
# command line via --teacher.
TEACHERS = dict(
    sequential=sequential_action,
    strombom=strombom_action,
)
|
||||
|
||||
|
||||
def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int,
|
||||
teacher_fn):
|
||||
env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
|
||||
difficulty=1.0, seed=seed)
|
||||
obs, _ = env.reset(seed=seed)
|
||||
@@ -41,7 +49,7 @@ def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int):
|
||||
for i in range(env.n_sheep) if not env.sheep_penned[i]}
|
||||
if not positions:
|
||||
break
|
||||
vx, vy, _mode = compute_action(
|
||||
vx, vy, _mode = teacher_fn(
|
||||
(env.dog_x, env.dog_y), positions, PEN_ENTRY,
|
||||
)
|
||||
action = np.array([vx, vy], dtype=np.float32)
|
||||
@@ -70,7 +78,12 @@ def main():
|
||||
help="Keep every Nth (obs, action) pair.")
|
||||
parser.add_argument("--keep-failures", action="store_true",
|
||||
help="Include partial-success trajectories. Default off.")
|
||||
parser.add_argument("--teacher", default="sequential",
|
||||
choices=list(TEACHERS.keys()),
|
||||
help="Which analytic teacher to demonstrate.")
|
||||
args = parser.parse_args()
|
||||
teacher_fn = TEACHERS[args.teacher]
|
||||
print(f"[demos] teacher: {args.teacher}")
|
||||
|
||||
n_sheep_list = [int(x) for x in args.n_sheep_list.split(",")]
|
||||
print(f"[demos] grid: n_sheep={n_sheep_list}, seeds={args.seeds_per_n}, "
|
||||
@@ -83,7 +96,7 @@ def main():
|
||||
for n in n_sheep_list:
|
||||
for seed in range(args.seeds_per_n):
|
||||
obs, actions, success, total_steps = collect_one(
|
||||
n, seed, args.max_steps, args.subsample,
|
||||
n, seed, args.max_steps, args.subsample, teacher_fn,
|
||||
)
|
||||
n_total += 1
|
||||
if success:
|
||||
|
||||
+3
-3
@@ -15,7 +15,7 @@
|
||||
# tools/run_webots.sh 3 strombom # canonical baseline, 3 sheep
|
||||
#
|
||||
# Notes:
|
||||
# * The RL mode loads training/runs/bc_pretrained/policy.zip by default.
|
||||
# * The RL mode loads training/runs/bc_solo/policy.zip by default.
|
||||
# Override via HERDING_POLICY_DIR=/path/to/run env var.
|
||||
# * Conda env "tir" must be active (provides stable-baselines3 + torch).
|
||||
|
||||
@@ -46,12 +46,12 @@ echo "------------------------------------------------------------"
|
||||
echo "World : $DST"
|
||||
echo "Mode : $MODE"
|
||||
echo "Sheep : $active active"
|
||||
echo "Policy dir : ${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_pretrained}"
|
||||
echo "Policy dir : ${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_solo}"
|
||||
echo "------------------------------------------------------------"
|
||||
|
||||
# Webots strips HERDING_* env vars from controller subprocesses in some
|
||||
# setups, so we also write a runtime config file the controller reads.
|
||||
RESOLVED_POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_pretrained}"
|
||||
RESOLVED_POLICY_DIR="${HERDING_POLICY_DIR:-$ROOT/training/runs/bc_solo}"
|
||||
cat > "$ROOT/herding_runtime.cfg" <<EOF
|
||||
HERDING_MODE=$MODE
|
||||
HERDING_POLICY_DIR=$RESOLVED_POLICY_DIR
|
||||
|
||||
Reference in New Issue
Block a user