Compare commits
112 Commits
main
..
0a27ad9a26
| Author | SHA1 | Date | |
|---|---|---|---|
| 0a27ad9a26 | |||
| 07d1ece3d4 | |||
| 62ea811655 | |||
| 27c0f65722 | |||
| 1df84ae4b5 | |||
| e86fee5ae8 | |||
| bdaff6a3e1 | |||
| eadeeafb32 | |||
| cfbf4a0267 | |||
| d00da52c3c | |||
| 7ab69ab0f3 | |||
| 10c01a938e | |||
| a584a034e9 | |||
| 3b4c99a6c4 | |||
| ee77c8606c | |||
| b3cf9909a8 | |||
| 1c197e0ff7 | |||
| 03b2df5656 | |||
| 2d23289052 | |||
| 876e14e74f | |||
| dd5ac669e5 | |||
| c61df91950 | |||
| aa598fcb83 | |||
| 0f807003a5 | |||
| 683de740af | |||
| be58ad2054 | |||
| 5c2ee4bba5 | |||
| a01a5c9cef | |||
| fce0e0c786 | |||
| b457155538 | |||
| 6688325d89 | |||
| 2a6db038df | |||
| 1bb9415414 | |||
| 90aa3bbcb4 | |||
| 80a314b9e9 | |||
| a2363d882f | |||
| 57b1735e1a | |||
| deeae3193e | |||
| 1af7d03ce2 | |||
| 8110fc3143 | |||
| ad185b4d7e | |||
| 27fe6d1bf5 | |||
| e2883212c5 | |||
| 11e13c6980 | |||
| a561f8a697 | |||
| a44ddb7b08 | |||
| acf0810425 | |||
| 3cfd6b5e81 | |||
| d1aab20322 | |||
| 287743709a | |||
| 61f8a7db15 | |||
| b031473758 | |||
| 6253850620 | |||
| 6612dbc1ba | |||
| 7b87908410 | |||
| e302c76886 | |||
| 841f5fa520 | |||
| 7bfb7d3aae | |||
| 5005128c07 | |||
| 16878c5a0b | |||
| 75d030cb49 | |||
| cc6d72e472 | |||
| 3a5decb185 | |||
| 75c5b7c014 | |||
| 4350c7d320 | |||
| cd7e62b1b2 | |||
| 9bbef28515 | |||
| 438fa1be1d | |||
| e7c1d82f5c | |||
| f889dc78cc | |||
| 19bfac9bd9 | |||
| 02b20fbdb4 | |||
| 433652cb94 | |||
| fbe76a0d04 | |||
| 062de676c9 | |||
| 7d5725cc3e | |||
| 5a61a424ee | |||
| c029c3fc6c | |||
| b77f36b713 | |||
| 0716c6c3c8 | |||
| b3251fcca3 | |||
| d599181d22 | |||
| 8b54b2a934 | |||
| eb29cdf402 | |||
| 36b3216c5f | |||
| 7bb545eab6 | |||
| efe996a5a9 | |||
| 3bac24f406 | |||
| fc961e651c | |||
| 65d881aa0f | |||
| bf9fe902d9 | |||
| 4d7f365358 | |||
| c2da9c10e4 | |||
| d8b4e2c042 | |||
| e0426bf320 | |||
| 3574d57ba2 | |||
| 58d773cb7c | |||
| fe5174e0bd | |||
| 678d757fe8 | |||
| 44b2788e78 | |||
| bdbe8ba1de | |||
| fcfa2c35c8 | |||
| 17eb25864e | |||
| 4189cc8dba | |||
| 1e3b67d194 | |||
| f68dea44da | |||
| a13f5d0ff0 | |||
| 81dc2aca01 | |||
| fdac0ae0b0 | |||
| 9e13eb060d | |||
| ea6e66b16c | |||
| ffbfaa3977 |
+35
-2
@@ -1,2 +1,35 @@
|
|||||||
# Stuff
|
# Python
|
||||||
_example/
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
.pytest_cache/
|
||||||
|
|
||||||
|
# Training artefacts: ignore all run outputs except deployable policies
|
||||||
|
training/runs/**
|
||||||
|
!training/runs/
|
||||||
|
!training/runs/.gitkeep
|
||||||
|
!training/runs/*/
|
||||||
|
!training/runs/*/policy.zip
|
||||||
|
|
||||||
|
# BC demo blobs — these get regenerated by `python -m training.bc.collect`
|
||||||
|
# and are too large to track. Keep them out of git.
|
||||||
|
training/bc/*.npz
|
||||||
|
training/bc/v1/
|
||||||
|
|
||||||
|
# Webots launcher scratch (the _test.wbt files are emitted on every run)
|
||||||
|
worlds/**
|
||||||
|
!worlds/field.wbt
|
||||||
|
!worlds/field_round.wbt
|
||||||
|
herding_runtime.cfg
|
||||||
|
|
||||||
|
# Runtime logs — all of these are produced by training/eval/webots runs
|
||||||
|
# and are not useful to track in git. Keep summary docs/markdown only.
|
||||||
|
*.log
|
||||||
|
*.stdout
|
||||||
|
calibrate_mecanum.log
|
||||||
|
training/.run_done
|
||||||
|
|
||||||
|
# Local-only training backups (never committed).
|
||||||
|
_backup_pretrain/
|
||||||
|
|
||||||
|
# Tooling
|
||||||
|
.claude/
|
||||||
|
|||||||
@@ -0,0 +1,355 @@
|
|||||||
|
# Training pipeline for the shepherd-dog herding project.
|
||||||
|
# Stages chain via output files in training/.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# make # full pipeline: bc_demos -> bc -> rl -> eval
|
||||||
|
# make bc_demos # generate sim demos
|
||||||
|
# make bc # behaviour clone (rebuilds bc_demos if missing)
|
||||||
|
# make rl # KL-PPO fine-tune (rebuilds bc if missing)
|
||||||
|
# make eval # 10-seed env eval of rl
|
||||||
|
# make test # pytest suite
|
||||||
|
# make webots N=10 MODE=rl # launch Webots in the chosen mode
|
||||||
|
# WEBOTS_HEADLESS=1 make webots # no 3D view, fast mode (still needs DISPLAY or xvfb-run)
|
||||||
|
# make clean # delete bc_demos and run artefacts
|
||||||
|
# make clean_all # delete artefacts for all combinations
|
||||||
|
# make help # print the target table
|
||||||
|
#
|
||||||
|
# Override any hyperparameter on the command line, for example:
|
||||||
|
# make rl PPO_STEPS=2000000 KL=0.02
|
||||||
|
# make eval EVAL_SEEDS=20
|
||||||
|
#
|
||||||
|
# Drive mode selects the locomotion model:
|
||||||
|
# make DRIVE=differential 2-wheel diff-drive (default)
|
||||||
|
# make DRIVE=mecanum 4-wheel omnidirectional
|
||||||
|
#
|
||||||
|
# World shape:
|
||||||
|
# make WORLD=field rectangular (default)
|
||||||
|
# make WORLD=field_round circular fence
|
||||||
|
#
|
||||||
|
# To train all 4 combinations:
|
||||||
|
# make train_all
|
||||||
|
|
||||||
|
|
||||||
|
PY := python
|
||||||
|
|
||||||
|
# Drive mode and world shape — each combination gets its own artefacts.
|
||||||
|
DRIVE ?= differential
|
||||||
|
WORLD ?= field
|
||||||
|
|
||||||
|
# Derived tag and paths.
|
||||||
|
TAG = $(DRIVE)_$(WORLD)
|
||||||
|
BC_DEMOS = training/bc/demos_$(TAG).npz
|
||||||
|
BC_DIR = training/runs/bc_$(TAG)
|
||||||
|
RL_DIR = training/runs/rl_$(TAG)
|
||||||
|
# Stage-2 "speed pass": continue PPO from RL_DIR with TIME_W < 0 so the
|
||||||
|
# policy keeps Stage-1's success rate but cuts time-to-pen. Output is a
|
||||||
|
# separate run dir so Stage-1 stays comparable.
|
||||||
|
RL_FAST_DIR = training/runs/rl_fast_$(TAG)
|
||||||
|
BC_POLICY = $(BC_DIR)/policy.zip
|
||||||
|
RL_POLICY = $(RL_DIR)/policy.zip
|
||||||
|
RL_FAST_POLICY = $(RL_FAST_DIR)/policy.zip
|
||||||
|
|
||||||
|
# --- Demo collection ---
|
||||||
|
TEACHER ?= universal
|
||||||
|
# Mecanum has more complex dynamics and a weaker teacher imitation signal
|
||||||
|
# (val_cos ≈ 0.70 vs ≥ 0.88 for differential). Give it more demos and
|
||||||
|
# longer BC training to compensate.
|
||||||
|
ifeq ($(DRIVE),mecanum)
|
||||||
|
ifeq ($(WORLD),field_round)
|
||||||
|
SEEDS_PER_N ?= 80
|
||||||
|
else
|
||||||
|
SEEDS_PER_N ?= 50
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
# Round field is harder; more demos give BC a fair shot at 60%+.
|
||||||
|
ifeq ($(WORLD),field_round)
|
||||||
|
SEEDS_PER_N ?= 60
|
||||||
|
else
|
||||||
|
SEEDS_PER_N ?= 25
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
SUBSAMPLE ?= 3
|
||||||
|
FRAME_STACK ?= 4
|
||||||
|
DEMO_MAX_STEPS ?= 100000
|
||||||
|
|
||||||
|
# --- Behaviour cloning ---
|
||||||
|
ifeq ($(DRIVE),mecanum)
|
||||||
|
ifeq ($(WORLD),field_round)
|
||||||
|
BC_EPOCHS ?= 200
|
||||||
|
else
|
||||||
|
BC_EPOCHS ?= 100
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
ifeq ($(WORLD),field_round)
|
||||||
|
BC_EPOCHS ?= 150
|
||||||
|
else
|
||||||
|
BC_EPOCHS ?= 60
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
BC_NET_ARCH ?= 512,512
|
||||||
|
|
||||||
|
# --- Domain randomisation (used by bc_demos and rl targets) ---
|
||||||
|
# FP_RATE: mean false-positive detections injected per step (Poisson λ).
|
||||||
|
# ACTION_SMOOTH_TRAIN: EMA on actions to match Webots controller (0.55).
|
||||||
|
# WHEEL_SLIP_STD: Gaussian wheel-speed noise for mecanum dynamics gap.
|
||||||
|
#
|
||||||
|
# FP_RATE is used consistently in BC demos *and* RL: BC collection runs
|
||||||
|
# in PRIVILEGED mode (teacher sees GT; student obs sees the FP-injected
|
||||||
|
# tracker output), so the policy learns to denoise to the GT signal.
|
||||||
|
# Mismatched FP_RATE between BC/RL was the root cause of an earlier
|
||||||
|
# regression (BC=0, RL=2 → PPO stalled at 0% success).
|
||||||
|
FP_RATE ?= 0.0
|
||||||
|
ACTION_SMOOTH_TRAIN ?= 0.55
|
||||||
|
WHEEL_SLIP_STD ?= 0.05
|
||||||
|
|
||||||
|
# --- KL-PPO fine-tune ---
|
||||||
|
# Round field: longer training, looser KL, no time penalty (success
|
||||||
|
# must be learned before speed is rewarded).
|
||||||
|
ifeq ($(WORLD),field_round)
|
||||||
|
PPO_STEPS ?= 4000000
|
||||||
|
KL ?= 0.02
|
||||||
|
else
|
||||||
|
PPO_STEPS ?= 2000000
|
||||||
|
KL ?= 0.05
|
||||||
|
endif
|
||||||
|
# Time penalty is 0 until success rate is high. Earlier runs showed
|
||||||
|
# TIME_W=-0.05 traded ~10 pts of success for speed on hard combos —
|
||||||
|
# learn to succeed first, optimize speed in a later pass.
|
||||||
|
TIME_W ?= 0.0
|
||||||
|
IMITATE ?= 0.0
|
||||||
|
# PPO rollouts at full difficulty so the training distribution matches
|
||||||
|
# eval (deployment). Anything lower causes a train/eval mismatch that
|
||||||
|
# can make RL eval worse than BC.
|
||||||
|
DIFFICULTY ?= 1.0
|
||||||
|
|
||||||
|
# --- Stage-2 "speed pass" (rl_fast) ---
|
||||||
|
# Continues from RL_DIR with a negative TIME_W. Tighter KL keeps the
|
||||||
|
# policy near the Stage-1 success rate while step-count drops.
|
||||||
|
# Differential and mecanum respond differently: mecanum needs a stronger
|
||||||
|
# time penalty to achieve speed gains; differential only needs a light
|
||||||
|
# touch (-0.02) — stronger penalties trade success for speed without gain.
|
||||||
|
RL_FAST_STEPS ?= 1000000
|
||||||
|
RL_FAST_KL ?= 0.05
|
||||||
|
ifeq ($(DRIVE),mecanum)
|
||||||
|
RL_FAST_TIME_W ?= -0.05
|
||||||
|
else
|
||||||
|
RL_FAST_TIME_W ?= -0.02
|
||||||
|
endif
|
||||||
|
|
||||||
|
# --- Evaluation ---
|
||||||
|
EVAL_SEEDS ?= 10
|
||||||
|
EVAL_MAX_STEPS ?= 15000
|
||||||
|
|
||||||
|
# --- Webots launcher ---
|
||||||
|
N ?= 10
|
||||||
|
MODE ?= rl
|
||||||
|
|
||||||
|
|
||||||
|
.PHONY: all bc_demos bc rl rl_fast eval eval_fast eval_all eval_all_fast \
|
||||||
|
test webots webots_sweep clean clean_all help \
|
||||||
|
train_all train_diff_rect train_diff_round \
|
||||||
|
train_mec_rect train_mec_round \
|
||||||
|
train_all_fast train_diff_rect_fast train_diff_round_fast \
|
||||||
|
train_mec_rect_fast train_mec_round_fast \
|
||||||
|
remote_full
|
||||||
|
|
||||||
|
all: eval
|
||||||
|
|
||||||
|
# Export HERDING_WORLD so that geometry.py picks it up at import time.
|
||||||
|
export HERDING_WORLD = $(WORLD)
|
||||||
|
# Force Python stdout/stderr unbuffered so progress is visible live when
|
||||||
|
# the build is run under tee / nohup / tmux pipes.
|
||||||
|
export PYTHONUNBUFFERED = 1
|
||||||
|
|
||||||
|
# Mecanum needs --use-webots-preset so training.bc.collect picks up
|
||||||
|
# HERDING_MEC_WEBOTS — the gym mecanum kinematics get the strafe
|
||||||
|
# efficiency and forward-bleed match against the physical-roller
|
||||||
|
# Webots proto. Without this flag the policy trains on textbook
|
||||||
|
# X-pattern mecanum and fails on deployment.
|
||||||
|
ifeq ($(DRIVE),mecanum)
|
||||||
|
WEBOTS_PRESET_FLAG = --use-webots-preset
|
||||||
|
else
|
||||||
|
WEBOTS_PRESET_FLAG =
|
||||||
|
endif
|
||||||
|
|
||||||
|
# Demo collection: `bc_demos` is the phony alias, the .npz file is the real
# target. The recipe writes exactly $@ (the demo archive for this TAG).
bc_demos: $(BC_DEMOS)

$(BC_DEMOS):
	$(PY) -m training.bc.collect \
		--teacher $(TEACHER) \
		--out $@ \
		--seeds-per-n $(SEEDS_PER_N) \
		--subsample $(SUBSAMPLE) \
		--frame-stack $(FRAME_STACK) \
		--drive-mode $(DRIVE) \
		--world $(WORLD) \
		--max-steps $(DEMO_MAX_STEPS) \
		--fp-rate $(FP_RATE) \
		--action-smooth $(ACTION_SMOOTH_TRAIN) \
		--wheel-slip-std $(WHEEL_SLIP_STD) \
		$(WEBOTS_PRESET_FLAG)
|
||||||
|
|
||||||
|
# Behaviour cloning: `bc` is the phony alias for the BC policy archive.
# $< is the demo file, $(@D) is the run directory that holds policy.zip.
bc: $(BC_POLICY)

$(BC_POLICY): $(BC_DEMOS)
	$(PY) -m training.bc.pretrain \
		--demos $< \
		--out $(@D) \
		--epochs $(BC_EPOCHS) \
		--net-arch $(BC_NET_ARCH)
|
||||||
|
|
||||||
|
# Stage-1 KL-PPO fine-tune: `rl` is the phony alias for the RL policy archive.
# $(<D) is the BC run directory (the prerequisite's directory), $(@D) is the
# RL run directory that receives policy.zip.
rl: $(RL_POLICY)

$(RL_POLICY): $(BC_POLICY)
	$(PY) -m training.rl.train \
		--bc $(<D) \
		--out $(@D) \
		--total-timesteps $(PPO_STEPS) \
		--kl-coef $(KL) \
		--imitate-weight $(IMITATE) \
		--time-weight $(TIME_W) \
		--difficulty $(DIFFICULTY) \
		--drive-mode $(DRIVE) \
		--world $(WORLD) \
		--fp-rate $(FP_RATE) \
		--action-smooth $(ACTION_SMOOTH_TRAIN) \
		--wheel-slip-std $(WHEEL_SLIP_STD)
|
||||||
|
# (rl/train.py auto-applies HERDING_MEC_WEBOTS when drive=mecanum;
|
||||||
|
# no --use-webots-preset flag needed.)
|
||||||
|
|
||||||
|
# Gym evaluation of the Stage-1 RL policy for the current DRIVE+WORLD.
# $(<D) is the directory of the prerequisite policy.zip, i.e. the RL run dir.
eval: $(RL_POLICY)
	$(PY) -m training.eval \
		--policy $(<D) \
		--max-flock 10 \
		--max-steps $(EVAL_MAX_STEPS) \
		--n-seeds $(EVAL_SEEDS) \
		--drive-mode $(DRIVE) \
		--world $(WORLD)
|
||||||
|
|
||||||
|
# --- Stage-2 speed pass ---
|
||||||
|
# Continues PPO from $(RL_DIR) with a per-step time penalty so the
|
||||||
|
# policy keeps Stage-1's success rate but cuts mean steps-to-pen. Use
|
||||||
|
# `make rl_fast` after Stage-1 RL has converged (success ≥ teacher).
|
||||||
|
# Stage-2 speed pass: `rl_fast` is the phony alias for the rl_fast policy
# archive. Training restarts from the Stage-1 run directory ($(<D)) and
# writes into the separate rl_fast run directory ($(@D)).
rl_fast: $(RL_FAST_POLICY)

$(RL_FAST_POLICY): $(RL_POLICY)
	$(PY) -m training.rl.train \
		--bc $(<D) \
		--out $(@D) \
		--total-timesteps $(RL_FAST_STEPS) \
		--kl-coef $(RL_FAST_KL) \
		--imitate-weight $(IMITATE) \
		--time-weight $(RL_FAST_TIME_W) \
		--difficulty $(DIFFICULTY) \
		--drive-mode $(DRIVE) \
		--world $(WORLD) \
		--fp-rate $(FP_RATE) \
		--action-smooth $(ACTION_SMOOTH_TRAIN) \
		--wheel-slip-std $(WHEEL_SLIP_STD)
|
||||||
|
|
||||||
|
# Gym evaluation of the Stage-2 (rl_fast) policy for the current DRIVE+WORLD.
# $(<D) is the directory of the prerequisite policy.zip, i.e. the rl_fast run dir.
eval_fast: $(RL_FAST_POLICY)
	$(PY) -m training.eval \
		--policy $(<D) \
		--max-flock 10 \
		--max-steps $(EVAL_MAX_STEPS) \
		--n-seeds $(EVAL_SEEDS) \
		--drive-mode $(DRIVE) \
		--world $(WORLD)
|
||||||
|
|
||||||
|
test:
|
||||||
|
$(PY) -m pytest tests/
|
||||||
|
|
||||||
|
webots:
|
||||||
|
@bash tools/webots_menu.sh
|
||||||
|
|
||||||
|
# Headless sweep across all modes × worlds × flock sizes.
|
||||||
|
# Results are written to webots_sweep.log.
|
||||||
|
# Set USE_GT=1 to bypass LiDAR tracker (isolate perception from policy).
|
||||||
|
# PATH is only overridden when CONDA_PREFIX is set. With an empty
# CONDA_PREFIX the old form expanded to PATH="/bin:...", silently pushing
# /bin ahead of every other directory on the search path.
# Note: any non-empty USE_GT value (including 0) enables the GT bypass,
# because $(if ...) only tests for a non-empty expansion.
webots_sweep:
	env $(if $(USE_GT),HERDING_USE_GT=1) \
		$(if $(CONDA_PREFIX),PATH="$(CONDA_PREFIX)/bin:$(PATH)") \
		bash tools/webots_sweep.sh webots_sweep.log
|
||||||
|
|
||||||
|
# Delete every artefact of the current DRIVE+WORLD combination: the demo
# archive plus the BC, Stage-1 RL and Stage-2 rl_fast run directories.
# The rl_fast directory is included so that `clean` matches what
# `clean_all` already removes through its training/runs/rl_* glob.
clean:
	rm -f $(BC_DEMOS)
	rm -rf $(BC_DIR) $(RL_DIR) $(RL_FAST_DIR)
|
||||||
|
|
||||||
|
clean_all:
|
||||||
|
rm -f training/bc/demos_*.npz
|
||||||
|
rm -rf training/runs/bc_* training/runs/rl_*
|
||||||
|
|
||||||
|
# --- Train all 4 combinations ---
|
||||||
|
train_diff_rect:
|
||||||
|
$(MAKE) DRIVE=differential WORLD=field
|
||||||
|
|
||||||
|
train_diff_round:
|
||||||
|
$(MAKE) DRIVE=differential WORLD=field_round
|
||||||
|
|
||||||
|
train_mec_rect:
|
||||||
|
$(MAKE) DRIVE=mecanum WORLD=field
|
||||||
|
|
||||||
|
train_mec_round:
|
||||||
|
$(MAKE) DRIVE=mecanum WORLD=field_round
|
||||||
|
|
||||||
|
train_all: train_diff_rect train_diff_round train_mec_rect train_mec_round
|
||||||
|
|
||||||
|
# Gym eval sweep over all 4 combos. Use after train_all / train_all_fast.
|
||||||
|
eval_all:
|
||||||
|
@for d in differential mecanum; do \
|
||||||
|
for w in field field_round; do \
|
||||||
|
echo ""; \
|
||||||
|
echo "=== BC $$d / $$w ==="; \
|
||||||
|
$(PY) -m training.eval --policy training/runs/bc_$${d}_$${w} \
|
||||||
|
--max-flock 10 --max-steps $(EVAL_MAX_STEPS) --n-seeds $(EVAL_SEEDS) \
|
||||||
|
--drive-mode $$d --world $$w; \
|
||||||
|
echo ""; \
|
||||||
|
echo "=== RL $$d / $$w ==="; \
|
||||||
|
$(PY) -m training.eval --policy training/runs/rl_$${d}_$${w} \
|
||||||
|
--max-flock 10 --max-steps $(EVAL_MAX_STEPS) --n-seeds $(EVAL_SEEDS) \
|
||||||
|
--drive-mode $$d --world $$w; \
|
||||||
|
done; \
|
||||||
|
done
|
||||||
|
|
||||||
|
# One-shot remote runbook: clean → Stage-1 train → Stage-1 eval → Stage-2
|
||||||
|
# train → Stage-2 eval. Each step pipes to its own log file in the repo
|
||||||
|
# root so the run is fully unattended.
|
||||||
|
# Every logged stage runs under `bash -o pipefail`: with a plain
# `cmd | tee log` the recipe line's exit status is tee's, so a failed
# $(MAKE) stage would not stop the stages that follow it. Keeping the
# literal $(MAKE) reference in each line preserves make's recursive-make
# handling (-n, jobserver) for the sub-makes.
remote_full:
	$(MAKE) clean_all
	bash -o pipefail -c '$(MAKE) train_all 2>&1 | tee stage1_train.log'
	bash -o pipefail -c '$(MAKE) eval_all 2>&1 | tee stage1_eval.log'
	bash -o pipefail -c '$(MAKE) train_all_fast 2>&1 | tee stage2_train.log'
	bash -o pipefail -c '$(MAKE) eval_all_fast 2>&1 | tee stage2_eval.log'
	@echo ""
	@echo "===================================================="
	@echo " Done. Logs: stage1_train.log stage1_eval.log"
	@echo " stage2_train.log stage2_eval.log"
	@echo "===================================================="
|
||||||
|
|
||||||
|
eval_all_fast:
|
||||||
|
@for d in differential mecanum; do \
|
||||||
|
for w in field field_round; do \
|
||||||
|
echo ""; \
|
||||||
|
echo "=== RL_FAST $$d / $$w ==="; \
|
||||||
|
$(PY) -m training.eval --policy training/runs/rl_fast_$${d}_$${w} \
|
||||||
|
--max-flock 10 --max-steps $(EVAL_MAX_STEPS) --n-seeds $(EVAL_SEEDS) \
|
||||||
|
--drive-mode $$d --world $$w; \
|
||||||
|
done; \
|
||||||
|
done
|
||||||
|
|
||||||
|
# --- Stage-2 sweep ---
|
||||||
|
train_diff_rect_fast:
|
||||||
|
$(MAKE) DRIVE=differential WORLD=field rl_fast
|
||||||
|
|
||||||
|
train_diff_round_fast:
|
||||||
|
$(MAKE) DRIVE=differential WORLD=field_round rl_fast
|
||||||
|
|
||||||
|
train_mec_rect_fast:
|
||||||
|
$(MAKE) DRIVE=mecanum WORLD=field rl_fast
|
||||||
|
|
||||||
|
train_mec_round_fast:
|
||||||
|
$(MAKE) DRIVE=mecanum WORLD=field_round rl_fast
|
||||||
|
|
||||||
|
train_all_fast: train_diff_rect_fast train_diff_round_fast \
|
||||||
|
train_mec_rect_fast train_mec_round_fast
|
||||||
|
|
||||||
|
help:
|
||||||
|
@echo "Targets:"
|
||||||
|
@echo " make full pipeline (bc_demos -> bc -> rl -> eval)"
|
||||||
|
@echo " make bc_demos sim demos via the '$(TEACHER)' teacher"
|
||||||
|
@echo " make bc train BC (rebuilds bc_demos if missing)"
|
||||||
|
@echo " make rl KL-PPO fine-tune (rebuilds bc if missing)"
|
||||||
|
@echo " make eval $(EVAL_SEEDS)-seed env eval of rl"
|
||||||
|
@echo " make test pytest suite"
|
||||||
|
@echo " make webots [N=$(N)] [MODE=$(MODE)] [DRIVE=$(DRIVE)] [WORLD=$(WORLD)]"
|
||||||
|
@echo " launch Webots in the chosen mode"
|
||||||
|
@echo " WEBOTS_HEADLESS=1 make webots … no 3D view + fast + --batch"
|
||||||
|
@echo " make clean delete artefacts for current DRIVE+WORLD"
|
||||||
|
@echo " make clean_all delete artefacts for all combinations"
|
||||||
|
@echo ""
|
||||||
|
@echo "Combinations:"
|
||||||
|
@echo " make DRIVE=differential WORLD=field diff + rectangular (default)"
|
||||||
|
@echo " make DRIVE=differential WORLD=field_round diff + circular"
|
||||||
|
@echo " make DRIVE=mecanum WORLD=field mecanum + rectangular"
|
||||||
|
@echo " make DRIVE=mecanum WORLD=field_round mecanum + circular"
|
||||||
|
@echo " make train_all all 4 in sequence"
|
||||||
|
@echo ""
|
||||||
|
@echo "Hyperparameter overrides (showing defaults):"
|
||||||
|
@echo " TEACHER=$(TEACHER) SEEDS_PER_N=$(SEEDS_PER_N) SUBSAMPLE=$(SUBSAMPLE) FRAME_STACK=$(FRAME_STACK) DEMO_MAX_STEPS=$(DEMO_MAX_STEPS)"
|
||||||
|
@echo " BC_EPOCHS=$(BC_EPOCHS) BC_NET_ARCH=$(BC_NET_ARCH)"
|
||||||
|
@echo " PPO_STEPS=$(PPO_STEPS) KL=$(KL) IMITATE=$(IMITATE) TIME_W=$(TIME_W)"
|
||||||
|
@echo " EVAL_SEEDS=$(EVAL_SEEDS) EVAL_MAX_STEPS=$(EVAL_MAX_STEPS)"
|
||||||
@@ -0,0 +1,252 @@
|
|||||||
|
# Autonomous Shepherd-Dog Herding (Webots + RL)
|
||||||
|
|
||||||
|
Group G25 — *Diogo Costa, Johnny Fernandes, Nelson Neto*
|
||||||
|
|
||||||
|
A shepherd dog that herds 1–10 sheep through a 3 m gate into an
|
||||||
|
external pen. Two worlds (`field` rectangular, `field_round` circular),
|
||||||
|
two drives (`differential`, `mecanum`), and four deployable control
|
||||||
|
modes:
|
||||||
|
|
||||||
|
| Mode | Source | Role |
|
||||||
|
|---|---|---|
|
||||||
|
| `strombom` | Strömbom et al. (2014) collect/drive heuristic | Analytic baseline |
|
||||||
|
| `sequential` | Single-target "pin-and-push" | Alternative analytic baseline |
|
||||||
|
| `bc` | Behaviour cloning of the universal teacher | Imitation learning result |
|
||||||
|
| `rl` | KL-regularised PPO fine-tune of `bc` | Reward-driven refinement |
|
||||||
|
|
||||||
|
## Perception
|
||||||
|
|
||||||
|
The dog perceives sheep **only through its front-mounted 140° LiDAR**
|
||||||
|
(180 rays, 12 m max range — see `protos/ShepherdDog.proto`). Each
|
||||||
|
control step:
|
||||||
|
|
||||||
|
1. Read `lidar.getRangeImage()`,
|
||||||
|
2. Cluster returns into world-frame `(x, y)` estimates
|
||||||
|
(`herding/perception/lidar_perception.py`),
|
||||||
|
3. Fold them into a multi-target tracker that maintains last-seen
|
||||||
|
positions for sheep currently outside the FOV
|
||||||
|
(`herding/perception/sheep_tracker.py`).
|
||||||
|
|
||||||
|
**LiDAR validation** (intermediate-goal item v from `docs/project.md`):
|
||||||
|
during development a diagnostic-dump controller captured 80 real
|
||||||
|
Webots scans plus the ground-truth sheep positions. Comparing
|
||||||
|
detections against GT showed clustered centroids match GT positions
|
||||||
|
within 0.15 m after the +SHEEP_RADIUS surface-to-centre correction —
|
||||||
|
i.e. the LiDAR pipeline produces correct sheep-position estimates
|
||||||
|
from the real Webots scan, validating the sensor for the herding
|
||||||
|
task.
|
||||||
|
|
||||||
|
The tracker outputs a `{name: (x, y)}` dict shaped exactly like the
|
||||||
|
prior receiver-based one, so Strömbom, Sequential, and the BC obs
|
||||||
|
builder all run unchanged on top of it. The 2D Gymnasium env
|
||||||
|
(`herding/perception/lidar_sim.py`) raycasts sheep discs at training time, so
|
||||||
|
demos collected in the env match the perception the deployed
|
||||||
|
controller sees in Webots.
|
||||||
|
|
||||||
|
Privileged ground-truth perception is available for ablation —
|
||||||
|
`HerdingEnv(use_lidar=False)`.
|
||||||
|
|
||||||
|
## Quick start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Set up the Python env (any venv with PyTorch + SB3)
|
||||||
|
pip install -r training/requirements.txt
|
||||||
|
|
||||||
|
# 2. Smoke test (126 pytest cases, < 1 s)
|
||||||
|
make test
|
||||||
|
|
||||||
|
# 3. Reproduce a full pipeline (DRIVE+WORLD specific, ~1 h CPU)
|
||||||
|
make DRIVE=differential WORLD=field # demos -> bc -> rl -> eval
|
||||||
|
make DRIVE=differential WORLD=field_round
|
||||||
|
make DRIVE=mecanum WORLD=field # see note below
|
||||||
|
make train_all # all 4 combos sequentially
|
||||||
|
|
||||||
|
# Individual stages (each rebuilds upstream artefacts if missing):
|
||||||
|
make DRIVE=differential WORLD=field bc_demos # sim demos
|
||||||
|
make DRIVE=differential WORLD=field bc # behaviour clone
|
||||||
|
make DRIVE=differential WORLD=field rl # KL-PPO fine-tune
|
||||||
|
make DRIVE=differential WORLD=field eval # 10-seed env eval
|
||||||
|
|
||||||
|
# 4. Run in Webots — interactive picker (recommended starting point)
|
||||||
|
tools/webots_menu.sh
|
||||||
|
# Prompts for mode / drive / world / LiDAR FOV / number of dogs /
|
||||||
|
# flock size / perception (LiDAR vs GT) / headless, then dispatches.
|
||||||
|
|
||||||
|
# Or invoke the launcher directly:
|
||||||
|
tools/run_webots.sh 10 bc differential field # BC, diff, rect field
|
||||||
|
tools/run_webots.sh 10 rl differential field_round # RL, diff, round field
|
||||||
|
tools/run_webots.sh 5 strombom differential field # analytic baseline
|
||||||
|
HERDING_USE_GT=1 tools/run_webots.sh 5 strombom differential field
|
||||||
|
# GT bypass ablation
|
||||||
|
HERDING_LIDAR=360 tools/run_webots.sh 5 bc differential field
|
||||||
|
# 360° FOV ablation
|
||||||
|
HERDING_NDOGS=2 HERDING_AXIS_LEAK=0.3 tools/run_webots.sh 5 strombom differential field
|
||||||
|
# dual-shepherd axis split
|
||||||
|
```
|
||||||
|
|
||||||
|
`make help` lists every Makefile target and the overridable hyperparameters.
|
||||||
|
|
||||||
|
**Mecanum note**: the `ShepherdDogMecanum.proto` uses physical roller
|
||||||
|
hinges in Webots. The Webots calibration shows ~60% strafe efficiency
|
||||||
|
and ~28% backward bleed compared to textbook mecanum; the gym
|
||||||
|
kinematics in `HERDING_MEC_WEBOTS` are tuned to match. **Mecanum BC/RL
|
||||||
|
policies need to be retrained against this preset** — see the retrain
|
||||||
|
flow in the Mecanum results section below.
|
||||||
|
|
||||||
|
## Documentation map
|
||||||
|
|
||||||
|
- This README is the project overview: architecture, quick start, and
|
||||||
|
headline results.
|
||||||
|
- `training/README.md` has the command-level training and evaluation
|
||||||
|
details for demo collection, BC, PPO fine-tuning, and policy artifacts.
|
||||||
|
- `docs/project.md` is the original course proposal/goals document, kept
|
||||||
|
for traceability rather than as run instructions.
|
||||||
|
|
||||||
|
## Layout
|
||||||
|
|
||||||
|
```
|
||||||
|
herding/ — perception / control / world primitives
|
||||||
|
config.py — frozen dataclasses for all tunable parameters;
|
||||||
|
named presets HERDING_DEFAULT / HERDING_WEBOTS /
|
||||||
|
HERDING_MEC_WEBOTS
|
||||||
|
world/
|
||||||
|
geometry.py field/pen constants, world-shape switch
|
||||||
|
diffdrive.py differential + mecanum kinematics
|
||||||
|
flocking_sim.py Reynolds + Strömbom 2014 sheep dynamics
|
||||||
|
perception/
|
||||||
|
lidar_sim.py fast 2D raycast for the gym env
|
||||||
|
lidar_perception.py scan → world-frame cluster centroids + filters
|
||||||
|
sheep_tracker.py multi-target NN tracker with FOV memory
|
||||||
|
and the consensus-promotion stage
|
||||||
|
obs.py 32-D order-invariant observation builder
|
||||||
|
control/
|
||||||
|
strombom.py canonical CoM collect/drive heuristic
|
||||||
|
(round-world aware)
|
||||||
|
sequential.py single-target "pin-and-push" alternative
|
||||||
|
universal.py teacher used for BC demo collection
|
||||||
|
(Strömbom + mecanum omega + straggler recovery)
|
||||||
|
active_scan.py rotate-on-empty + walk-to-centre fallback
|
||||||
|
modulation.py shared near-sheep speed-modulation helper
|
||||||
|
|
||||||
|
controllers/
|
||||||
|
sheep/sheep.py — Webots sheep controller
|
||||||
|
shepherd_dog/
|
||||||
|
shepherd_dog.py — Webots dog controller, mode-switched
|
||||||
|
policy_loader.py — SB3 PPO / RecurrentPPO loader with frame stack
|
||||||
|
|
||||||
|
training/
|
||||||
|
herding_env.py — Gymnasium env (LiDAR + tracker by default)
|
||||||
|
bc/collect.py — sim demos via the selected teacher (default: universal)
|
||||||
|
bc/pretrain.py — supervised BC into MLP
|
||||||
|
rl/train.py — KL-regularised PPO fine-tune of BC
|
||||||
|
rl/train_lstm.py — RecurrentPPO variant (ablation)
|
||||||
|
eval.py — analytic + learned policy comparison harness
|
||||||
|
runs/ — checkpoints (whitelisted in .gitignore)
|
||||||
|
requirements.txt
|
||||||
|
|
||||||
|
tests/ — 126 pytest cases, < 1 s on CPU
|
||||||
|
|
||||||
|
tools/
|
||||||
|
run_webots.sh — launch Webots with N sheep + chosen mode + world
|
||||||
|
webots_sweep.sh — headless sweep across modes × drives × worlds
|
||||||
|
webots_sweep_gt.sh — same with HERDING_USE_GT=1 (perfect perception)
|
||||||
|
calibrate_mecanum.sh — measure mecanum body velocity vs gym prediction
|
||||||
|
gen_mecanum_wheels.py — regenerate the 32 mecanum roller hinges
|
||||||
|
benchmark_lidar.py — tracker quality benchmark
|
||||||
|
|
||||||
|
Makefile — pipeline orchestrator
|
||||||
|
(make DRIVE=… WORLD=… rl, make train_all, …)
|
||||||
|
|
||||||
|
worlds/
|
||||||
|
field.wbt — rectangular world (3 m gate, external pen)
|
||||||
|
field_round.wbt — circular world (radius 15 m, same pen)
|
||||||
|
|
||||||
|
protos/
|
||||||
|
Sheep.proto — sheep robot
|
||||||
|
ShepherdDog.proto — diff-drive dog, 140° LiDAR
|
||||||
|
ShepherdDog360.proto — diff-drive dog, 360° LiDAR (ablation)
|
||||||
|
ShepherdDogMecanum.proto — 4-wheel mecanum with physical roller hinges
|
||||||
|
|
||||||
|
docs/project.md — original course proposal/goals
|
||||||
|
```
|
||||||
|
|
||||||
|
## Shared low-level control
|
||||||
|
|
||||||
|
Every dog mode (Strömbom, Sequential, BC, RL) routes its action
|
||||||
|
through `herding/control/modulation.py:modulate_speed_near_sheep`,
|
||||||
|
which scales action magnitude down when within ~2.5 m of the nearest
|
||||||
|
tracked sheep. This stops the dog from charging in at full speed and
|
||||||
|
scattering the flock. Direction (intent) is preserved.
|
||||||
|
|
||||||
|
All modes also share the same EMA action smoother in
|
||||||
|
`controllers/shepherd_dog/shepherd_dog.py:ACTION_SMOOTH = 0.55`.
|
||||||
|
|
||||||
|
## Results — Webots end-to-end, canonical 140° LiDAR
|
||||||
|
|
||||||
|
Each cell is the step count at which the dog penned all N sheep in a single
|
||||||
|
trial, `HERDING_USE_GT=0` (LiDAR perception, no ground truth bypass),
|
||||||
|
default consensus tracker.
|
||||||
|
|
||||||
|
### Differential drive
|
||||||
|
|
||||||
|
| Mode | World | n=5 | n=10 |
|
||||||
|
|---|---|---:|---:|
|
||||||
|
| Strömbom | field | 7528 | 11620 |
|
||||||
|
| Strömbom | field_round | 8611 | 10339 |
|
||||||
|
| Sequential | field | 7135 | 16843 |
|
||||||
|
| Sequential | field_round | 6019 | 8494 |
|
||||||
|
| BC | field | 11698 | 15079 |
|
||||||
|
| BC | field_round | 7234 | 11320 |
|
||||||
|
| RL | field | 10039 | 13954 |
|
||||||
|
| RL | field_round | 5803 | 9151 |
|
||||||
|
|
||||||
|
RL is **strictly faster than BC** on every comparable cell.
|
||||||
|
|
||||||
|
### LiDAR vs GT bypass (diff drive)
|
||||||
|
|
||||||
|
GT bypass replaces the LiDAR tracker with perfect emitter positions.
|
||||||
|
LiDAR is the default; GT is a perception ablation
|
||||||
|
(`HERDING_USE_GT=1`):
|
||||||
|
|
||||||
|
| Mode | World | n=5 LiDAR | n=5 GT | n=10 LiDAR | n=10 GT |
|
||||||
|
|---|---|---:|---:|---:|---:|
|
||||||
|
| Strömbom | field | 7528 | **5254** | 11620 | **7342** |
|
||||||
|
| Strömbom | field_round | 8611 | **3631** | 10339 | **7084** |
|
||||||
|
| Sequential | field | **7135** | 11092 | 16843 | **8698** |
|
||||||
|
| Sequential | field_round | 6019 | **3454** | 8494 | **7324** |
|
||||||
|
|
||||||
|
GT is generally faster (perfect perception → fewer wasted steps).
|
||||||
|
Sequential n=5 / field is the one cell where GT is *slower* — its
|
||||||
|
straggler heuristic appears to over-correct when the dog has full
|
||||||
|
information.
|
||||||
|
|
||||||
|
### Mecanum (differential is the headline)
|
||||||
|
|
||||||
|
`ShepherdDogMecanum.proto` has 32 physical roller hinges giving true
|
||||||
|
omnidirectional motion in Webots — `tools/calibrate_mecanum.sh`
|
||||||
|
confirms the X-pattern. Calibration shows ~60% strafe efficiency vs
|
||||||
|
textbook (versus ~89% on forward), so the gym needs to match the
|
||||||
|
imperfect physical mecanum for the trained policy to compensate.
|
||||||
|
`HERDING_MEC_WEBOTS` is the matched preset; `training/bc/collect.py`
|
||||||
|
and `training/rl/train.py` auto-select it for mecanum runs. Mecanum
|
||||||
|
policies were trained on the textbook gym, so they need to be
|
||||||
|
retrained against `HERDING_MEC_WEBOTS` (≈ 2 h per combo, 4 combos):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m training.bc.collect \
|
||||||
|
--drive-mode mecanum --world field --use-webots-preset \
|
||||||
|
--out training/bc/demos_mecanum_field.npz
|
||||||
|
python -m training.bc.pretrain \
|
||||||
|
--demos training/bc/demos_mecanum_field.npz \
|
||||||
|
--out training/runs/bc_mecanum_field
|
||||||
|
python -m training.rl.train \
|
||||||
|
--bc training/runs/bc_mecanum_field \
|
||||||
|
--out training/runs/rl_mecanum_field \
|
||||||
|
--drive-mode mecanum --world field --use-webots-preset
|
||||||
|
```
|
||||||
|
|
||||||
|
Repeat for `field_round`.
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
Educational project for the *Topics in Intelligent Robotics* course.
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
"""Backwards-compat shim — flocking logic now lives in ``herding.world.flocking_sim``.
|
||||||
|
|
||||||
|
Kept so any external reference still resolves.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
|
||||||
|
if _PROJECT_ROOT not in sys.path:
|
||||||
|
sys.path.insert(0, _PROJECT_ROOT)
|
||||||
|
|
||||||
|
from herding.world.flocking_sim import ( # noqa: F401
|
||||||
|
MAX_SPEED, FLEE_SPEED, WANDER_SPEED,
|
||||||
|
WALL_MARGIN, WALL_HARD_MARGIN, WALL_HARD_GAIN,
|
||||||
|
FLEE_DIST, SEPARATION_DIST, COHESION_DIST,
|
||||||
|
PEN_MARGIN,
|
||||||
|
compute_heading_speed,
|
||||||
|
)
|
||||||
|
from herding.world.geometry import ( # noqa: F401
|
||||||
|
FIELD_X, FIELD_Y, PEN_X, PEN_Y,
|
||||||
|
in_pen,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Legacy flat names, kept so older ``from ... import X_MIN``-style imports
# keep resolving. Each pair is unpacked from the matching two-element
# bounds object re-exported from herding.world.geometry.
(X_MIN, X_MAX), (Y_MIN, Y_MAX) = FIELD_X, FIELD_Y
(PEN_X_MIN, PEN_X_MAX), (PEN_Y_MIN, PEN_Y_MAX) = PEN_X, PEN_Y
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
# Webots reads this file before starting the controller. It tells
|
||||||
|
# Webots which Python interpreter to launch (default is system
|
||||||
|
# `python3`, which usually lacks NumPy).
|
||||||
|
#
|
||||||
|
# Webots supports environment-variable expansion in this file, so we
|
||||||
|
# defer the interpreter path to $HERDING_PYTHON — set that variable
|
||||||
|
# once in your shell (or in `tools/setup_env.sh`) before launching
|
||||||
|
# Webots, and the controllers in this project will pick it up.
|
||||||
|
[python]
|
||||||
|
COMMAND = $(HERDING_PYTHON)
|
||||||
+124
-158
@@ -1,213 +1,179 @@
|
|||||||
"""
|
"""Sheep flocking controller (Webots).
|
||||||
Sheep flocking controller (Webots, Reynolds boids variant).
|
|
||||||
|
|
||||||
Each sheep broadcasts its GPS position every 3 steps on channel 1 and
|
Each sheep emits its GPS position every 3 steps and listens for the
|
||||||
listens for the dog and peer sheep positions. Peers are keyed by robot
|
dog's position and peer-sheep positions. The behavioural step is
|
||||||
name so each neighbour has exactly one current entry in the dict.
|
delegated to :func:`herding.world.flocking_sim.compute_heading_speed`
|
||||||
|
so the env and Webots use identical sheep dynamics.
|
||||||
|
|
||||||
Force stack each step (summed then converted to a heading + speed):
|
A sheep latches penned the first time it crosses the gate plane south;
|
||||||
flee — away from dog, quadratic ramp, dominant when close
|
the wool turns pink (via the exposed ``woolColor`` PROTO field) and
|
||||||
cohesion — toward flock centre, halved while fleeing
|
the dynamics switch to in-pen containment.
|
||||||
separation — inverse-distance push, prevents physical overlap
|
|
||||||
walls — linear repulsion from field boundary
|
|
||||||
wander — small persistent drift for natural idle motion
|
|
||||||
|
|
||||||
Pen behaviour: on first entry into the quarantine pen the sheep latches
|
|
||||||
permanently — it turns pink (via the exposed woolColor PROTO field) and
|
|
||||||
the normal force stack is replaced by pen-confinement forces only.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import random
|
|
||||||
import math
|
import math
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# --- Make the shared herding/ package importable from this controller dir ---
|
||||||
|
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
|
||||||
|
if _PROJECT_ROOT not in sys.path:
|
||||||
|
sys.path.insert(0, _PROJECT_ROOT)
|
||||||
|
|
||||||
from controller import Supervisor
|
from controller import Supervisor
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
from herding.world.diffdrive import heading_speed_to_wheels
|
||||||
# Tuning constants
|
from herding.world.flocking_sim import MAX_SPEED, compute_heading_speed
|
||||||
# ---------------------------------------------------------------------------
|
from herding.world.geometry import (
|
||||||
|
SHEEP_MAX_WHEEL_OMEGA,
|
||||||
|
is_penned,
|
||||||
|
)
|
||||||
|
|
||||||
MAX_SPEED = 22.0 # rad/s hard clamp on both motors
|
|
||||||
FLEE_SPEED = 20.0 # rad/s upper bound while panicking
|
|
||||||
WANDER_SPEED = 3.0 # rad/s lower bound during calm wandering
|
|
||||||
|
|
||||||
X_MIN, X_MAX = -14.5, 14.5 # stone wall inner edges (metres)
|
# --- Devices ---
|
||||||
Y_MIN, Y_MAX = -14.5, 14.5
|
robot = Supervisor()
|
||||||
WALL_MARGIN = 3.5 # avoidance starts this far from the wall
|
|
||||||
|
|
||||||
FLEE_DIST = 7.0 # dog within this radius triggers flee (metres)
|
|
||||||
SEPARATION_DIST = 2.5 # inverse-distance push active inside this radius
|
|
||||||
COHESION_DIST = 8.0 # pull toward flock centre active inside this radius
|
|
||||||
|
|
||||||
PEN_X_MIN, PEN_X_MAX = 10.0, 13.0 # quarantine pen extents (metres)
|
|
||||||
PEN_Y_MIN, PEN_Y_MAX = -15.0, -8.0 # open entrance at y=-8, gate at y=-15
|
|
||||||
PEN_MARGIN = 0.8 # confinement force starts this far from pen wall
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Device setup
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
robot = Supervisor()
|
|
||||||
timestep = int(robot.getBasicTimeStep())
|
timestep = int(robot.getBasicTimeStep())
|
||||||
name = robot.getName()
|
name = robot.getName()
|
||||||
self_node = robot.getSelf()
|
self_node = robot.getSelf()
|
||||||
|
|
||||||
left_motor = robot.getDevice("left wheel motor")
|
# Seed Python's RNG (shared with the dog controller) so a fixed
|
||||||
|
# HERDING_SEED produces reproducible runs. Each sheep mixes its name
|
||||||
|
# into the seed so the flock isn't all identical.
|
||||||
|
def _read_runtime_cfg():
    """Parse ``herding_runtime.cfg`` in the project root into a dict.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Keys are stripped and upper-cased; values are stripped.
    A missing file yields an empty dict. If an ``OSError`` interrupts
    reading, whatever was parsed up to that point is returned.
    """
    settings = {}
    cfg_file = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
    if not os.path.exists(cfg_file):
        return settings
    try:
        with open(cfg_file) as handle:
            for raw in handle:
                entry = raw.strip()
                if entry and not entry.startswith("#") and "=" in entry:
                    key, _sep, value = entry.partition("=")
                    settings[key.strip().upper()] = value.strip()
    except OSError:
        # Best effort: keep the entries read before the failure.
        pass
    return settings
|
||||||
|
|
||||||
|
import zlib

# Resolve the seed: the environment variable wins, then the runtime cfg
# file; an unset or empty value leaves Python's RNG unseeded.
_rt = _read_runtime_cfg()
_seed_raw = (os.environ.get("HERDING_SEED")
             or _rt.get("HERDING_SEED")
             or "").strip()
if _seed_raw:
    try:
        # Mix a digest of the robot name into the seed so each sheep gets
        # its own stream while the flock as a whole stays deterministic.
        # The built-in hash() must not be used here: str hashes are salted
        # per interpreter process (PYTHONHASHSEED), so the mixed seed would
        # differ on every launch. CRC32 of the name is stable across runs.
        _name_digest = zlib.crc32(name.encode("utf-8")) & 0x7FFFFFFF
        random.seed(int(_seed_raw) ^ _name_digest)
    except ValueError:
        # Non-integer seed: keep running unseeded, but say so.
        print(f"[sheep] ignoring non-integer HERDING_SEED={_seed_raw!r}")
|
||||||
|
|
||||||
|
left_motor = robot.getDevice("left wheel motor")
|
||||||
right_motor = robot.getDevice("right wheel motor")
|
right_motor = robot.getDevice("right wheel motor")
|
||||||
left_motor.setPosition(float("inf"))
|
left_motor.setPosition(float("inf"))
|
||||||
right_motor.setPosition(float("inf"))
|
right_motor.setPosition(float("inf"))
|
||||||
left_motor.setVelocity(0.0)
|
left_motor.setVelocity(0.0)
|
||||||
right_motor.setVelocity(0.0)
|
right_motor.setVelocity(0.0)
|
||||||
|
MOTOR_MAX = min(left_motor.getMaxVelocity(), SHEEP_MAX_WHEEL_OMEGA)
|
||||||
|
|
||||||
gps = robot.getDevice("gps"); gps.enable(timestep)
|
gps = robot.getDevice("gps"); gps.enable(timestep)
|
||||||
compass = robot.getDevice("compass"); compass.enable(timestep)
|
compass = robot.getDevice("compass"); compass.enable(timestep)
|
||||||
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
|
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
|
||||||
emitter = robot.getDevice("emitter")
|
emitter = robot.getDevice("emitter")
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Helpers
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def norm_angle(a):
|
|
||||||
return math.atan2(math.sin(a), math.cos(a))
|
|
||||||
|
|
||||||
|
# --- Helpers ---
|
||||||
|
|
||||||
def bearing():
|
def bearing():
|
||||||
# Compass returns north direction in sensor frame; for this Z-up world
|
"""World-frame heading (0 = east, π/2 = north)."""
|
||||||
# with north = +Y, atan2(n[0], n[1]) gives the standard math angle
|
|
||||||
# (0 = east, π/2 = north) matching atan2(fy, fx) used for heading.
|
|
||||||
n = compass.getValues()
|
n = compass.getValues()
|
||||||
return math.atan2(n[0], n[1])
|
return math.atan2(n[0], n[1])
|
||||||
|
|
||||||
|
|
||||||
def drive(heading, speed):
|
def drive(heading, speed_motor):
|
||||||
err = norm_angle(heading - bearing())
|
left_w, right_w = heading_speed_to_wheels(
|
||||||
# Scale forward component by cos(err): at 90° error fwd→0 so the robot
|
heading, min(speed_motor, MAX_SPEED), bearing(), MOTOR_MAX, k_turn=4.0
|
||||||
# spins in place to realign rather than driving sideways at full speed.
|
)
|
||||||
fwd = speed * max(0.0, math.cos(err))
|
left_motor.setVelocity(left_w)
|
||||||
k = 4.0
|
right_motor.setVelocity(right_w)
|
||||||
left_motor.setVelocity( max(-MAX_SPEED, min(MAX_SPEED, fwd - k * err)))
|
|
||||||
right_motor.setVelocity(max(-MAX_SPEED, min(MAX_SPEED, fwd + k * err)))
|
|
||||||
|
|
||||||
|
|
||||||
def paint_pink():
|
def paint_pink():
|
||||||
# woolColor is declared as a PROTO field with IS binding to the DEF WOOL
|
"""Switch the sheep's wool to pink via the exposed PROTO field."""
|
||||||
# PBRAppearance baseColor. Changing it here propagates to every USE WOOL
|
|
||||||
# shape on the body. Direct field access avoids PROTO-internal opacity.
|
|
||||||
self_node.getField("woolColor").setSFColor([1.0, 0.55, 0.72])
|
self_node.getField("woolColor").setSFColor([1.0, 0.55, 0.72])
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# State
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
# --- State ---
|
||||||
wander_angle = random.uniform(-math.pi, math.pi)
|
wander_angle = random.uniform(-math.pi, math.pi)
|
||||||
step = 0
|
step_count = 0
|
||||||
dog_x = None
|
dogs = {} # name → (x, y); supports the dual-dog setup
|
||||||
dog_y = None
|
peers = {} # name → (x, y); periodically pruned
|
||||||
peers = {} # name → (x, y), one entry per neighbour, cleared every 30 steps
|
|
||||||
penned = False
|
penned = False
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# Safety net for differential-drive sheep pinned against a wall.
|
||||||
# Main loop
|
_prev_x, _prev_y = None, None
|
||||||
# ---------------------------------------------------------------------------
|
_stuck_count = 0
|
||||||
|
STUCK_STEPS = 20
|
||||||
|
STUCK_DIST = 0.05
|
||||||
|
|
||||||
|
|
||||||
|
# --- Main loop ---
|
||||||
while robot.step(timestep) != -1:
|
while robot.step(timestep) != -1:
|
||||||
step += 1
|
step_count += 1
|
||||||
pos = gps.getValues()
|
pos = gps.getValues()
|
||||||
x, y = pos[0], pos[1]
|
x, y = pos[0], pos[1]
|
||||||
|
|
||||||
# Pen entry: one-way latch, never unset
|
if not penned and is_penned(x, y):
|
||||||
if not penned and PEN_X_MIN < x < PEN_X_MAX and PEN_Y_MIN < y < PEN_Y_MAX:
|
|
||||||
penned = True
|
penned = True
|
||||||
paint_pink()
|
paint_pink()
|
||||||
|
|
||||||
# Refresh peer table (clear before receiving so fresh data is never lost)
|
# Stale peers get dropped periodically so a peer that's gone silent
|
||||||
if step % 30 == 0:
|
# doesn't permanently distort the local CoM. Dogs are pruned too —
|
||||||
|
# otherwise a temporarily-silent dog stays in `dogs` forever and
|
||||||
|
# the closest-dog flee target stops being accurate.
|
||||||
|
if step_count % 30 == 0:
|
||||||
peers.clear()
|
peers.clear()
|
||||||
|
dogs.clear()
|
||||||
while receiver.getQueueLength() > 0:
|
while receiver.getQueueLength() > 0:
|
||||||
msg = receiver.getString()
|
msg = receiver.getString()
|
||||||
receiver.nextPacket()
|
receiver.nextPacket()
|
||||||
p = msg.split(":")
|
parts = msg.split(":")
|
||||||
if p[0] == "dog" and len(p) >= 3:
|
# Legacy single-dog message: "dog:x:y".
|
||||||
dog_x, dog_y = float(p[1]), float(p[2])
|
# Dual-dog message: "dog:NAME:x:y".
|
||||||
elif p[0] == "sheep" and len(p) >= 4 and p[1] != name:
|
if parts[0] == "dog" and len(parts) == 3:
|
||||||
peers[p[1]] = (float(p[2]), float(p[3]))
|
dogs["ShepherdDog"] = (float(parts[1]), float(parts[2]))
|
||||||
|
elif parts[0] == "dog" and len(parts) >= 4:
|
||||||
fx, fy = 0.0, 0.0
|
dogs[parts[1]] = (float(parts[2]), float(parts[3]))
|
||||||
|
elif parts[0] == "sheep" and len(parts) >= 4 and parts[1] != name:
|
||||||
if penned:
|
peers[parts[1]] = (float(parts[2]), float(parts[3]))
|
||||||
# Inside pen: wander freely, strong boundary forces prevent exit,
|
|
||||||
# separation still active to avoid collisions with other penned sheep.
|
|
||||||
|
|
||||||
pm = PEN_MARGIN
|
|
||||||
if x < PEN_X_MIN + pm: fx += ((PEN_X_MIN + pm - x) / pm) * 15.0
|
|
||||||
if x > PEN_X_MAX - pm: fx -= ((x - (PEN_X_MAX - pm)) / pm) * 15.0
|
|
||||||
if y < PEN_Y_MIN + pm: fy += ((PEN_Y_MIN + pm - y) / pm) * 15.0
|
|
||||||
if y > PEN_Y_MAX - pm: fy -= ((y - (PEN_Y_MAX - pm)) / pm) * 15.0
|
|
||||||
|
|
||||||
for px, py in peers.values():
|
|
||||||
dx, dy = px - x, py - y
|
|
||||||
d = math.hypot(dx, dy)
|
|
||||||
if 0.05 < d < SEPARATION_DIST:
|
|
||||||
push = (SEPARATION_DIST - d) / d
|
|
||||||
fx -= (dx / d) * push * 2.5
|
|
||||||
fy -= (dy / d) * push * 2.5
|
|
||||||
|
|
||||||
if random.random() < 0.02:
|
|
||||||
wander_angle += random.uniform(-0.6, 0.6)
|
|
||||||
fx += math.cos(wander_angle) * 0.5
|
|
||||||
fy += math.sin(wander_angle) * 0.5
|
|
||||||
|
|
||||||
|
# Flee target = closest known dog; the flocking heuristic only needs
|
||||||
|
# one (vx, vy) repulsion vector regardless of how many dogs are out
|
||||||
|
# there. With two dogs at orthogonal axes, the sheep will see one of
|
||||||
|
# them as nearest at any moment and react to it; the other dog's
|
||||||
|
# influence enters through the sheep that does react to it pushing
|
||||||
|
# this sheep in turn (Reynolds peer-repulsion).
|
||||||
|
if dogs:
|
||||||
|
closest = min(dogs.values(), key=lambda d: math.hypot(d[0] - x, d[1] - y))
|
||||||
|
dog_xy = closest
|
||||||
else:
|
else:
|
||||||
fleeing = False
|
dog_xy = None
|
||||||
|
heading, speed, wander_angle = compute_heading_speed(
|
||||||
|
x=x, y=y, penned=penned, dog_xy=dog_xy, peers=peers,
|
||||||
|
wander_angle=wander_angle,
|
||||||
|
)
|
||||||
|
|
||||||
# Flee — quadratic ramp so force grows rapidly as the dog closes in
|
# Stuck-against-wall recovery: drive toward the field centre.
|
||||||
if dog_x is not None:
|
if _prev_x is not None:
|
||||||
dx = dog_x - x
|
moved = math.hypot(x - _prev_x, y - _prev_y)
|
||||||
dy = dog_y - y
|
_stuck_count = _stuck_count + 1 if moved < STUCK_DIST else 0
|
||||||
dist = math.hypot(dx, dy)
|
if _stuck_count >= STUCK_STEPS:
|
||||||
if 0.01 < dist < FLEE_DIST:
|
heading = math.atan2(-y, -x)
|
||||||
fleeing = True
|
speed = MAX_SPEED
|
||||||
t = 1.0 - dist / FLEE_DIST
|
_stuck_count = 0
|
||||||
s = t * t * 20.0
|
_prev_x, _prev_y = x, y
|
||||||
fx -= (dx / dist) * s
|
|
||||||
fy -= (dy / dist) * s
|
|
||||||
|
|
||||||
# Cohesion — halved while fleeing to reduce mid-panic collisions
|
|
||||||
cx, cy, cn = 0.0, 0.0, 0
|
|
||||||
for px, py in peers.values():
|
|
||||||
d = math.hypot(px - x, py - y)
|
|
||||||
if 0.3 < d < COHESION_DIST:
|
|
||||||
cx += px; cy += py; cn += 1
|
|
||||||
if cn > 0:
|
|
||||||
w = 0.08 if fleeing else 0.15
|
|
||||||
fx += (cx / cn - x) * w
|
|
||||||
fy += (cy / cn - y) * w
|
|
||||||
|
|
||||||
# Separation — inverse-distance: huge when nearly overlapping, fades quickly
|
|
||||||
for px, py in peers.values():
|
|
||||||
dx, dy = px - x, py - y
|
|
||||||
d = math.hypot(dx, dy)
|
|
||||||
if 0.05 < d < SEPARATION_DIST:
|
|
||||||
push = (SEPARATION_DIST - d) / d
|
|
||||||
fx -= (dx / d) * push * 2.5
|
|
||||||
fy -= (dy / d) * push * 2.5
|
|
||||||
|
|
||||||
# Walls
|
|
||||||
if x < X_MIN + WALL_MARGIN: fx += ((X_MIN + WALL_MARGIN - x) / WALL_MARGIN) * 6.0
|
|
||||||
if x > X_MAX - WALL_MARGIN: fx -= ((x - (X_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
|
|
||||||
if y < Y_MIN + WALL_MARGIN: fy += ((Y_MIN + WALL_MARGIN - y) / WALL_MARGIN) * 6.0
|
|
||||||
if y > Y_MAX - WALL_MARGIN: fy -= ((y - (Y_MAX - WALL_MARGIN)) / WALL_MARGIN) * 6.0
|
|
||||||
|
|
||||||
# Wander — suppressed while fleeing so drift cannot deflect the flee heading
|
|
||||||
if not fleeing:
|
|
||||||
if random.random() < 0.02:
|
|
||||||
wander_angle += random.uniform(-0.6, 0.6)
|
|
||||||
fx += math.cos(wander_angle) * 0.5
|
|
||||||
fy += math.sin(wander_angle) * 0.5
|
|
||||||
|
|
||||||
heading = math.atan2(fy, fx)
|
|
||||||
mag = math.hypot(fx, fy)
|
|
||||||
speed = max(WANDER_SPEED, min(FLEE_SPEED, mag * 3.0))
|
|
||||||
drive(heading, speed)
|
drive(heading, speed)
|
||||||
|
|
||||||
if step % 3 == 0:
|
if step_count % 3 == 0:
|
||||||
emitter.send(f"sheep:{name}:{x:.4f}:{y:.4f}")
|
emitter.send(f"sheep:{name}:{x:.4f}:{y:.4f}")
|
||||||
|
|||||||
@@ -0,0 +1,123 @@
|
|||||||
|
"""Lazy SB3 policy loader for the dog controller.
|
||||||
|
|
||||||
|
SB3 is imported only when a learned policy is actually requested,
|
||||||
|
so the analytic modes can run on installs without stable-baselines3
|
||||||
|
or torch.
|
||||||
|
|
||||||
|
The handle auto-detects frame stacking from the policy's expected
|
||||||
|
observation dimension: if it's a multiple of the single-frame
|
||||||
|
``OBS_DIM``, an internal buffer of the last K frames is maintained
|
||||||
|
and concatenated on each ``predict`` call.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
class PolicyHandle:
    """Wrap a loaded policy (+ optional VecNormalize) for ``predict(obs)``.

    Supports both MLP (PPO) and recurrent (RecurrentPPO/LSTM) policies.
    For LSTM policies, frame_stack is forced to 1 and the LSTM hidden
    state is maintained across calls; ``reset_recurrent`` is exposed for
    new episodes.
    """

    def __init__(self, model, vecnorm, recurrent: bool = False):
        """Store the policy and derive the frame-stack depth.

        Args:
            model: loaded policy exposing ``observation_space.shape`` and
                ``predict``.
            vecnorm: optional object exposing ``normalize_obs``; ``None``
                disables observation normalisation.
            recurrent: ``True`` when ``model`` carries LSTM state.
        """
        self.model = model
        self.vecnorm = vecnorm
        self.recurrent = recurrent
        # Deferred import, done at construction time rather than module import.
        from herding.perception.obs import OBS_DIM
        policy_dim = int(model.observation_space.shape[0])
        if recurrent:
            self.frame_stack = 1
        elif policy_dim % OBS_DIM == 0 and policy_dim // OBS_DIM >= 1:
            # Policy input is a whole number of single frames.
            self.frame_stack = policy_dim // OBS_DIM
        else:
            self.frame_stack = 1
        self._buffer: list = []      # last ``frame_stack`` single frames, oldest first
        self._single_dim = OBS_DIM   # length of one un-stacked observation
        self._lstm_state = None
        self._first_step = True

    def reset_recurrent(self):
        """Drop the LSTM state, the first-step flag and the frame buffer."""
        self._lstm_state = None
        self._first_step = True
        self._buffer = []

    def predict(self, obs):
        """Return the deterministic action for one observation.

        A single-frame ``obs`` is stacked with the buffered history when
        ``frame_stack > 1`` (the first frame is repeated to fill the
        buffer). An ``obs`` whose length differs from the single-frame
        size is treated as already stacked and passed through unchanged.

        Returns:
            The first (only) row of the model's batched action output.
        """
        import numpy as np
        single = np.asarray(obs, dtype=np.float32).reshape(-1)
        if single.shape[0] != self._single_dim:
            # Caller passed an already-stacked obs.
            stacked = single
        elif self.frame_stack > 1:
            if not self._buffer:
                self._buffer = [single.copy() for _ in range(self.frame_stack)]
            else:
                # Copy before storing: ``single`` can be a view of the
                # caller's array, and a caller that reuses its observation
                # buffer would otherwise overwrite the stored history.
                self._buffer.append(single.copy())
                if len(self._buffer) > self.frame_stack:
                    self._buffer = self._buffer[-self.frame_stack:]
            stacked = np.concatenate(self._buffer, axis=0)
        else:
            stacked = single

        obs_b = stacked.reshape(1, -1)  # add the batch dimension
        if self.vecnorm is not None:
            obs_b = self.vecnorm.normalize_obs(obs_b)
        if self.recurrent:
            # episode_start is True only on the first call after a reset.
            episode_start = np.array([self._first_step], dtype=bool)
            action, self._lstm_state = self.model.predict(
                obs_b, state=self._lstm_state,
                episode_start=episode_start, deterministic=True,
            )
            self._first_step = False
        else:
            action, _ = self.model.predict(obs_b, deterministic=True)
        return action[0]
|
||||||
|
|
||||||
|
|
||||||
|
def load(model_path: str, vecnorm_path: str | None = None) -> PolicyHandle:
|
||||||
|
"""Load a policy zip (+ optional VecNormalize pickle) from disk.
|
||||||
|
|
||||||
|
``model_path`` may be a ``.zip`` file or a directory; in the
|
||||||
|
latter case ``policy.zip`` is preferred, with ``final.zip`` as
|
||||||
|
a fallback for partially-completed RL runs.
|
||||||
|
"""
|
||||||
|
p = Path(model_path)
|
||||||
|
if p.is_dir():
|
||||||
|
zip_candidates = [p / "policy.zip", p / "final.zip"]
|
||||||
|
zip_path = next((z for z in zip_candidates if z.exists()), None)
|
||||||
|
if zip_path is None:
|
||||||
|
raise FileNotFoundError(
|
||||||
|
f"No policy zip in {p} (looked for policy.zip, final.zip)"
|
||||||
|
)
|
||||||
|
if vecnorm_path is None:
|
||||||
|
vn = p / "vecnormalize.pkl"
|
||||||
|
if vn.exists():
|
||||||
|
vecnorm_path = str(vn)
|
||||||
|
else:
|
||||||
|
zip_path = p
|
||||||
|
|
||||||
|
# Deferred imports so the analytic path doesn't require SB3.
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env import VecNormalize # noqa: F401
|
||||||
|
|
||||||
|
# Try RecurrentPPO (LSTM) first, fall back to PPO (MLP).
|
||||||
|
recurrent = False
|
||||||
|
model = None
|
||||||
|
try:
|
||||||
|
from sb3_contrib import RecurrentPPO
|
||||||
|
model = RecurrentPPO.load(str(zip_path), device="auto")
|
||||||
|
recurrent = True
|
||||||
|
except Exception:
|
||||||
|
model = PPO.load(str(zip_path), device="auto")
|
||||||
|
|
||||||
|
vecnorm = None
|
||||||
|
if vecnorm_path and os.path.exists(vecnorm_path):
|
||||||
|
import pickle
|
||||||
|
with open(vecnorm_path, "rb") as f:
|
||||||
|
vecnorm = pickle.load(f)
|
||||||
|
vecnorm.training = False
|
||||||
|
vecnorm.norm_reward = False
|
||||||
|
return PolicyHandle(model=model, vecnorm=vecnorm, recurrent=recurrent)
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
# Webots reads this file before starting the controller. It tells
|
||||||
|
# Webots which Python interpreter to launch (default is system
|
||||||
|
# `python3`, which usually lacks SB3/PyTorch).
|
||||||
|
#
|
||||||
|
# Webots supports environment-variable expansion in this file, so we
|
||||||
|
# defer the interpreter path to $HERDING_PYTHON — set that variable
|
||||||
|
# once in your shell (or in `tools/setup_env.sh`) before launching
|
||||||
|
# Webots, and the controllers in this project will pick it up.
|
||||||
|
[python]
|
||||||
|
COMMAND = $(HERDING_PYTHON)
|
||||||
@@ -1,88 +1,674 @@
|
|||||||
"""
|
"""Shepherd Dog controller (Webots).
|
||||||
Shepherd Dog controller (Webots, manual keyboard control).
|
|
||||||
|
|
||||||
WASD / arrow keys drive the robot. +/- adjust speed in 10 % increments.
|
Mode is selected by ``HERDING_MODE`` — read from the env var or from
|
||||||
GPS position is broadcast every step on channel 1 so sheep controllers
|
the ``herding_runtime.cfg`` file the launcher writes (Webots strips
|
||||||
can compute flee forces. Ears wag continuously via sinusoidal position
|
env vars from controller subprocesses on some setups):
|
||||||
targets — purely cosmetic.
|
|
||||||
|
strombom → canonical Strömbom (2014) collect/drive heuristic
|
||||||
|
wrapped in ActiveScanTeacher (opening rotation +
|
||||||
|
walk-to-centre when the tracker briefly empties)
|
||||||
|
sequential → single-target "pin-and-push", same wrapper
|
||||||
|
universal → mecanum-aware teacher (Strömbom + omega + recovery)
|
||||||
|
bc → behaviour-cloned MLP, trained on universal demos
|
||||||
|
rl → KL-regularised PPO fine-tune of `bc`
|
||||||
|
|
||||||
|
Policy directories are resolved by ``_resolve_policy_dir`` in this file: first
|
||||||
|
``training/runs/{bc,rl}_{drive}_{world}``, then ``training/runs/{bc,rl}_{drive}``,
|
||||||
|
and finally ``training/runs/{bc,rl}`` (legacy single-policy paths).
|
||||||
|
|
||||||
|
Sheep perception
|
||||||
|
----------------
|
||||||
|
The dog perceives sheep through its front-mounted 140° LiDAR
|
||||||
|
(``protos/ShepherdDog.proto``: 180 rays, 12 m max range). Each step:
|
||||||
|
|
||||||
|
1. Read ``lidar.getRangeImage()``.
|
||||||
|
2. Cluster returns into world-frame ``(x, y)`` estimates
|
||||||
|
(``herding.perception.lidar_perception.detections_from_scan``).
|
||||||
|
3. Fold detections into a ``SheepTracker``, which maintains
|
||||||
|
last-seen positions for sheep currently out of FOV, requires
|
||||||
|
consensus across multiple frames before promoting a candidate
|
||||||
|
to a real track, and latches "penned" once a track crosses
|
||||||
|
the gate plane south.
|
||||||
|
|
||||||
|
Setting ``HERDING_USE_GT=1`` bypasses the tracker and feeds emitter
|
||||||
|
ground-truth positions to the policy — useful as a perception
|
||||||
|
ablation for the analytic baselines.
|
||||||
|
|
||||||
|
Sheep emitter messages are otherwise read for diagnostic logging
|
||||||
|
only (``GT_penned`` counter + auto-finish sentinel); the control
|
||||||
|
loop never depends on them.
|
||||||
|
|
||||||
|
Auto-finish
|
||||||
|
-----------
|
||||||
|
When every emitter-reported sheep is penned, the controller writes
|
||||||
|
``training/.run_done``. The launcher (``tools/run_webots.sh``)
|
||||||
|
detects the sentinel and closes Webots so headless sweep runs are
|
||||||
|
bounded.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import math
|
import math
|
||||||
from controller import Robot, Keyboard
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
robot = Robot()
|
# --- Make the shared herding/ package importable from this controller dir ---
|
||||||
|
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
_PROJECT_ROOT = os.path.normpath(os.path.join(_HERE, "..", ".."))
|
||||||
|
if _PROJECT_ROOT not in sys.path:
|
||||||
|
sys.path.insert(0, _PROJECT_ROOT)
|
||||||
|
|
||||||
|
# --- Read runtime cfg early so env vars are set before geometry import ---
|
||||||
|
def _load_runtime_config():
    """Read ``herding_runtime.cfg`` from the project root as a dict.

    Blank lines, ``#`` comment lines and lines lacking ``=`` are ignored.
    Keys are stripped and upper-cased; values are stripped. Returns an
    empty dict when the file is absent or when reading raises ``OSError``
    (entries parsed before the error are discarded).
    """
    cfg_file = os.path.join(_PROJECT_ROOT, "herding_runtime.cfg")
    parsed = {}
    if os.path.exists(cfg_file):
        try:
            with open(cfg_file) as handle:
                for raw in handle:
                    entry = raw.strip()
                    if entry and not entry.startswith("#") and "=" in entry:
                        key, _sep, value = entry.partition("=")
                        parsed[key.strip().upper()] = value.strip()
        except OSError:
            return {}
    return parsed
|
||||||
|
|
||||||
|
_runtime_cfg = _load_runtime_config()
# Export HERDING_* entries from the cfg file into the process environment
# so modules imported later (geometry.py) can read them. Variables that
# are already set in the environment are left alone.
for _rk, _rv in _runtime_cfg.items():
    if _rk.startswith("HERDING_"):
        os.environ.setdefault(_rk, _rv)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from controller import Supervisor
|
||||||
|
|
||||||
|
from herding.control.active_scan import ActiveScanTeacher
|
||||||
|
from herding.control.modulation import modulate_speed
|
||||||
|
from herding.control.sequential import compute_action as sequential_action
|
||||||
|
from herding.control.strombom import compute_action as strombom_action
|
||||||
|
from herding.control.universal import compute_action as universal_action
|
||||||
|
from herding.perception.obs import build_obs
|
||||||
|
from herding.perception.lidar_perception import detections_from_scan
|
||||||
|
from herding.perception.sheep_tracker import SheepTracker
|
||||||
|
from herding.world.diffdrive import velocity_to_mecanum_wheels, velocity_to_wheels
|
||||||
|
from herding.world.geometry import (
|
||||||
|
DOG_SOUTH_LIMIT,
|
||||||
|
PEN_ENTRY, is_penned,
|
||||||
|
)
|
||||||
|
from herding.config import (
|
||||||
|
HERDING_WEBOTS, HERDING_MEC_WEBOTS, HERDING_MEC_WEBOTS_360,
|
||||||
|
LIDAR_WEBOTS_360, RobotConfig,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Robot physical constants come from RobotConfig so they stay in sync with
|
||||||
|
# the training environment. The Webots preset uses action_smooth=0.55.
|
||||||
|
# Mecanum picks the matched preset so kinematic injection uses the same
|
||||||
|
# strafe_efficiency/bleed values the policy was trained against.
|
||||||
|
# Early look at the drive mode (environment first, then runtime cfg,
# default "differential"), used only to choose the robot preset.
_DRIVE_MODE_PEEK = (
    os.environ.get("HERDING_DRIVE")
    or _runtime_cfg.get("HERDING_DRIVE")
    or "differential"
).lower()

# NOTE(review): every mecanum run takes the robot section of the 360
# preset, whatever HERDING_LIDAR says. Confirm HERDING_MEC_WEBOTS.robot
# carries the same values.
_ROBOT_CFG: RobotConfig = (
    HERDING_MEC_WEBOTS_360.robot
    if _DRIVE_MODE_PEEK == "mecanum"
    else HERDING_WEBOTS.robot
)

# Physical constants copied out of the selected preset.
DOG_WHEEL_RADIUS = _ROBOT_CFG.wheel_radius
DOG_WHEEL_BASE = _ROBOT_CFG.wheel_base
DOG_WHEEL_BASE_X = _ROBOT_CFG.wheel_base_x
DOG_WHEEL_BASE_Y = _ROBOT_CFG.wheel_base_y
DOG_MAX_WHEEL_OMEGA = _ROBOT_CFG.max_wheel_omega
DOG_MAX_LINEAR = _ROBOT_CFG.max_linear
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Mode + policy resolution (cfg already loaded above)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _setting(key, default):
    """Lower-cased value of ``key``: environment, then runtime cfg, then ``default``."""
    return (os.environ.get(key) or _runtime_cfg.get(key) or default).lower()


# Controller mode; an unrecognised value falls back to the Strömbom heuristic.
_VALID_MODES = ("bc", "rl", "strombom", "sequential", "universal", "calibrate")
MODE = _setting("HERDING_MODE", "bc")
if MODE not in _VALID_MODES:
    print(f"[dog] unknown HERDING_MODE={MODE!r}; defaulting to strombom.")
    MODE = "strombom"

# Drive mode: "differential" (2-wheel) or "mecanum" (4-wheel omnidirectional).
DRIVE_MODE = _setting("HERDING_DRIVE", "differential")
if DRIVE_MODE not in ("differential", "mecanum"):
    print(f"[dog] unknown HERDING_DRIVE={DRIVE_MODE!r}; defaulting to differential.")
    DRIVE_MODE = "differential"

# World shape — used to disambiguate the trained policy directory.
WORLD = _setting("HERDING_WORLD", "field")

# LiDAR FOV variant: "140" (default, ShepherdDog.proto) or "360"
# (ShepherdDog360.proto, FOV ablation). The launcher swaps the proto in
# the temp world file; the matching lidar config is chosen here so the
# perception pipeline interprets ray angles correctly.
LIDAR_FOV_VARIANT = _setting("HERDING_LIDAR", "140")
if LIDAR_FOV_VARIANT != "360":
    _LIDAR_CFG = HERDING_WEBOTS.lidar
elif DRIVE_MODE == "mecanum":
    _LIDAR_CFG = HERDING_MEC_WEBOTS_360.lidar
else:
    _LIDAR_CFG = LIDAR_WEBOTS_360
|
||||||
|
|
||||||
|
# Diagnostic: bypass LiDAR tracker and use GT emitter positions directly.
|
||||||
|
# Set HERDING_USE_GT=1 to isolate perception sim-to-real gap from policy quality.
|
||||||
|
# Raw flag text: environment first, then runtime cfg, default "0".
# An empty cfg entry ("HERDING_USE_GT=") is treated as unset instead of
# being passed to int(), which would raise at controller start-up.
_use_gt_raw = (os.environ.get("HERDING_USE_GT")
               or _runtime_cfg.get("HERDING_USE_GT")
               or "0").strip()
try:
    # Integer form, as before: any non-zero value enables GT perception.
    USE_GT_PERCEPTION = bool(int(_use_gt_raw))
except ValueError:
    # Also accept common boolean spellings rather than crashing.
    USE_GT_PERCEPTION = _use_gt_raw.lower() in ("true", "yes", "on")
    if not USE_GT_PERCEPTION and _use_gt_raw.lower() not in ("false", "no", "off", ""):
        print(f"[dog] unrecognised HERDING_USE_GT={_use_gt_raw!r}; treating as 0.")
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_policy_dir(mode: str, drive: str, world: str) -> str:
    """Pick the directory that should hold the trained policy.

    Lookup order (first existing directory wins):
      1. The HERDING_POLICY_DIR override (environment variable first, then
         the runtime-cfg entry), but only when it names an existing directory.
      2. training/runs/<base>_<drive>_<world>   (canonical)
      3. training/runs/<base>_<drive>           (drive-only)
      4. training/runs/<base>                   (bare mode)

    <base> is "rl" when ``mode`` is "rl" and "bc" for every other mode.

    When nothing exists the canonical path is returned anyway, so the
    loader's error message names the location that was expected.
    """
    override = (os.environ.get("HERDING_POLICY_DIR")
                or _runtime_cfg.get("HERDING_POLICY_DIR"))
    if override and os.path.isdir(override):
        return override

    prefix = "rl" if mode == "rl" else "bc"
    runs_root = os.path.join(_PROJECT_ROOT, "training", "runs")
    canonical = os.path.join(runs_root, f"{prefix}_{drive}_{world}")
    search_order = (
        canonical,
        os.path.join(runs_root, f"{prefix}_{drive}"),
        os.path.join(runs_root, prefix),
    )
    # next() with a default yields the first hit, or the canonical path.
    return next((p for p in search_order if os.path.isdir(p)), canonical)
|
||||||
|
|
||||||
|
|
||||||
|
print(f"[dog] mode={MODE} drive={DRIVE_MODE} world={WORLD}")
|
||||||
|
|
||||||
|
POLICY_DIR = _resolve_policy_dir(MODE, DRIVE_MODE, WORLD)
|
||||||
|
policy_handle = None
|
||||||
|
if MODE in ("bc", "rl"):
|
||||||
|
print(f"[dog] resolved POLICY_DIR={POLICY_DIR} exists={os.path.isdir(POLICY_DIR)}")
|
||||||
|
try:
|
||||||
|
from policy_loader import load as _load_policy
|
||||||
|
policy_handle = _load_policy(POLICY_DIR)
|
||||||
|
print(f"[dog] policy loaded from {POLICY_DIR}")
|
||||||
|
except Exception as exc:
|
||||||
|
print(f"[dog] policy load failed ({exc!r}); falling back to strombom.")
|
||||||
|
MODE = "strombom"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Control parameters
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
ACTION_SMOOTH = _ROBOT_CFG.action_smooth # EMA on (vx, vy) — kills frame-to-frame jitter
|
||||||
|
RUN_DONE_FILE = os.path.join(_PROJECT_ROOT, "training", ".run_done")
|
||||||
|
|
||||||
|
|
||||||
|
def safety_clamp(vx: float, vy: float, dog_x: float, dog_y: float) -> tuple:
    """Keep the dog out of the pen by overriding southward actions.

    Two zones are checked against DOG_SOUTH_LIMIT:

    * Below the limit with any southward component (vy < 0): the action is
      replaced by a pure northward command ``(0.0, 1.0)``.
    * Within 0.5 above the limit with a strong southward component
      (vy < -0.2): vx is halved and vy is shifted north by 0.5, floored at 0.

    Any other action is returned unchanged. ``dog_x`` is accepted for
    signature symmetry but is not consulted.
    """
    past_limit = dog_y < DOG_SOUTH_LIMIT
    if past_limit and vy < 0.0:
        return (0.0, 1.0)

    inside_buffer = dog_y < DOG_SOUTH_LIMIT + 0.5
    if inside_buffer and vy < -0.2:
        damped_vx = vx * 0.5
        lifted_vy = max(0.0, vy + 0.5)
        return (damped_vx, lifted_vy)

    return (vx, vy)
|
||||||
|
|
||||||
|
|
||||||
|
def drive_diff(vx: float, vy: float, left_motor, right_motor,
               compass, motor_max: float):
    """Drive the two-wheel chassis toward the requested (vx, vy) action.

    An action whose magnitude is below 1e-3 stops both wheels. Otherwise the
    heading is derived from the compass reading and ``velocity_to_wheels``
    converts the action into left/right wheel speeds, capped at ``motor_max``.
    """
    # Dead-band: treat a near-zero action as "stop".
    if math.hypot(vx, vy) < 1e-3:
        for motor in (left_motor, right_motor):
            motor.setVelocity(0.0)
        return

    north = compass.getValues()
    heading = math.atan2(north[0], north[1])
    wheel_left, wheel_right = velocity_to_wheels(
        vx, vy, heading,
        max_linear=DOG_MAX_LINEAR,
        wheel_radius=DOG_WHEEL_RADIUS,
        max_wheel_omega=motor_max,
        k_turn=4.0,
    )
    left_motor.setVelocity(wheel_left)
    right_motor.setVelocity(wheel_right)
|
||||||
|
|
||||||
|
|
||||||
|
def drive_mecanum(vx: float, vy: float, omega: float,
                  fl_motor, fr_motor, rl_motor, rr_motor,
                  compass, motor_max: float):
    """Command the four mecanum wheels and, when available, the chassis.

    The wheel motors are always given the speeds returned by
    ``velocity_to_mecanum_wheels`` (they spin for visual fidelity). When the
    module-level ``_self_node`` is set, the body velocity is additionally
    injected through ``_self_node.setVelocity``: it is recomputed from those
    same wheel speeds with the gym forward-kinematics formula and the active
    preset's strafe-efficiency / strafe-to-forward-bleed parameters, then
    rotated by the compass heading.

    A near-zero action (linear magnitude and |omega| both below 1e-3) zeroes
    the wheels and the injected velocity instead.
    """
    wheel_motors = (fl_motor, fr_motor, rl_motor, rr_motor)

    # Dead-band: stop everything, including the injected body velocity.
    if math.hypot(vx, vy) < 1e-3 and abs(omega) < 1e-3:
        for motor in wheel_motors:
            motor.setVelocity(0.0)
        if _self_node is not None:
            _self_node.setVelocity([0.0] * 6)
        return

    north = compass.getValues()
    heading = math.atan2(north[0], north[1])
    half_x = DOG_WHEEL_BASE_X / 2.0
    half_y = DOG_WHEEL_BASE_Y / 2.0

    wheel_speeds = velocity_to_mecanum_wheels(
        vx, vy, omega, heading,
        max_linear=DOG_MAX_LINEAR,
        wheel_radius=DOG_WHEEL_RADIUS,
        lx=half_x, ly=half_y,
        max_wheel_omega=motor_max,
        k_turn=4.0,
        wheel_base=DOG_WHEEL_BASE,
    )
    w_fl, w_fr, w_rl, w_rr = wheel_speeds
    for motor, speed in zip(wheel_motors, wheel_speeds):
        motor.setVelocity(speed)

    # Without a supervisor handle there is nothing more to do.
    if _self_node is None:
        return

    # Kinematic body injection: forward kinematics on the commanded wheel
    # speeds, with the preset's strafe efficiency and forward bleed applied.
    radius = DOG_WHEEL_RADIUS
    forward = (w_fl + w_fr + w_rl + w_rr) * radius / 4.0
    strafe_ideal = (-w_fl + w_fr + w_rl - w_rr) * radius / 4.0
    strafe = strafe_ideal * _ROBOT_CFG.strafe_efficiency
    bleed = _ROBOT_CFG.strafe_to_forward_bleed
    if bleed != 0.0:
        forward += bleed * abs(strafe_ideal)
    yaw_rate = (-w_fl + w_fr - w_rl + w_rr) * radius / (
        4.0 * (half_x + half_y))

    # Rotate the body-frame velocity by the heading before injecting it.
    cos_h = math.cos(heading)
    sin_h = math.sin(heading)
    world_vx = forward * cos_h - strafe * sin_h
    world_vy = forward * sin_h + strafe * cos_h
    _self_node.setVelocity([world_vx, world_vy, 0.0, 0.0, 0.0, yaw_rate])
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Webots devices
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
robot = Supervisor()
|
||||||
timestep = int(robot.getBasicTimeStep())
|
timestep = int(robot.getBasicTimeStep())
|
||||||
|
|
||||||
left_motor = robot.getDevice("left wheel motor")
|
# Mecanum uses Supervisor.setVelocity for chassis motion (see
|
||||||
right_motor = robot.getDevice("right wheel motor")
|
# drive_mecanum); diff-drive keeps full ODE simulation.
|
||||||
left_motor.setPosition(float("inf"))
|
_self_node = robot.getSelf() if DRIVE_MODE == "mecanum" else None
|
||||||
right_motor.setPosition(float("inf"))
|
|
||||||
left_motor.setVelocity(0.0)
|
|
||||||
right_motor.setVelocity(0.0)
|
|
||||||
|
|
||||||
lidar = robot.getDevice("lidar")
|
# Multi-shepherd axis split. When the launcher creates two dog instances
|
||||||
lidar.enable(timestep)
|
# it sets each robot's customData to "axis=x" or "axis=y"; the controller
|
||||||
lidar.enablePointCloud()
|
# then attenuates the off-axis component of every action so the two
|
||||||
|
# dogs share the herding workload along orthogonal axes. customData
|
||||||
|
# empty = single-dog mode (no masking).
|
||||||
|
#
|
||||||
|
# HERDING_AXIS_LEAK controls how strict the mask is:
|
||||||
|
# 0.0 → hard mask (off-axis component zeroed; pure axis-split)
|
||||||
|
# 1.0 → no mask (both dogs run full action; equivalent to NDOGS=2
|
||||||
|
# without axis assignment)
|
||||||
|
# Defaults to 0.3 — empirically the 100/0 strict mask deadlocks once
|
||||||
|
# both dogs reach their drive standoff (the Strömbom target shrinks
|
||||||
|
# and each dog has only one degree of freedom). A small leak keeps
|
||||||
|
# pressure on the flock while preserving the "one dog leads each
|
||||||
|
# axis" coordination story.
|
||||||
|
# customData carries the axis assignment as "axis=x" / "axis=y"; anything
# else (including empty customData) means no axis masking for this dog.
_AXIS_TAG = (robot.getCustomData() or "").strip().lower()
DOG_AXIS = _AXIS_TAG[5:] if _AXIS_TAG.startswith("axis=") else None
if DOG_AXIS is not None and DOG_AXIS not in ("x", "y"):
    print(f"[dog] unknown axis={DOG_AXIS!r} in customData; ignoring.")
    DOG_AXIS = None

# Off-axis gain: environment first, then runtime cfg, then 0.3. A value
# that does not parse as a float also yields 0.3; the result is clamped
# to the [0, 1] range.
_leak_raw = (os.environ.get("HERDING_AXIS_LEAK")
             or _runtime_cfg.get("HERDING_AXIS_LEAK", "0.3"))
try:
    AXIS_LEAK = float(_leak_raw)
except ValueError:
    AXIS_LEAK = 0.3
AXIS_LEAK = max(0.0, min(1.0, AXIS_LEAK))

DOG_NAME = robot.getName()
print(f"[dog] name={DOG_NAME} axis={DOG_AXIS} leak={AXIS_LEAK:.2f}")
|
||||||
|
|
||||||
gps = robot.getDevice("gps"); gps.enable(timestep)
|
if DRIVE_MODE == "mecanum":
|
||||||
compass = robot.getDevice("compass"); compass.enable(timestep)
|
fl_motor = robot.getDevice("front left wheel motor")
|
||||||
emitter = robot.getDevice("emitter")
|
fr_motor = robot.getDevice("front right wheel motor")
|
||||||
|
rl_motor = robot.getDevice("rear left wheel motor")
|
||||||
|
rr_motor = robot.getDevice("rear right wheel motor")
|
||||||
|
for m in (fl_motor, fr_motor, rl_motor, rr_motor):
|
||||||
|
m.setPosition(float("inf"))
|
||||||
|
m.setVelocity(0.0)
|
||||||
|
MOTOR_MAX = min(fl_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
|
||||||
|
else:
|
||||||
|
left_motor = robot.getDevice("left wheel motor")
|
||||||
|
right_motor = robot.getDevice("right wheel motor")
|
||||||
|
left_motor.setPosition(float("inf"))
|
||||||
|
right_motor.setPosition(float("inf"))
|
||||||
|
left_motor.setVelocity(0.0)
|
||||||
|
right_motor.setVelocity(0.0)
|
||||||
|
MOTOR_MAX = min(left_motor.getMaxVelocity(), DOG_MAX_WHEEL_OMEGA)
|
||||||
|
|
||||||
|
gps = robot.getDevice("gps"); gps.enable(timestep)
|
||||||
|
compass = robot.getDevice("compass"); compass.enable(timestep)
|
||||||
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
|
receiver = robot.getDevice("receiver"); receiver.enable(timestep)
|
||||||
|
emitter = robot.getDevice("emitter")
|
||||||
|
lidar = robot.getDevice("lidar"); lidar.enable(timestep)
|
||||||
|
|
||||||
left_ear = robot.getDevice("left ear motor")
|
# Tracker config: pick the preset that matches the (drive, lidar) combo
|
||||||
|
# so the tracker's consensus parameters match what the policy was
|
||||||
|
# trained against.
|
||||||
|
# Tracker preset selection, keyed on the (drive, lidar) combination:
# differential always uses the base preset; mecanum uses the 360 preset
# only when the 360 LiDAR variant is active.
if DRIVE_MODE != "mecanum":
    _TRACKER_CFG = HERDING_WEBOTS.tracker
elif LIDAR_FOV_VARIANT == "360":
    _TRACKER_CFG = HERDING_MEC_WEBOTS_360.tracker
else:
    _TRACKER_CFG = HERDING_MEC_WEBOTS.tracker
tracker = SheepTracker(tracker_cfg=_TRACKER_CFG)
|
||||||
|
|
||||||
|
# Cosmetic ear motors — animated; not used by control.
|
||||||
|
left_ear = robot.getDevice("left ear motor")
|
||||||
right_ear = robot.getDevice("right ear motor")
|
right_ear = robot.getDevice("right ear motor")
|
||||||
left_ear.setPosition(float("inf"))
|
left_ear.setPosition(float("inf"))
|
||||||
right_ear.setPosition(float("inf"))
|
right_ear.setPosition(float("inf"))
|
||||||
left_ear.setVelocity(0.0)
|
left_ear.setVelocity(0.0)
|
||||||
right_ear.setVelocity(0.0)
|
right_ear.setVelocity(0.0)
|
||||||
|
ear_phase = 0.0
|
||||||
|
EAR_AMPLITUDE = 0.35
|
||||||
|
EAR_RATE = 8.0
|
||||||
|
|
||||||
keyboard = robot.getKeyboard()
|
|
||||||
keyboard.enable(timestep)
|
|
||||||
|
|
||||||
MOTOR_MAX = left_motor.getMaxVelocity()
|
# ---------------------------------------------------------------------------
|
||||||
speed_level = 0.5 # fraction of MOTOR_MAX; adjusted by +/-
|
# Main loop
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
EAR_AMPLITUDE = 0.35 # rad, peak ear deflection
|
# Analytic-teacher wrapper (instantiated lazily so RL/BC modes don't pay
|
||||||
EAR_RATE = 8.0 # rad/s, how fast the ears are driven
|
# the import-time cost). Each gets the same ActiveScanTeacher treatment:
|
||||||
ear_phase = 0.0
|
# rotate-on-empty, walk-to-centre, near-sheep speed modulation.
|
||||||
|
# Pick the analytic base action for the current mode (if any) and wrap it
# in ActiveScanTeacher. Modes without an analytic base leave the teacher
# as None.
analytic_teacher = None
if MODE == "strombom":
    base_fn = strombom_action
elif MODE == "sequential":
    base_fn = sequential_action
elif MODE == "universal":
    base_fn = universal_action
else:
    base_fn = None
if base_fn is not None:
    analytic_teacher = ActiveScanTeacher(base_fn)
|
||||||
|
|
||||||
|
# Optional deterministic seed for the controller's RNG. The sheep
|
||||||
|
# controller seeds itself the same way, so identical HERDING_SEED
|
||||||
|
# values give reproducible trials. If unset (empty), Python uses its
|
||||||
|
# time-based default and runs are non-deterministic.
|
||||||
|
import random as _random
|
||||||
|
# Seed source: environment first, then runtime cfg; blank means "unseeded".
_seed_raw = (os.environ.get("HERDING_SEED")
             or _runtime_cfg.get("HERDING_SEED")
             or "").strip()

# HERDING_SEED stays None unless a parsable integer was supplied.
HERDING_SEED = None
if _seed_raw:
    try:
        HERDING_SEED = int(_seed_raw)
    except ValueError:
        print(f"[dog] could not parse HERDING_SEED={_seed_raw!r}; using random")

# Only a successfully parsed seed is applied to the RNG.
if HERDING_SEED is not None:
    _random.seed(HERDING_SEED)
|
||||||
|
|
||||||
|
# GT positions from sheep emitters — used **only** for the auto-finish
|
||||||
|
# sentinel, the GT_penned diagnostic line, and the per-sheep pen-time
|
||||||
|
# metrics printed at end of run. Never fed into control.
|
||||||
|
_gt_sheep: dict = {}
|
||||||
|
_pen_step: dict = {} # sheep name -> step at which it first became penned
|
||||||
|
_run_done = False
|
||||||
|
_t_start = None # step at which we first see GT positions (sim start)
|
||||||
|
|
||||||
|
prev_action = (0.0, 0.0, 0.0) if DRIVE_MODE == "mecanum" else (0.0, 0.0)
|
||||||
|
step_count = 0
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Calibration mode — apply fixed action, measure GPS displacement, compare
|
||||||
|
# against gym kinematics prediction, write results to calibrate_mecanum.log.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
if MODE == "calibrate":
    import sys as _sys

    # Commanded action and trial length, overridable from the environment.
    _calib_vx = float(os.environ.get("CALIB_VX", "0.5"))
    _calib_vy = float(os.environ.get("CALIB_VY", "0.0"))
    _calib_om = float(os.environ.get("CALIB_OM", "0.0"))
    # At least one step: with zero steps the elapsed time below would be
    # zero and the velocity estimates would divide by zero.
    _calib_n = max(1, int(os.environ.get("CALIB_N_STEPS", "150")))
    _log_path = os.path.join(_PROJECT_ROOT, "calibrate_mecanum.log")

    if DRIVE_MODE != "mecanum":
        # The trial loop below only issues drive commands for the mecanum
        # chassis; make that visible instead of logging a silent mismatch.
        print(f"[dog] calibrate: no drive command is issued for "
              f"drive={DRIVE_MODE}; results compare against the mecanum "
              f"prediction only.")

    # Settle for 5 steps so GPS stabilises.
    for _ in range(5):
        robot.step(timestep)
    _pos0 = gps.getValues()
    _x0, _y0 = _pos0[0], _pos0[1]
    _n_calib = compass.getValues()
    _h0 = math.atan2(_n_calib[0], _n_calib[1])

    # Gym-predicted displacement using shared kinematics.
    from herding.world.diffdrive import velocity_to_mecanum_wheels, mecanum_step
    from herding.world.geometry import WEBOTS_DT as _DT
    _xg, _yg, _hg = _x0, _y0, _h0
    for _ in range(_calib_n):
        _wfl, _wfr, _wrl, _wrr = velocity_to_mecanum_wheels(
            _calib_vx, _calib_vy, _calib_om, _hg,
            max_linear=DOG_MAX_LINEAR, wheel_radius=DOG_WHEEL_RADIUS,
            lx=DOG_WHEEL_BASE_X / 2, ly=DOG_WHEEL_BASE_Y / 2,
            max_wheel_omega=DOG_MAX_WHEEL_OMEGA, k_turn=4.0,
            wheel_base=DOG_WHEEL_BASE,
        )
        _xg, _yg, _hg = mecanum_step(
            _xg, _yg, _hg, _wfl, _wfr, _wrl, _wrr,
            DOG_WHEEL_RADIUS, DOG_WHEEL_BASE_X / 2, DOG_WHEEL_BASE_Y / 2, _DT,
        )

    # Run actual Webots steps. drive_mecanum reads the compass and derives
    # the wheel speeds itself, so nothing needs to be precomputed here.
    for _ in range(_calib_n):
        if DRIVE_MODE == "mecanum":
            drive_mecanum(_calib_vx, _calib_vy, _calib_om,
                          fl_motor, fr_motor, rl_motor, rr_motor,
                          compass, MOTOR_MAX)
        robot.step(timestep)

    _pos1 = gps.getValues()
    _x1, _y1 = _pos1[0], _pos1[1]
    _nv1 = compass.getValues()
    _h1 = math.atan2(_nv1[0], _nv1[1])

    # Average velocities over the trial, measured (Webots) vs predicted (gym).
    _T = _calib_n * _DT
    _vx_w = (_x1 - _x0) / _T
    _vy_w = (_y1 - _y0) / _T
    _vx_g = (_xg - _x0) / _T
    _vy_g = (_yg - _y0) / _T
    # Heading change wrapped into (-180°, 180°].
    _dh_deg = math.degrees(math.atan2(math.sin(_h1 - _h0),
                                      math.cos(_h1 - _h0)))

    def _pct(a, p):
        """Relative error of ``a`` against prediction ``p`` in percent;
        0.0 when the prediction is too close to zero to normalise by."""
        return 0.0 if abs(p) < 1e-4 else 100.0 * abs(a - p) / abs(p)

    _result = (
        f"cmd=(vx={_calib_vx:.2f}, vy={_calib_vy:.2f}, om={_calib_om:.2f}) "
        f"steps={_calib_n}\n"
        f"  gym    displacement: dx={_xg-_x0:.4f}  dy={_yg-_y0:.4f}  "
        f"(vx={_vx_g:.3f}  vy={_vy_g:.3f} m/s)\n"
        f"  webots displacement: dx={_x1-_x0:.4f}  dy={_y1-_y0:.4f}  "
        f"(vx={_vx_w:.3f}  vy={_vy_w:.3f} m/s)\n"
        f"  vx error: {_pct(_vx_w, _vx_g):.1f}%  "
        f"vy error: {_pct(_vy_w, _vy_g):.1f}%  "
        f"heading drift: {_dh_deg:+.1f}°\n"
    )
    print(_result)
    with open(_log_path, "a") as _f:
        _f.write(_result + "\n")

    # Write the run-done sentinel so run_webots.sh closes Webots cleanly.
    # Create its directory first, as the main loop's auto-finish path does.
    os.makedirs(os.path.dirname(RUN_DONE_FILE), exist_ok=True)
    with open(RUN_DONE_FILE, "w") as _f:
        _f.write("calibrate\n")
    _sys.exit(0)
|
||||||
|
|
||||||
while robot.step(timestep) != -1:
|
while robot.step(timestep) != -1:
|
||||||
speed = MOTOR_MAX * speed_level
|
step_count += 1
|
||||||
turn = speed * 0.6 # differential turn radius
|
|
||||||
|
|
||||||
left_vel = 0.0
|
# Drain sheep emitter messages → GT (diagnostic only).
|
||||||
right_vel = 0.0
|
while receiver.getQueueLength() > 0:
|
||||||
key = keyboard.getKey()
|
msg = receiver.getString()
|
||||||
while key > 0:
|
receiver.nextPacket()
|
||||||
if key in (ord('W'), Keyboard.UP):
|
parts = msg.split(":")
|
||||||
left_vel = speed
|
if len(parts) == 4 and parts[0] == "sheep":
|
||||||
right_vel = speed
|
try:
|
||||||
elif key in (ord('S'), Keyboard.DOWN):
|
_gt_sheep[parts[1]] = (float(parts[2]), float(parts[3]))
|
||||||
left_vel = -speed
|
except ValueError:
|
||||||
right_vel = -speed
|
pass
|
||||||
elif key in (ord('A'), Keyboard.LEFT):
|
|
||||||
left_vel = -turn
|
|
||||||
right_vel = turn
|
|
||||||
elif key in (ord('D'), Keyboard.RIGHT):
|
|
||||||
left_vel = turn
|
|
||||||
right_vel = -turn
|
|
||||||
elif key in (ord('+'), ord('=')):
|
|
||||||
speed_level = min(1.0, speed_level + 0.1)
|
|
||||||
print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
|
|
||||||
elif key in (ord('-'), ord('_')):
|
|
||||||
speed_level = max(0.1, speed_level - 0.1)
|
|
||||||
print(f"Speed: {speed_level:.0%} ({MOTOR_MAX * speed_level:.1f} rad/s)")
|
|
||||||
key = keyboard.getKey()
|
|
||||||
|
|
||||||
left_motor.setVelocity(left_vel)
|
|
||||||
right_motor.setVelocity(right_vel)
|
|
||||||
|
|
||||||
pos = gps.getValues()
|
pos = gps.getValues()
|
||||||
emitter.send(f"dog:{pos[0]}:{pos[1]}")
|
dog_xy = (pos[0], pos[1])
|
||||||
|
n = compass.getValues()
|
||||||
|
dog_heading = math.atan2(n[0], n[1])
|
||||||
|
|
||||||
|
# ---- LiDAR perception → tracker → active sheep positions ----
|
||||||
|
ranges = np.asarray(lidar.getRangeImage(), dtype=np.float32)
|
||||||
|
detections = detections_from_scan(
|
||||||
|
ranges, dog_xy[0], dog_xy[1], dog_heading,
|
||||||
|
detection_cfg=HERDING_WEBOTS.detection,
|
||||||
|
lidar_cfg=_LIDAR_CFG,
|
||||||
|
)
|
||||||
|
if USE_GT_PERCEPTION and _gt_sheep:
|
||||||
|
# Bypass tracker: feed GT emitter positions directly to policy/teacher.
|
||||||
|
sheep_positions = {k: v for k, v in _gt_sheep.items()
|
||||||
|
if not is_penned(v[0], v[1])}
|
||||||
|
tracker.update(detections) # still advance tracker for diagnostics
|
||||||
|
else:
|
||||||
|
sheep_positions = tracker.update(detections)
|
||||||
|
|
||||||
|
sheep_xy_list = list(sheep_positions.values())
|
||||||
|
sheep_penned_list = [False] * len(sheep_xy_list)
|
||||||
|
single_obs = build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list)
|
||||||
|
|
||||||
|
# ---- Action selection ----
|
||||||
|
omega = 0.0
|
||||||
|
if MODE in ("bc", "rl") and policy_handle is not None:
|
||||||
|
if not sheep_positions:
|
||||||
|
# BC/RL never saw "empty obs during operation" in training (empty
|
||||||
|
# obs only happened at episode end), so the policy outputs ~zero
|
||||||
|
# and the dog gets stuck. Fall back to an *active scan*: rotate
|
||||||
|
# the desired heading slowly so the narrow 140° FOV sweeps the
|
||||||
|
# field instead of charging in one fixed direction (which
|
||||||
|
# otherwise drives the dog into the north wall and ends the run).
|
||||||
|
scan_h = (step_count * 0.015) % (2.0 * math.pi)
|
||||||
|
vx = 0.5 * math.cos(scan_h)
|
||||||
|
vy = 0.5 * math.sin(scan_h)
|
||||||
|
omega = 0.5 if DRIVE_MODE == "mecanum" else 0.0
|
||||||
|
else:
|
||||||
|
action = policy_handle.predict(single_obs)
|
||||||
|
vx, vy = float(action[0]), float(action[1])
|
||||||
|
if DRIVE_MODE == "mecanum" and len(action) >= 3:
|
||||||
|
omega = float(action[2])
|
||||||
|
else:
|
||||||
|
result = analytic_teacher(
|
||||||
|
dog_xy, dog_heading, sheep_positions, PEN_ENTRY,
|
||||||
|
DRIVE_MODE,
|
||||||
|
)
|
||||||
|
if len(result) == 4:
|
||||||
|
vx, vy, omega, _mode_str = result
|
||||||
|
else:
|
||||||
|
vx, vy, _mode_str = result
|
||||||
|
|
||||||
|
# Near-sheep speed modulation (shared by every mode).
|
||||||
|
vx, vy = modulate_speed(vx, vy, dog_xy, sheep_positions)
|
||||||
|
|
||||||
|
# Axis-split mask for the dual-dog setup: this dog leads its
|
||||||
|
# assigned axis (full gain) and contributes AXIS_LEAK on the
|
||||||
|
# off-axis. With LEAK=0 the mask is strict; with LEAK=1 the dogs
|
||||||
|
# run identical full-power policies.
|
||||||
|
if DOG_AXIS == "x":
|
||||||
|
vy *= AXIS_LEAK
|
||||||
|
elif DOG_AXIS == "y":
|
||||||
|
vx *= AXIS_LEAK
|
||||||
|
|
||||||
|
# EMA smoothing — kills frame-to-frame action jitter.
|
||||||
|
if DRIVE_MODE == "mecanum":
|
||||||
|
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
|
||||||
|
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
|
||||||
|
omega = ACTION_SMOOTH * prev_action[2] + (1.0 - ACTION_SMOOTH) * omega
|
||||||
|
else:
|
||||||
|
vx = ACTION_SMOOTH * prev_action[0] + (1.0 - ACTION_SMOOTH) * vx
|
||||||
|
vy = ACTION_SMOOTH * prev_action[1] + (1.0 - ACTION_SMOOTH) * vy
|
||||||
|
|
||||||
|
# Safety: dog must never enter the pen.
|
||||||
|
vx, vy = safety_clamp(vx, vy, dog_xy[0], dog_xy[1])
|
||||||
|
prev_action = (vx, vy, omega) if DRIVE_MODE == "mecanum" else (vx, vy)
|
||||||
|
|
||||||
|
if DRIVE_MODE == "mecanum":
|
||||||
|
drive_mecanum(vx, vy, omega, fl_motor, fr_motor, rl_motor, rr_motor,
|
||||||
|
compass, MOTOR_MAX)
|
||||||
|
else:
|
||||||
|
drive_diff(vx, vy, left_motor, right_motor, compass, MOTOR_MAX)
|
||||||
|
emitter.send(f"dog:{DOG_NAME}:{dog_xy[0]:.4f}:{dog_xy[1]:.4f}")
|
||||||
|
|
||||||
|
# Cosmetic ear wiggle.
|
||||||
ear_phase += 0.12
|
ear_phase += 0.12
|
||||||
ear_pos = EAR_AMPLITUDE * math.sin(ear_phase)
|
ear_pos = EAR_AMPLITUDE * math.sin(ear_phase)
|
||||||
left_ear.setVelocity(EAR_RATE)
|
left_ear.setVelocity(EAR_RATE)
|
||||||
right_ear.setVelocity(EAR_RATE)
|
right_ear.setVelocity(EAR_RATE)
|
||||||
left_ear.setPosition( ear_pos)
|
left_ear.setPosition(ear_pos)
|
||||||
right_ear.setPosition(-ear_pos)
|
right_ear.setPosition(-ear_pos)
|
||||||
|
|
||||||
|
# First step we have GT visibility — record the simulation start
|
||||||
|
# so per-sheep pen times can be reported relative to it.
|
||||||
|
if _gt_sheep and _t_start is None:
|
||||||
|
_t_start = step_count
|
||||||
|
|
||||||
|
# Record the first step at which each sheep is observed penned.
|
||||||
|
for _sname, (_sx, _sy) in _gt_sheep.items():
|
||||||
|
if _sname not in _pen_step and is_penned(_sx, _sy):
|
||||||
|
_pen_step[_sname] = step_count
|
||||||
|
|
||||||
|
# Auto-finish: when all GT sheep are penned, write the sentinel
|
||||||
|
# and print the per-sheep penning summary so the operator sees
|
||||||
|
# the metrics in the terminal. The launcher polls for the
|
||||||
|
# sentinel and closes Webots cleanly.
|
||||||
|
if _gt_sheep and not _run_done:
|
||||||
|
gt_active = sum(1 for x, y in _gt_sheep.values()
|
||||||
|
if not is_penned(x, y))
|
||||||
|
if gt_active == 0:
|
||||||
|
os.makedirs(os.path.dirname(RUN_DONE_FILE), exist_ok=True)
|
||||||
|
open(RUN_DONE_FILE, "w").close()
|
||||||
|
_run_done = True
|
||||||
|
print(f"[dog] all {len(_gt_sheep)} sheep penned at step "
|
||||||
|
f"{step_count} — wrote sentinel, launcher will close Webots")
|
||||||
|
# Only the first dog to detect the finish prints the
|
||||||
|
# summary (in dual-dog mode both run in lock-step but the
|
||||||
|
# sentinel acts as a one-shot lock).
|
||||||
|
_dt = 0.016 # Webots basicTimeStep, seconds
|
||||||
|
print("")
|
||||||
|
print(f"[results] mode={MODE} drive={DRIVE_MODE} world={WORLD} "
|
||||||
|
f"lidar={LIDAR_FOV_VARIANT} dogs={DOG_NAME}"
|
||||||
|
+ (f" seed={HERDING_SEED}" if HERDING_SEED is not None else ""))
|
||||||
|
print(f"[results] total steps: {step_count} "
|
||||||
|
f"({step_count * _dt:.1f} s simulated)")
|
||||||
|
ordered = sorted(_pen_step.items(), key=lambda kv: kv[1])
|
||||||
|
for i, (sn, st) in enumerate(ordered, 1):
|
||||||
|
rel = st - (_t_start or 0)
|
||||||
|
print(f"[results] #{i} {sn:8s} penned at step {st:>6d} "
|
||||||
|
f"({rel * _dt:6.2f} s)")
|
||||||
|
if len(ordered) >= 2:
|
||||||
|
first = ordered[0][1]
|
||||||
|
last = ordered[-1][1]
|
||||||
|
print(f"[results] gate spread: {last - first} steps "
|
||||||
|
f"({(last - first) * _dt:.2f} s) between first and last pen")
|
||||||
|
|
||||||
|
if step_count % 200 == 0:
|
||||||
|
gt_penned = sum(1 for x, y in _gt_sheep.values()
|
||||||
|
if is_penned(x, y))
|
||||||
|
gt_total = len(_gt_sheep)
|
||||||
|
common = (f"[dog mode={MODE} drive={DRIVE_MODE}] step={step_count} "
|
||||||
|
f"GT_penned={gt_penned}/{gt_total} "
|
||||||
|
f"tracks_active={tracker.n_active()} "
|
||||||
|
f"tracks_cand={tracker.n_candidate()} "
|
||||||
|
f"tracks_penned={tracker.n_penned()} "
|
||||||
|
f"detections={len(detections)} "
|
||||||
|
f"h={math.degrees(dog_heading):+.1f}°")
|
||||||
|
if DRIVE_MODE == "mecanum":
|
||||||
|
print(f"{common} action=({vx:+.2f}, {vy:+.2f}, {omega:+.2f})")
|
||||||
|
else:
|
||||||
|
print(f"{common} action=({vx:+.2f}, {vy:+.2f})")
|
||||||
|
|||||||
@@ -0,0 +1,280 @@
|
|||||||
|
# Autonomous Shepherd Robot for Livestock Herding
|
||||||
|
|
||||||
|
**G25 — Diogo Costa, Johnny Fernandes, Nelson Neto**
|
||||||
|
**Course project final report — TRI 2026**
|
||||||
|
|
||||||
|
> Draft outline. Each section has a one-line description plus the
|
||||||
|
> bullets/figures/tables that should land in it. Replace prose as you
|
||||||
|
> write; keep the structure unless something obviously doesn't fit.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Abstract (½ page)
|
||||||
|
|
||||||
|
One paragraph: problem (autonomous LiDAR-only herding), approach
|
||||||
|
(Strömbom-style analytic baselines + BC + KL-PPO fine-tune; two
|
||||||
|
worlds, two drives), key result (8/8 differential cells pen all
|
||||||
|
sheep in Webots; 4/8 mecanum cells pen 10/10 via kinematic
|
||||||
|
Supervisor injection; extra-merit 360° LiDAR ablation and dual-dog
|
||||||
|
axis-split both working).
|
||||||
|
|
||||||
|
## 2. Introduction (1 page)
|
||||||
|
|
||||||
|
* **Problem statement.** Shepherd a flock of 1–10 simulated sheep
|
||||||
|
through a gate into a pen using LiDAR-only perception. Both a
|
||||||
|
rectangular field and a circular field. Both differential and
|
||||||
|
mecanum drive.
|
||||||
|
* **Why it's hard.** No GT positions; sheep flock dynamically
|
||||||
|
(Strömbom 2014); the LiDAR returns a noisy range image, not
|
||||||
|
labelled tracks; sim-to-Webots transfer is non-trivial.
|
||||||
|
* **Contributions.**
|
||||||
|
1. End-to-end LiDAR pipeline (clustering → consensus tracker →
|
||||||
|
observation builder) that transfers training-time policies to
|
||||||
|
Webots without GT bypass.
|
||||||
|
2. Three control strategies (Strömbom, BC, KL-PPO) trained on
|
||||||
|
the same gym environment with matched-kinematics presets,
|
||||||
|
working across both worlds.
|
||||||
|
3. Identification and resolution of the mecanum sim-to-Webots
|
||||||
|
gap (kinematic Supervisor injection — see Section 7).
|
||||||
|
4. Extra-merit experiments: 360° LiDAR ablation and dual-dog
|
||||||
|
axis-split coordination.
|
||||||
|
|
||||||
|
## 3. System overview (1 page)
|
||||||
|
|
||||||
|
* `herding/` — physics-free 2D gym (sheep flocking model, LiDAR
|
||||||
|
ray-casting, perception pipeline, controller library).
|
||||||
|
* `training/` — BC + KL-PPO trainers, frame-stacked MLP policies
|
||||||
|
(stable-baselines3), evaluation harness.
|
||||||
|
* `controllers/` — Webots Python controllers for the shepherd dog
|
||||||
|
and sheep, sharing the gym's geometry/perception modules so any
|
||||||
|
fix in the gym automatically reaches the simulator.
|
||||||
|
* `protos/` — Webots PROTO files: `ShepherdDog.proto` (diff drive
|
||||||
|
140°), `ShepherdDog360.proto` (diff drive 360°),
|
||||||
|
`ShepherdDogMecanum{,360}.proto` (mecanum variants).
|
||||||
|
* **Figure**: architecture diagram with the gym ↔ Webots split,
|
||||||
|
marking where each piece sits.
|
||||||
|
|
||||||
|
## 4. Methods
|
||||||
|
|
||||||
|
### 4.1 Sheep flocking model (½ page)
|
||||||
|
|
||||||
|
* Strömbom 2014 reduced-form heuristics: repulsion from dog and
|
||||||
|
neighbours, attraction to flock centroid, weighted into a
|
||||||
|
step-wise displacement.
|
||||||
|
* Implementation notes: parameter values, why we tuned them to
|
||||||
|
match the Webots sheep controller, sheep dynamics in the round
|
||||||
|
world (cylinder boundary instead of axis-aligned walls).
|
||||||
|
|
||||||
|
### 4.2 Perception (1 page)
|
||||||
|
|
||||||
|
* **LiDAR scan → range image.** 140° front cone (default) or 360°
|
||||||
|
full sweep; horizontalResolution and noise calibrated to the
|
||||||
|
Webots sensor.
|
||||||
|
* **Clustering.** Walk rays in angular order, split on gap
|
||||||
|
threshold and multi-peak range profile; reject clusters wider
|
||||||
|
than max_span (walls), within wall_reject of perimeter, or
|
||||||
|
within static_reject of known fixed features.
|
||||||
|
* **Tracker.** Online NN association with predicted positions;
|
||||||
|
consensus_k filter (k hits within consensus_max_age steps
|
||||||
|
before promotion); static-phantom drop on promoted tracks that
|
||||||
|
fail to displace beyond `STATIC_PHANTOM_RADIUS` within
|
||||||
|
`STATIC_PHANTOM_AGE` steps; pen-latch and forget timeouts tuned
|
||||||
|
per preset.
|
||||||
|
* **Why the tracker matters.** Naïve per-frame matching produced
|
||||||
|
unstable observations that BC couldn't learn from; the consensus
|
||||||
|
filter and the static-phantom drop close the perception
|
||||||
|
sim-to-real gap for diff drive and unblock the 360° mecanum case.
|
||||||
|
|
||||||
|
### 4.3 Controllers (1 page)
|
||||||
|
|
||||||
|
* **Analytic baselines.**
|
||||||
|
* `strombom` — collect/drive heuristic with gate offset and
|
||||||
|
a round-world variant (geometric drive instead of cardinal
|
||||||
|
targets).
|
||||||
|
* `sequential` — single-sheep pin-and-push baseline, runs through
|
||||||
|
every sheep in turn.
|
||||||
|
* `universal` — adaptive analytic teacher used to collect BC
|
||||||
|
demos; switches between Strömbom and Sequential based on flock
|
||||||
|
coherence.
|
||||||
|
* **Behaviour cloning.** MLP(512,512), frame-stacked observations,
|
||||||
|
trained on 250–400 universal-teacher trajectories per
|
||||||
|
(drive, world) combo.
|
||||||
|
* **KL-PPO fine-tune.** PPO with a KL-to-reference penalty against
|
||||||
|
the BC policy. Two-stage: a success-pass (no time penalty), then
|
||||||
|
an optional speed-pass (`rl_fast`, `time_w<0`).
|
||||||
|
|
||||||
|
### 4.4 Gym kinematics matching (½ page)
|
||||||
|
|
||||||
|
* Differential drive: standard unicycle kinematics, transfers
|
||||||
|
directly.
|
||||||
|
* Mecanum: `RobotConfig.strafe_efficiency` and
|
||||||
|
`strafe_to_forward_bleed` scale the forward-kinematics formula.
|
||||||
|
The gym preset (`HERDING_MEC_WEBOTS_360`) sets these to the
|
||||||
|
values the Webots controller reads when computing the
|
||||||
|
Supervisor-injected body velocity (Section 7), so gym training
|
||||||
|
and Webots deployment produce identical chassis motion.
|
||||||
|
|
||||||
|
## 5. Experimental setup (½ page)
|
||||||
|
|
||||||
|
* Webots R2025a; `tools/run_webots.sh N MODE DRIVE WORLD` launcher.
|
||||||
|
* Seeded reproducibility (`HERDING_SEED=42` used for all the
|
||||||
|
results below).
|
||||||
|
* GT bypass (`HERDING_USE_GT=1`) available for ablations.
|
||||||
|
* Per-sheep pen-time logging in the `[results]` block.
|
||||||
|
|
||||||
|
## 6. Results
|
||||||
|
|
||||||
|
### 6.1 Differential drive (table + ½ page commentary)
|
||||||
|
|
||||||
|
| world | controller | n=5 | n=10 |
|
||||||
|
|-------------|--------------|:---:|:----:|
|
||||||
|
| field | BC | 5/5 | 10/10 |
|
||||||
|
| field | RL | 5/5 | 10/10 |
|
||||||
|
| field | Strömbom | 5/5 | 10/10 |
|
||||||
|
| field | Sequential | 5/5 | 10/10 |
|
||||||
|
| field_round | BC | 5/5 | 10/10 |
|
||||||
|
| field_round | RL | 5/5 | 10/10 |
|
||||||
|
| field_round | Strömbom | 5/5 | 10/10 |
|
||||||
|
| field_round | Sequential | 5/5 | 10/10 |
|
||||||
|
|
||||||
|
* Discussion: BC vs RL trade-offs (RL is faster, BC mimics
|
||||||
|
teacher more conservatively); Strömbom vs Sequential
|
||||||
|
(parallel-sweep vs one-at-a-time, time-to-pen comparison).
|
||||||
|
* **Figure**: pen-time bar chart per (controller, world).
|
||||||
|
|
||||||
|
### 6.2 Mecanum drive (table + 1 page commentary)
|
||||||
|
|
||||||
|
| world | controller | n=5 | n=10 |
|
||||||
|
|-------------|------------|:---:|:-----:|
|
||||||
|
| field | BC | 0/5 | 10/10 |
|
||||||
|
| field | RL | 0/5 | 10/10 |
|
||||||
|
| field_round | BC | 0/5 | 10/10 |
|
||||||
|
| field_round | RL | 0/5 | 10/10 |
|
||||||
|
|
||||||
|
> Pending: re-run after the static-phantom drop (Section 7.5) to
|
||||||
|
> confirm whether n=5 also passes.
|
||||||
|
|
||||||
|
* Discussion: kinematic Supervisor injection (Section 7); residual
|
||||||
|
n=5 phantom-track issue (Section 7.5) and how the static-phantom
|
||||||
|
drop addresses it.
|
||||||
|
* **Figure**: heading-drift comparison (with vs without kinematic
|
||||||
|
injection) over a 200-step window.
|
||||||
|
|
||||||
|
### 6.3 Extra-merit experiments (½ page each)
|
||||||
|
|
||||||
|
* **360° LiDAR ablation.** Diff drive runs with `HERDING_LIDAR=360`
|
||||||
|
pen N/N in both worlds. Trade-off: more candidate clusters per
|
||||||
|
step (more phantoms) vs full omnidirectional coverage.
|
||||||
|
* **Dual-dog axis-split.** Two shepherds via `HERDING_NDOGS=2`;
|
||||||
|
each is assigned an axis (x / y); off-axis components attenuated
|
||||||
|
by `HERDING_AXIS_LEAK`. Penned 5/5 on the diff/field setup. Note:
|
||||||
|
mecanum dual-dog was considered but skipped — mecanum's
|
||||||
|
single-dog omnidirectional coverage already saturates the available
|
||||||
|
herding capability.
|
||||||
|
|
||||||
|
## 7. The mecanum sim-to-Webots problem
|
||||||
|
|
||||||
|
> The longest section. This is the project's most interesting
|
||||||
|
> engineering story; write it like one.
|
||||||
|
|
||||||
|
### 7.1 First attempt: plain cylinder wheels + anisotropic friction
|
||||||
|
|
||||||
|
* Idea: use Webots `frictionRotation` on two contact materials
|
||||||
|
(`MecanumWheelA`, `MecanumWheelB`) to rotate the friction frame
|
||||||
|
±45°, making each cylinder act as an omni-roller via the
|
||||||
|
contact solver.
|
||||||
|
* What worked: chassis stable; pure forward motion clean.
|
||||||
|
* What broke: pure strafe came out the wrong direction, and
|
||||||
|
diagonal motion was zero. The contact-frame rotation interacts
|
||||||
|
with ODE's friction-pyramid model in a way that doesn't reproduce
|
||||||
|
the textbook X-pattern.
|
||||||
|
|
||||||
|
### 7.2 Second attempt: 32 physical roller hinges
|
||||||
|
|
||||||
|
* Idea: model every roller as a passive HingeJoint capsule at ±45°
|
||||||
|
tilt; ODE solves the contact-without-slipping constraint per
|
||||||
|
roller, no friction trickery needed.
|
||||||
|
* Generated by `tools/gen_mecanum_wheels.py` (8 rollers per wheel,
|
||||||
|
X-pattern tilt: FR/RL +1, FL/RR −1).
|
||||||
|
* What worked: pure-x calibration was exact (98%+).
|
||||||
|
* What broke: dynamic policy commands made the chassis tumble.
|
||||||
|
Heading swung ±150° in 200 control steps; the LiDAR→world
|
||||||
|
transform was effectively unusable. Even with
|
||||||
|
`inertiaMatrix [_ _ 5.0 _ _ _]`, roller `dampingConstant 0.0005`,
|
||||||
|
and motor `maxTorque 3.0` (6× cut), the dynamic yaw drift was
|
||||||
|
not under control.
|
||||||
|
|
||||||
|
### 7.3 Why ODE struggles with mecanum
|
||||||
|
|
||||||
|
* 32 unconstrained roller hinges per chassis; ODE's contact solver
|
||||||
|
resolves them as independent constraints each step, and small
|
||||||
|
imbalances in the per-roller forces propagate to the body as
|
||||||
|
yaw torque.
|
||||||
|
* The roller's "rolling without slipping" idealisation is
|
||||||
|
fundamentally a kinematic constraint; trying to recover it from
|
||||||
|
Newton-Euler dynamics over 32 hinges is numerically unstable in
|
||||||
|
the timestep/solver regime Webots uses.
|
||||||
|
* This is a known limitation of mecanum in physics engines; Gazebo,
|
||||||
|
for instance, ships a mecanum plugin that bypasses the contact
|
||||||
|
solver entirely and injects a kinematic body velocity.
|
||||||
|
|
||||||
|
### 7.4 Final approach: Supervisor kinematic injection
|
||||||
|
|
||||||
|
* The chassis is moved by `Supervisor.setVelocity()` using the gym
|
||||||
|
mecanum forward-kinematics formula. Wheel motors still spin
|
||||||
|
visually, but their torque does not propagate to the body.
|
||||||
|
* Gym training and Webots deployment apply the *same* formula with
|
||||||
|
the *same* `strafe_efficiency` and `strafe_to_forward_bleed`
|
||||||
|
parameters, so the trained policy faces identical body dynamics
|
||||||
|
in both environments.
|
||||||
|
* Trade-off: we lose Newton-Euler chassis simulation on the
|
||||||
|
mecanum body. Differential drive keeps full physics. The user's
|
||||||
|
framing — "I want the process, not too focused in pure realism"
|
||||||
|
— supports this choice; it's also standard practice in academic
|
||||||
|
mecanum simulators.
|
||||||
|
|
||||||
|
### 7.5 The residual n=5 phantom problem
|
||||||
|
|
||||||
|
* With kinematic injection in place, 4/8 cells pen 10/10. But n=5
|
||||||
|
cells still fail uniformly.
|
||||||
|
* Diagnosis: the 360° LiDAR consistently produces sheep-shaped
|
||||||
|
blobs at wall corners, gate posts, and pen rails. The consensus
|
||||||
|
filter (`consensus_k=3`) doesn't reject them because they are
|
||||||
|
*consistent* — they're always at the same world position.
|
||||||
|
* Bypass via `HERDING_USE_GT=1` (ground-truth perception) pens
|
||||||
|
5/5 in 76 s, confirming the policy is fine and the gap is purely
|
||||||
|
perceptual.
|
||||||
|
* **Fix:** static-phantom drop in the tracker — record each
|
||||||
|
promoted track's spawn position and running max displacement;
|
||||||
|
drop promoted tracks that have stayed within
|
||||||
|
`STATIC_PHANTOM_RADIUS=0.4 m` of their spawn position for
|
||||||
|
`STATIC_PHANTOM_AGE=400` steps (~6.4 s). Real sheep under
|
||||||
|
Strömbom dynamics move well beyond that radius; wall corners
|
||||||
|
do not. *(Implemented; results in Section 6.2 pending re-run.)*
|
||||||
|
|
||||||
|
## 8. Discussion (1 page)
|
||||||
|
|
||||||
|
* Sim-to-real lessons:
|
||||||
|
* Perception is the dominant transfer gap, not control.
|
||||||
|
* Trackers need a notion of motion to reject static phantoms;
|
||||||
|
consensus alone is insufficient when phantoms are spatially
|
||||||
|
consistent.
|
||||||
|
* For mecanum, kinematic injection is the correct abstraction.
|
||||||
|
* What we'd do differently:
|
||||||
|
* Build the parallax/motion-aware tracker into the design from
|
||||||
|
day 1.
|
||||||
|
* Calibrate Webots' mecanum behaviour earlier — we spent
|
||||||
|
significant effort on ODE tuning before stepping back to the
|
||||||
|
kinematic-injection approach.
|
||||||
|
|
||||||
|
## 9. Conclusion (¼ page)
|
||||||
|
|
||||||
|
Restate the contribution and the result counts. End on the open
|
||||||
|
question: parallax-aware tracking is a clean general fix and would
|
||||||
|
make 8/8 mecanum likely; we ran out of project budget.
|
||||||
|
|
||||||
|
## A. Reproducibility appendix (½ page)
|
||||||
|
|
||||||
|
* Hardware/OS used.
|
||||||
|
* Command lines for each row of the results tables.
|
||||||
|
* Random seed and deterministic eval settings.
|
||||||
+287
@@ -0,0 +1,287 @@
|
|||||||
|
# Project handoff — TRI_PROJ2 herding (2026-05-16)
|
||||||
|
|
||||||
|
Context for a fresh model picking this project up. Project deadline: **2026-06-04**.
|
||||||
|
Branch: `test/johnny8`. Last commits: `876e14e` (LSTM), `dd5ac66` (core fixes).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What this project is
|
||||||
|
|
||||||
|
Group G25 course project: an autonomous shepherd dog that herds 1–10 sheep through a gate into a pen. Two worlds (rectangular `field`, circular `field_round`), two drives (`differential`, `mecanum`), and five control strategies:
|
||||||
|
|
||||||
|
- `strombom` — analytical Strömbom collect/drive heuristic
|
||||||
|
- `sequential` — analytical single-target pin-and-push baseline
|
||||||
|
- `universal` — analytical teacher used to collect BC demos
|
||||||
|
- `bc` — MLP policy trained via behaviour cloning of `universal`
|
||||||
|
- `rl` — KL-regularised PPO fine-tune of `bc`
|
||||||
|
|
||||||
|
The dog perceives sheep only through a front-mounted LiDAR (`protos/ShepherdDog.proto`).
|
||||||
|
A 2D Gym env (`training/herding_env.py`) is used for training and headless evaluation;
|
||||||
|
Webots is used for sim-to-deployment validation.
|
||||||
|
|
||||||
|
See `docs/project.md` for the formal course objectives. See
|
||||||
|
`~/.claude/projects/-home-jalf-code-TRI-PROJ2/memory/` for the running notes
|
||||||
|
(`project_state.md`, `dagger_results.md`, `lstm_results.md`, `webots_perception_gap.md`).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What's working today
|
||||||
|
|
||||||
|
Everything below is **verified**, with command lines you can copy-paste.
|
||||||
|
|
||||||
|
### Analytical strategies (Strömbom, Sequential, Universal)
|
||||||
|
|
||||||
|
Work in Webots with **GT bypass** (`HERDING_USE_GT=1`) — 12/12 trials across
|
||||||
|
both worlds × {5, 10 sheep}. User has signed off on GT bypass for these
|
||||||
|
analytical baselines (they take a position list as input; GT vs LiDAR is a
|
||||||
|
perception-layer concern, not a strategy concern).
|
||||||
|
|
||||||
|
Validated by `webots_sweep_gt.log` (full matrix, all OK).
|
||||||
|
|
||||||
|
### Gym performance (clean 360° LiDAR sim, default tracker)
|
||||||
|
|
||||||
|
```
|
||||||
|
BC diff/field: 96% avg (90-100% across n=1..10)
|
||||||
|
RL diff/field: 99% avg (90-100%)
|
||||||
|
BC diff/round: 58% ← weak combo
|
||||||
|
RL diff/round: 58% ← weak combo
|
||||||
|
BC mec/field: 86%
|
||||||
|
RL mec/field: 90%
|
||||||
|
BC mec/round: 73%
|
||||||
|
RL mec/round: 79%
|
||||||
|
```
|
||||||
|
|
||||||
|
Plus a Stage-2 `rl_fast` time-penalty pass on diff/field and mec/field
|
||||||
|
(`rl_fast_*` directories) that slightly accelerates time-to-pen with similar
|
||||||
|
success.
|
||||||
|
|
||||||
|
### Webots LiDAR — 360° proto variant (`protos/ShepherdDog360.proto`)
|
||||||
|
|
||||||
|
Created today as a robustness ablation. v1 policies (trained on default 360°
|
||||||
|
gym LiDAR) transfer cleanly:
|
||||||
|
|
||||||
|
```
|
||||||
|
strombom/sequential/universal: 12/12 OK
|
||||||
|
bc diff (5 and 10 sheep, both worlds): 3/4 OK (only diff/field n=10 timed out)
|
||||||
|
bc mecanum: 0/4 — separate dynamics gap
|
||||||
|
rl any: 0/4 — RL more brittle than BC, unexpectedly
|
||||||
|
```
|
||||||
|
|
||||||
|
Validated by `webots_sweep_360.log`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What does NOT work (despite multiple attempts)
|
||||||
|
|
||||||
|
**Any learned policy (BC, RL, DAgger, LSTM) in Webots LiDAR with the
|
||||||
|
canonical 140° FOV proto.** All hit the same wall: tracker phantom-track
|
||||||
|
patterns from real Webots LiDAR don't match what the gym FP-injection model
|
||||||
|
produces, so policies trained on the gym proxy can't handle the obs they see
|
||||||
|
in Webots.
|
||||||
|
|
||||||
|
Approaches tried today (all detailed in `~/.claude/projects/.../memory/`):
|
||||||
|
|
||||||
|
| Approach | Gym proxy | Webots LiDAR 140° |
|
||||||
|
|---|---|---|
|
||||||
|
| v1 MLP + frame stack, clean training | 99% | 0/5 |
|
||||||
|
| DAgger (3 rounds, privileged teacher labels) | 12% → 38% on proxy | 0/5 |
|
||||||
|
| LSTM RecurrentPPO from scratch, 3M steps | 69% clean / 2% proxy | 0/5 |
|
||||||
|
|
||||||
|
Diagnosis: gym `HERDING_WEBOTS` preset (`herding/config.py`) is an
|
||||||
|
approximation but not faithful to actual Webots LiDAR. Real Webots produces
|
||||||
|
~4 phantom tracks per step for 5 real sheep due to wall/post/leg returns;
|
||||||
|
gym injection uses a Poisson process at static anchor points which is
|
||||||
|
distributionally different.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Critical bug fixes shipped today
|
||||||
|
|
||||||
|
If you're picking this up, these are real bugs that took hours to find:
|
||||||
|
|
||||||
|
1. **Webots controllers were silently crashing on numpy import.** Webots
|
||||||
|
launched them under system `python3` (no numpy). Fixed by adding
|
||||||
|
`runtime.ini` files at `controllers/{shepherd_dog,sheep}/runtime.ini`
|
||||||
|
that point Webots to the conda env's python.
|
||||||
|
|
||||||
|
2. **FP_RATE mismatch BC=0 vs RL=2 poisoned PPO.** Default in Makefile was
|
||||||
|
`FP_RATE=2.0` for RL but `--fp-rate 0.0` hard-coded for BC demos. PPO
|
||||||
|
stalled at 0% success for 1.46M steps. Now `FP_RATE=0.0` consistent.
|
||||||
|
|
||||||
|
3. **Tracker phantom-penned tracks.** `pen_latch_depth=0.5` was too shallow
|
||||||
|
(FPs at y≈-15 latched and lived forever). Now 2.0, and penned tracks
|
||||||
|
decay at `forget_steps × 8` instead of being eternal.
|
||||||
|
|
||||||
|
4. **HERDING_WEBOTS preset tuning** in `herding/config.py` —
|
||||||
|
`max_new_tracks_per_step=1`, `static_reject=1.2`. Reduces phantom-track
|
||||||
|
spawning rate but doesn't eliminate it.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Recommended path to a strong June 4 deliverable
|
||||||
|
|
||||||
|
You don't need to fix the 140° LiDAR gap — there's a defensible story
|
||||||
|
already. The article framing writes itself:
|
||||||
|
|
||||||
|
> "Wide-FOV (360°) LiDAR enables clean sim-to-real transfer of learned
|
||||||
|
> shepherding policies. Narrow-FOV (140°) introduces phantom-track noise
|
||||||
|
> that current policies cannot fully reject — closing this gap is future
|
||||||
|
> work, likely requiring either a faithful gym-side LiDAR model or
|
||||||
|
> Webots-in-the-loop training."
|
||||||
|
|
||||||
|
Concrete deliverable plan:
|
||||||
|
|
||||||
|
1. **Demo video and screenshots**: use the 360° proto for BC/RL demonstrations
|
||||||
|
and GT bypass for analyticals on 140°. All combos covered.
|
||||||
|
2. **Quantitative results**: gym eval already gives success%, mean steps.
|
||||||
|
Add a flock-dispersion metric (`max(distances from CoM)` at end of
|
||||||
|
episode) — about 30 lines in `eval.py`.
|
||||||
|
3. **Collision tracking**: add a counter in `HerdingEnv.step()` for
|
||||||
|
`dog-sheep distance < 0.30 m`. Currently the env knows about
|
||||||
|
`COLLISION_DIST` but doesn't expose it in info. ~20 lines.
|
||||||
|
4. **Mecanum**: the mecanum Webots dynamics gap is **separate** from the
|
||||||
|
perception issue. `tools/calibrate_mecanum.sh` exists for this. Run
|
||||||
|
it and see if it gives matching dynamics. This is the most valuable
|
||||||
|
remaining technical task — closing the mecanum gap would let you
|
||||||
|
complete the "diff vs mecanum" extra-merit comparison in
|
||||||
|
`docs/project.md`.
|
||||||
|
5. **Round world**: gym performance is ~58-79% across approaches. The
|
||||||
|
curved walls break Strömbom's "stand behind the centroid" geometry
|
||||||
|
(the position behind sometimes lies outside the field). Two cheap
|
||||||
|
tweaks worth trying: (a) a per-episode `W_RADIUS` reward bonus for
|
||||||
|
compact flocks (gather-first behavior), (b) curriculum on the env's
|
||||||
|
`difficulty` knob (already wired in `HerdingEnv`).
|
||||||
|
|
||||||
|
Bonuses still on the table (from `docs/project.md` extra merit):
|
||||||
|
- **Multi-shepherd axis-split** — user's idea, ~1 day work. Each dog
|
||||||
|
computes one component of the analytical Strömbom action. No multi-agent
|
||||||
|
RL needed.
|
||||||
|
- **Robustness / DR ablation** — FP/wheel-slip knobs exist; run an ablation
|
||||||
|
table.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Repository layout (essentials)
|
||||||
|
|
||||||
|
```
|
||||||
|
herding/
|
||||||
|
config.py # HerdingConfig dataclasses, HERDING_DEFAULT / HERDING_WEBOTS presets
|
||||||
|
control/ # strombom.py, sequential.py, universal.py (analytical teachers)
|
||||||
|
perception/ # lidar_sim.py, lidar_perception.py, sheep_tracker.py
|
||||||
|
world/ # diffdrive.py kinematics, flocking_sim.py, geometry.py (PEN_*/GATE_*/FIELD_*)
|
||||||
|
|
||||||
|
training/
|
||||||
|
herding_env.py # Gym env: HerdingEnv. ~560 lines. Step/reset/reward/obs.
|
||||||
|
bc/
|
||||||
|
collect.py # Demo collector — supports --privileged and --dagger-policy
|
||||||
|
pretrain.py # MLP BC trainer (MSE + 1-cos loss)
|
||||||
|
rl/
|
||||||
|
train.py # KL-regularised PPO fine-tune of BC
|
||||||
|
train_lstm.py # NEW today: RecurrentPPO (sb3-contrib) from scratch
|
||||||
|
eval.py # Env-side evaluator; supports MLP + LSTM policies
|
||||||
|
runs/ # Trained artifacts (bc_*, rl_*, rl_fast_*, lstm_*)
|
||||||
|
v1_clean/ # Backup of pre-DAgger artifacts
|
||||||
|
|
||||||
|
controllers/
|
||||||
|
shepherd_dog/
|
||||||
|
shepherd_dog.py # Webots controller. Mode selection via HERDING_MODE env.
|
||||||
|
policy_loader.py # Auto-detects MLP vs LSTM zip. Handles obs / state.
|
||||||
|
runtime.ini # ← critical, points Webots to conda python
|
||||||
|
sheep/
|
||||||
|
runtime.ini # ← same fix
|
||||||
|
|
||||||
|
protos/
|
||||||
|
ShepherdDog.proto # canonical 140° FOV (matches the physical robot)
|
||||||
|
ShepherdDog360.proto # 360° variant for the FOV ablation / fallback delivery
|
||||||
|
ShepherdDogMecanum.proto
|
||||||
|
Sheep.proto
|
||||||
|
|
||||||
|
worlds/
|
||||||
|
field.wbt # rectangular world
|
||||||
|
field_round.wbt # circular world
|
||||||
|
|
||||||
|
tools/
|
||||||
|
run_webots.sh # launcher: tools/run_webots.sh N MODE DRIVE WORLD
|
||||||
|
webots_sweep.sh # full LiDAR sweep across all modes × drives × worlds
|
||||||
|
webots_sweep_gt.sh # same but with HERDING_USE_GT=1
|
||||||
|
dagger_round.sh # NEW today: one-shot DAgger collect + train
|
||||||
|
calibrate_mecanum.sh # mecanum dynamics calibration (not run today)
|
||||||
|
|
||||||
|
Makefile # Top-level: make train_all, make eval_all, etc.
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run pytest (111 tests, all passing)
|
||||||
|
make test
|
||||||
|
|
||||||
|
# Train one combo end-to-end (BC → RL → eval, ~1h on 2 cores)
|
||||||
|
make DRIVE=differential WORLD=field
|
||||||
|
|
||||||
|
# Train all 4 combos (~5h)
|
||||||
|
make train_all
|
||||||
|
|
||||||
|
# Eval an existing policy directory in gym
|
||||||
|
python -m training.eval --policy training/runs/rl_differential_field \
|
||||||
|
--max-flock 10 --max-steps 15000 --n-seeds 10 \
|
||||||
|
--drive-mode differential --world field
|
||||||
|
|
||||||
|
# Webots — analytical, GT bypass (this works for all combos)
|
||||||
|
HERDING_USE_GT=1 tools/run_webots.sh 5 strombom differential field
|
||||||
|
|
||||||
|
# Webots — BC with the 360° proto (currently the 140° proto is active;
|
||||||
|
# swap by editing protos/ShepherdDog.proto or use the 360° variant directly)
|
||||||
|
tools/run_webots.sh 5 bc differential field
|
||||||
|
|
||||||
|
# Headless full sweep (~80 min)
|
||||||
|
tools/webots_sweep.sh webots_sweep.log
|
||||||
|
|
||||||
|
# Train LSTM (sb3-contrib must be installed)
|
||||||
|
python -m training.rl.train_lstm \
|
||||||
|
--out training/runs/lstm_differential_field \
|
||||||
|
--total-timesteps 3000000 --use-webots-preset \
|
||||||
|
--drive-mode differential --world field
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Hardware/environment
|
||||||
|
|
||||||
|
- 3.8 GB RAM, 8 GB swap, 2 cores. Memory pressure is real — saw the
|
||||||
|
OS OOM-kill RL training during chained `train_all` once. If you re-run
|
||||||
|
full pipelines, monitor memory and consider splitting.
|
||||||
|
- Conda env: `tir` at `/home/jalf/miniconda3/envs/tir/`. Has SB3,
|
||||||
|
sb3-contrib, PyTorch, gymnasium. Webots controllers point to this
|
||||||
|
python via the new `runtime.ini` files.
|
||||||
|
- Webots installed at `/usr/local/webots/`. Headless mode requires
|
||||||
|
`xvfb-run -a` (no X display on this machine).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What I'd suggest for a fresh attempt at the 140° LiDAR gap
|
||||||
|
|
||||||
|
If the user wants you to keep pushing on it, the highest-EV experiment
|
||||||
|
not yet tried is:
|
||||||
|
|
||||||
|
**Consensus tracker** — modify `herding/perception/sheep_tracker.py` to
|
||||||
|
require K consecutive detections within a small radius before promoting
|
||||||
|
a track to "real." Phantom tracks from sporadic wall returns wouldn't
|
||||||
|
survive the K-step consensus; real sheep continuously visible in FOV
|
||||||
|
would. The current `max_new_tracks_per_step=1` rate-limits new tracks
|
||||||
|
but every detection still spawns one immediately.
|
||||||
|
|
||||||
|
Implementation sketch: add a "candidate" track type that doesn't appear
|
||||||
|
in `get_positions()`. After K (e.g. 3-5) consecutive matched detections,
|
||||||
|
promote candidate → real track. Roughly 30-50 lines of code.
|
||||||
|
|
||||||
|
This is a tracker-level fix at deploy time only, so it wouldn't require
|
||||||
|
retraining the policies — v1 BC/RL should transfer cleanly if the tracker
|
||||||
|
output looks more like what they were trained on (one position per real
|
||||||
|
sheep, no phantoms).
|
||||||
|
|
||||||
|
I would NOT recommend more architectural training experiments (DAgger
|
||||||
|
round 4, larger LSTM, etc.) — three independent approaches today already
|
||||||
|
showed the bottleneck is upstream of the policy.
|
||||||
+13
-10
@@ -1,33 +1,37 @@
|
|||||||
# Group G25 - Formal & Title & Goals
|
# Group G25 - Formal & Title & Goals
|
||||||
|
|
||||||
|
This is the original course proposal/goals document. For current setup,
|
||||||
|
training, evaluation, and Webots run instructions, see `../README.md`
|
||||||
|
and `../training/README.md`.
|
||||||
|
|
||||||
## Team members
|
## Team members
|
||||||
- Diogo Costa <up202502576@up.pt>
|
- Diogo Costa <up202502576@up.pt>
|
||||||
- Johnny Fernandes <up202402612@up.pt>
|
- Johnny Fernandes <up202402612@up.pt>
|
||||||
- Nelson Neto <up202108117@up.pt>
|
- Nelson Neto <up202108117@up.pt>
|
||||||
|
|
||||||
## (i) Title and General objectives
|
## (i) Title and General objectives
|
||||||
**RL-Based Autonomous Shepherd Robot for Livestock Herding**
|
**Autonomous Shepherd Robot for Livestock Herding (Strömbom)**
|
||||||
|
|
||||||
- Implement effective herding behaviors through proximity and movement strategies
|
- Implement effective herding behaviors through proximity and movement strategies
|
||||||
- Build a 3D environment with realistic robot dynamics and LIDAR-based perception
|
- Build a 3D environment with realistic robot dynamics and LIDAR-based perception
|
||||||
- Develop a mobile robot capable of autonomously guiding a flock of sheep into a designated target area using Reinforcement Learning
|
- Develop a mobile robot capable of autonomously guiding a flock of sheep into a designated target area using the Strömbom heuristic approach
|
||||||
|
|
||||||
|
|
||||||
# Group G25 - (ii) Intermediate Goals
|
# Group G25 - (ii) Intermediate Goals
|
||||||
|
|
||||||
## Intermediate goals
|
## Intermediate goals
|
||||||
- Set up the Webots simulation environment with an open field and target zone
|
- Set up the Webots simulation environment with an open field and target zone
|
||||||
- Implement lightweight Gymnasium-based 2D herding environment
|
- Implement lightweight 2D herding environment for algorithm evaluation
|
||||||
- Design a Sheep and Dog robot
|
- Design a Sheep and Dog robot
|
||||||
- Implement a sheep flocking model for fast RL iteration
|
- Implement a sheep flocking model for fast Strömbom iteration
|
||||||
- Validate LiDAR sensor feedback for sheep detection and distance estimation
|
- Validate LiDAR sensor feedback for sheep detection and distance estimation
|
||||||
|
|
||||||
|
|
||||||
# Group G25 - Course Project (Final) Goals
|
# Group G25 - Course Project (Final) Goals
|
||||||
|
|
||||||
## (iii) Main goals
|
## (iii) Main goals
|
||||||
- State-of-the-art survey on shepherding algorithms and multi-agent RL herding
|
- State-of-the-art survey on shepherding algorithms with focus on Strömbom herding
|
||||||
- Train the robot using PPO to successfully herd a single sheep into the goal
|
- Implement and tune Strömbom controller to successfully herd a single sheep into the goal
|
||||||
- Achieve fully autonomous herding of multiple sheep and a full flock into the target area
|
- Achieve fully autonomous herding of multiple sheep and a full flock into the target area
|
||||||
- Optimize robot trajectory to minimize the time required to group the flock
|
- Optimize robot trajectory to minimize the time required to group the flock
|
||||||
- Ensure zero collisions between the robot and the sheep during the task
|
- Ensure zero collisions between the robot and the sheep during the task
|
||||||
@@ -35,7 +39,7 @@
|
|||||||
- Article, demo video, and final presentation
|
- Article, demo video, and final presentation
|
||||||
|
|
||||||
## (iv) Extra Merit
|
## (iv) Extra Merit
|
||||||
- Curriculum Learning (scaling from 1 sheep to a flock)
|
- Progressive evaluation (scaling from 1 sheep to a flock)
|
||||||
- Comparison of performance between Differential Drive and Mecanum wheels
|
- Comparison of performance between Differential Drive and Mecanum wheels
|
||||||
- Robustness testing under sensor noise or varying sheep speeds, configurations and parameters
|
- Robustness testing under sensor noise or varying sheep speeds, configurations and parameters
|
||||||
- Multi-shepherd cooperative mode: 2 dogs learn role specialization (collector vs. driver)
|
- Multi-shepherd cooperative mode: 2 dogs learn role specialization (collector vs. driver)
|
||||||
@@ -46,11 +50,10 @@
|
|||||||
|
|
||||||
## (v) Tools
|
## (v) Tools
|
||||||
- Webots for 3D physics simulation with ROS2 integration via `webots_ros2` package
|
- Webots for 3D physics simulation with ROS2 integration via `webots_ros2` package
|
||||||
- Stable-Baselines3 for the PPO algorithm implementation
|
- Gymnasium (OpenAI) for the simulation wrapper and evaluation tooling
|
||||||
- Gymnasium (OpenAI) for the RL environment wrapper (lightweight 2D herding env for fast RL training)
|
|
||||||
- Python as the primary programming language (sheep flocking model, reward shaping, evaluation)
|
- Python as the primary programming language (sheep flocking model, reward shaping, evaluation)
|
||||||
|
|
||||||
## (vi) Limitations
|
## (vi) Limitations
|
||||||
- Computational Power: Training time might be high for complex flock behaviors
|
- Computational Power: Large batch evaluation and parameter sweeps can still be time-consuming
|
||||||
- Sim-to-Real Gap: No real-world validation of the herding controller; project is simulation-only (2D + Webots 3D)
|
- Sim-to-Real Gap: No real-world validation of the herding controller; project is simulation-only (2D + Webots 3D)
|
||||||
- Model Complexity: Simplified sheep behavior (scripted) may not account for all biological livestock nuances
|
- Model Complexity: Simplified sheep behavior (scripted) may not account for all biological livestock nuances
|
||||||
@@ -0,0 +1,86 @@
|
|||||||
|
# Status — 2026-05-18
|
||||||
|
|
||||||
|
Current snapshot of what works in Webots, and what design choices got us here.
|
||||||
|
|
||||||
|
## Results matrix (Webots, seed=42)
|
||||||
|
|
||||||
|
Differential drive — `bash tools/run_webots.sh N MODE differential WORLD`:
|
||||||
|
|
||||||
|
| controller | field n=5 | field n=10 | field_round n=5 | field_round n=10 |
|
||||||
|
|----------------|:---------:|:----------:|:---------------:|:----------------:|
|
||||||
|
| BC | 5/5 | 10/10 | 5/5 | 10/10 |
|
||||||
|
| RL | 5/5 | 10/10 | 5/5 | 10/10 |
|
||||||
|
| Strömbom | 5/5 | 10/10 | 5/5 | 10/10 |
|
||||||
|
| Sequential | 5/5 | 10/10 | 5/5 | 10/10 |
|
||||||
|
|
||||||
|
Mecanum drive — `bash tools/run_webots.sh N MODE mecanum WORLD HERDING_LIDAR=360`:
|
||||||
|
|
||||||
|
| controller | field n=5 | field n=10 | field_round n=5 | field_round n=10 |
|
||||||
|
|------------|:---------:|:----------:|:---------------:|:----------------:|
|
||||||
|
| BC | 0/5 | 10/10 | 0/5 | 10/10 |
|
||||||
|
| RL | 0/5 | 10/10 | 0/5 | 10/10 |
|
||||||
|
|
||||||
|
Extra-merit:
|
||||||
|
|
||||||
|
- **360° LiDAR ablation** — `HERDING_LIDAR=360` works in all four differential-drive cells of the matrix above.
|
||||||
|
- **Dual-dog axis-split** — `HERDING_NDOGS=2 HERDING_AXIS_LEAK=0.3` pens 5/5 on diff.
|
||||||
|
|
||||||
|
## Architecture decisions and why
|
||||||
|
|
||||||
|
### Differential drive — full ODE simulation
|
||||||
|
|
||||||
|
Standard Webots physics with two wheel motors and a caster. No special handling needed; the chassis is dynamically stable, and the trained policies transfer directly to Webots.
|
||||||
|
|
||||||
|
### Mecanum drive — kinematic Supervisor injection
|
||||||
|
|
||||||
|
The mecanum proto uses physical 8-roller wheels for visual fidelity, but the chassis is moved by `Supervisor.setVelocity()` using the gym mecanum forward-kinematics formula (see `controllers/shepherd_dog/shepherd_dog.py::drive_mecanum`).
|
||||||
|
|
||||||
|
We tried two other approaches (1 and 2 below) before settling on the current one (3):
|
||||||
|
|
||||||
|
1. **Plain cylinder wheels + anisotropic ContactProperties.** Tried `frictionRotation ±0.7854` on the wheel contact frame. Strafe motion came out the wrong direction and diagonals zeroed out. Discarded.
|
||||||
|
2. **Full ODE simulation on 32 physical roller hinges.** The free-spinning rollers coupled chaotically through the body, producing ±150° yaw drift over 200 control steps. Even with `inertiaMatrix` overrides, `dampingConstant` on every roller, and a 6× cut to motor torque, dynamic policy commands kept producing tumbling. Discarded.
|
||||||
|
3. **Kinematic Supervisor injection (current).** ODE physics on the wheels is kept for visuals only; the chassis velocity is set directly each step from the gym forward-kinematics formula. Gym training and Webots deployment produce identical body motion. Yaw drift is zero by construction.
|
||||||
|
|
||||||
|
This is not a hack — it matches how most academic mecanum sims work (e.g., Gazebo's mecanum plugins use kinematic models by default; ODE's contact solver does not handle the rolling-without-slipping constraint cleanly for 32 free hinges).
|
||||||
|
|
||||||
|
### Why n=5 mecanum fails (and n=10 passes)
|
||||||
|
|
||||||
|
At n=5 the 360° LiDAR produces 0–8 detections per frame — up to 5 from real sheep plus 1–3 "phantom" clusters from gate posts, wall fragments, and pen rails. The tracker's consensus filter promotes a candidate to "active" after `consensus_k=3` hits within 20 steps, and phantoms satisfy that easily because the static structures that cause them return clusters at nearly the same place on consecutive frames (the resulting *tracks* do not stay put, however — see the table below).
|
||||||
|
|
||||||
|
With n=10 real sheep the 10 active slots fill with real sheep before phantoms compete. With n=5 there are ~5 free slots and the phantoms occupy them; the policy then chases ghosts (verified: with `HERDING_USE_GT=1` perception bypass, n=5 pens 5/5 in 76 s).
|
||||||
|
|
||||||
|
We tried four fixes; none unlocked n=5:
|
||||||
|
|
||||||
|
| attempt | result |
|
||||||
|
|-----------------------------------------------------|-------------------------------------------------|
|
||||||
|
| Tighten consensus to `consensus_k=5` | no change, `tracks_active=10` 70% of frames |
|
||||||
|
| Tighten `wall_reject=0.9`, `static_reject=1.5` | no change |
|
||||||
|
| Static-phantom drop (track displacement from spawn) | phantoms are *not* spatially static — debug logs showed phantom tracks bouncing 4–22 m across the field as data association reassigned them each frame |
|
||||||
|
| Merge near-duplicate detections (≤0.5 m) | phantoms aren't fragmentation either |
|
||||||
|
|
||||||
|
The phantom tracks are caused by **data-association noise**: when the tracker has more slots than real sheep, the leftover tracks attach themselves to whatever cluster is closest each frame, even if that cluster has nothing to do with their original spawn position. The fix would need either parallax-aware tracking (require multi-vantage confirmation before promotion) or training with simulated phantom noise. Both are real surgery; out of scope for the 2026-06-11 deadline.
|
||||||
|
|
||||||
|
**Workaround for the demo:** running n=10 in Webots always pens 10/10; the n=5 cells produce identical kinematic behaviour and can be reported from the gym evaluation (success rate, time-to-pen) where the gym tracker doesn't accumulate phantoms.
|
||||||
|
|
||||||
|
## File map (what changed in this push)
|
||||||
|
|
||||||
|
```
|
||||||
|
herding/config.py mecanum presets keep matched
|
||||||
|
strafe scaling (strafe_eff=0.26,
|
||||||
|
bleed=-0.40) for kinematic injection
|
||||||
|
controllers/shepherd_dog/shepherd_dog.py
|
||||||
|
Supervisor() + drive_mecanum kinematic
|
||||||
|
injection via _self_node.setVelocity
|
||||||
|
protos/ShepherdDogMecanum.proto supervisor TRUE; physics tuning
|
||||||
|
protos/ShepherdDogMecanum360.proto reverted (ODE no longer load-bearing)
|
||||||
|
tools/gen_mecanum_wheels.py wheels regen-script (clean)
|
||||||
|
tools/run_webots.sh contact-properties comment cleaned
|
||||||
|
training/{bc/collect,rl/train}.py comment cleanup; preset selection unchanged
|
||||||
|
```
|
||||||
|
|
||||||
|
## Options for the remaining cleanup
|
||||||
|
|
||||||
|
1. **Keep matched preset (0.26, -0.40)**. The policies were trained against these values and the controller applies them at deploy time, so no retraining is needed. *Current state*.
|
||||||
|
2. **Switch preset to textbook (1.0, 0.0) and retrain mecanum BC+RL** (~6h). Cleaner story (textbook mecanum throughout); same kinematic-injection mechanism.
|
||||||
|
|
||||||
|
Either is defensible. (1) ships faster; (2) is more "pure".
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
"""Shared core for the shepherd herding project.
|
||||||
|
|
||||||
|
This package is the single source of truth for world geometry, sheep
|
||||||
|
flocking dynamics, differential-drive kinematics, observation building,
|
||||||
|
and the Strömbom heuristic. It is imported both by the Webots
|
||||||
|
controllers (for inference) and by the Gymnasium training environment
|
||||||
|
(for fast PPO rollouts), so the two paths cannot drift apart.
|
||||||
|
"""
|
||||||
@@ -0,0 +1,480 @@
|
|||||||
|
"""Central configuration dataclasses for the herding simulation.
|
||||||
|
|
||||||
|
Every tunable parameter lives here as a frozen dataclass field — LiDAR
|
||||||
|
spec, cluster detection thresholds, tracker gates, robot kinematics,
|
||||||
|
and domain-randomisation knobs — composed into :class:`HerdingConfig`.
|
||||||
|
|
||||||
|
Usage — accept the defaults::
|
||||||
|
|
||||||
|
env = HerdingEnv()
|
||||||
|
|
||||||
|
Override a subset::
|
||||||
|
|
||||||
|
cfg = HerdingConfig(tracker=TrackerConfig(forget_steps=60))
|
||||||
|
env = HerdingEnv(herding_cfg=cfg)
|
||||||
|
|
||||||
|
Use a named preset::
|
||||||
|
|
||||||
|
env = HerdingEnv(herding_cfg=HERDING_WEBOTS) # 140° FOV
|
||||||
|
env = HerdingEnv(herding_cfg=HERDING_MEC_WEBOTS) # + mecanum slip
|
||||||
|
|
||||||
|
Design notes
|
||||||
|
------------
|
||||||
|
* All dataclasses are frozen so instances are immutable after construction.
|
||||||
|
* This module must not import from other ``herding.*`` packages, because
|
||||||
|
doing so would create an import cycle. Field-geometry constants
|
||||||
|
therefore live in ``herding.world.geometry``: they depend on the world
|
||||||
|
variant selected at runtime via ``HERDING_WORLD``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from dataclasses import dataclass, field, replace
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# LiDAR hardware spec
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class LidarConfig:
    """Parameters of the simulated / physical LiDAR sensor.

    The two canonical presets are :data:`LIDAR_FULL` (360°, oracle mode)
    and :data:`LIDAR_WEBOTS` (140°/180-ray, matches the ShepherdDog proto).

    Raises:
        ValueError: if ``n_rays`` < 1, ``fov_rad`` is outside (0, 2π],
            ``max_range`` ≤ 0, or any of ``noise_std``, ``sheep_radius``,
            ``post_radius`` is negative.
    """

    n_rays: int = 360
    """Number of rays in the scan."""

    fov_rad: float = 2.0 * math.pi
    """Full field-of-view in radians, centred on the robot's forward axis."""

    max_range: float = 12.0
    """Maximum detectable range in metres."""

    noise_std: float = 0.005
    """Gaussian standard deviation (metres) applied to each hit reading."""

    sheep_radius: float = 0.30
    """Effective disc radius of a sheep in the 2-D LiDAR plane (metres)."""

    post_radius: float = 0.25
    """Effective disc radius of gate / corner posts (metres)."""

    def __post_init__(self) -> None:
        """Validate field ranges (read-only: the dataclass is frozen)."""
        if self.n_rays < 1:
            raise ValueError(f"n_rays must be ≥ 1, got {self.n_rays}")
        if not (0.0 < self.fov_rad <= 2.0 * math.pi):
            raise ValueError(f"fov_rad must be in (0, 2π], got {self.fov_rad:.4f}")
        if self.max_range <= 0.0:
            raise ValueError(f"max_range must be > 0, got {self.max_range}")
        # A standard deviation or a disc radius cannot be negative; zero is
        # allowed (noise-free sensor / point-like obstacle).
        for name in ("noise_std", "sheep_radius", "post_radius"):
            value = getattr(self, name)
            if value < 0.0:
                raise ValueError(f"{name} must be ≥ 0, got {value}")
|
||||||
|
|
||||||
|
|
||||||
|
# Named presets -----------------------------------------------------------
|
||||||
|
|
||||||
|
LIDAR_FULL = LidarConfig(
    n_rays=360,
    fov_rad=2.0 * math.pi,
)
"""360° full-circle scan — oracle / ablation mode."""

LIDAR_WEBOTS = LidarConfig(
    n_rays=180,
    fov_rad=math.radians(140.0),
)
"""Matches the ShepherdDog.proto Lidar device (180 rays, 140° FOV).

Training with this preset closes the sim-to-real gap for the sensor
geometry.  Because the observation is built from tracker output (not raw
rays), a policy trained here can be deployed on a wider-FOV LiDAR (e.g.
240° or 360°) without retraining.  More FOV means more true detections,
but a wider scan also sees more static structure (far walls, gate posts,
pen rails), so the detection / tracker thresholds may need extra margin —
see the looser settings in :data:`HERDING_MEC_WEBOTS_360`.
"""

LIDAR_WEBOTS_360 = LidarConfig(
    n_rays=360,
    fov_rad=2.0 * math.pi,
    max_range=15.0,
)
"""Matches ShepherdDog360.proto (360 rays, 360° FOV, 15 m range).

Used by the FOV-ablation Webots launch (HERDING_LIDAR=360).  The wider
range and full surround visibility hand the tracker more detections
per step, so the trained policy — already trained on 360° gym
perception — sees an observation distribution closer to training.
"""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Cluster-detection pipeline
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class DetectionConfig:
    """Parameters for the LiDAR-scan → detection clustering pipeline.

    Every field is a distance in metres and must be non-negative.

    Raises:
        ValueError: if any threshold is negative.
    """

    gap_threshold: float = 0.6
    """Adjacent hit-points farther apart than this (metres) start a new cluster."""

    max_cluster_span: float = 1.5
    """Clusters wider than this (metres) are rejected as walls / structures."""

    range_hit_eps: float = 0.05
    """A ray is considered a hit if ``range < max_range - range_hit_eps``."""

    split_range_gap: float = 0.20
    """Range increase within a cluster that triggers a multi-peak split."""

    wall_reject: float = 0.5
    """Drop detections within this distance (metres) of any field wall."""

    static_reject: float = 0.8
    """Drop detections within this distance (metres) of known static features
    (gate posts, field corners)."""

    def __post_init__(self) -> None:
        """Reject negative thresholds (read-only: the dataclass is frozen)."""
        # The two rejection radii are checked first, preserving the order
        # of the original validation; the remaining thresholds follow.
        for name in (
            "wall_reject",
            "static_reject",
            "gap_threshold",
            "max_cluster_span",
            "range_hit_eps",
            "split_range_gap",
        ):
            value = getattr(self, name)
            if value < 0.0:
                raise ValueError(f"{name} must be ≥ 0, got {value}")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Multi-target tracker
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class TrackerConfig:
    """Parameters for the nearest-neighbour sheep tracker.

    Raises:
        ValueError: if a step count / consensus parameter is below its
            documented minimum, or if a gate, the velocity clamp, or a
            step horizon is negative.
    """

    gate_m: float = 2.5
    """Primary NN association gate in metres (recently observed tracks)."""

    reacquire_gate_m: float = 4.5
    """Wider gate used when re-acquiring tracks stale for ≥ ``reacquire_min_age`` steps."""

    reacquire_min_age: int = 20
    """Minimum staleness (steps) before the wider re-acquisition gate activates."""

    penned_gate_m: float = 4.0
    """Gate for matching new detections to already-penned tracks."""

    forget_steps: int = 200
    """Delete an active track that has not been observed for this many steps (~3.2 s)."""

    predict_steps: int = 120
    """Extrapolate a track's position using constant velocity for this many steps (~1.9 s)."""

    velocity_clamp: float = 1.0
    """Maximum predicted speed (m/s) used during extrapolation."""

    max_new_tracks_per_step: int = 10
    """Maximum number of new tracks that may be spawned in a single step.

    Capping this limits the damage from LiDAR false-positive bursts (e.g.
    wall reflections in Webots) that would otherwise flood the track set.
    The default (10 = MAX_SHEEP) preserves the original behaviour; reduce
    to 2–3 for Webots deployment robustness.
    """

    pen_latch_depth: float = 0.0
    """Minimum depth past the gate line (metres) before a track is latched
    as penned.  0.0 = original behaviour (latch at y ≤ GATE_Y).  Increase
    to 0.5 for Webots to prevent gate-hardware LiDAR reflections near y=-15
    from permanently consuming tracker slots as false "penned" sheep.
    """

    consensus_k: int = 3
    """New tracks must accumulate this many matches before they appear in
    ``get_positions``.  ``1`` disables the candidate stage entirely;
    ``3`` (default) requires three nearby confirmations within
    ``consensus_max_age`` and reliably filters single-shot detection
    splits / out-of-range stragglers that confuse the policy on the
    round field while real sheep promote in ~50 ms (3 frames).
    """

    consensus_radius_m: float = 0.5
    """Maximum distance (metres) between successive matches for a candidate
    to age toward promotion.  Tighter than ``gate_m`` so wall-cluster
    centroid jitter cannot keep a phantom alive.  Real sheep move
    ≪ 0.05 m / step at max speed so this gate is very loose for them.
    """

    consensus_max_age: int = 15
    """A candidate that has not been matched for this many steps is dropped.
    Short enough that a one-shot phantom can't keep itself alive, long
    enough that a real sheep glimpsed twice in a short interval
    confirms.
    """

    def __post_init__(self) -> None:
        """Validate field ranges (read-only: the dataclass is frozen)."""
        if self.forget_steps < 1:
            raise ValueError(f"forget_steps must be ≥ 1, got {self.forget_steps}")
        if self.max_new_tracks_per_step < 1:
            raise ValueError(
                f"max_new_tracks_per_step must be ≥ 1, got {self.max_new_tracks_per_step}"
            )
        if self.consensus_k < 1:
            raise ValueError(f"consensus_k must be ≥ 1, got {self.consensus_k}")
        if self.consensus_radius_m <= 0.0:
            raise ValueError(
                f"consensus_radius_m must be > 0, got {self.consensus_radius_m}"
            )
        if self.consensus_max_age < 1:
            raise ValueError(
                f"consensus_max_age must be ≥ 1, got {self.consensus_max_age}"
            )
        # Distances, speeds and step horizons cannot be negative.
        # ``pen_latch_depth`` is deliberately left unchecked: whether a
        # negative latch offset is meaningful depends on tracker code that
        # lives outside this module.
        for name in (
            "gate_m",
            "reacquire_gate_m",
            "penned_gate_m",
            "velocity_clamp",
            "reacquire_min_age",
            "predict_steps",
        ):
            value = getattr(self, name)
            if value < 0:
                raise ValueError(f"{name} must be ≥ 0, got {value}")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Robot physical specification
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class RobotConfig:
    """Physical parameters of the shepherd-dog robot.

    Values mirror ``protos/ShepherdDog.proto`` and ``protos/ShepherdDogMecanum.proto``.

    Raises:
        ValueError: if a wheel dimension or ``max_wheel_omega`` is not
            strictly positive, ``action_smooth`` is outside [0, 1), or
            ``strafe_efficiency`` is outside (0, 1].
    """

    wheel_radius: float = 0.038
    """Wheel radius in metres."""

    wheel_base: float = 0.28
    """Axle-to-axle distance for differential drive (metres)."""

    wheel_base_x: float = 0.28
    """Front-to-back axle distance for mecanum drive (metres)."""

    wheel_base_y: float = 0.28
    """Left-to-right axle distance for mecanum drive (metres)."""

    max_wheel_omega: float = 70.0
    """Maximum wheel angular velocity (rad/s)."""

    action_smooth: float = 0.0
    """Exponential moving-average coefficient applied to actions inside the env.

    ``0.0`` means no smoothing (gym default).
    ``0.55`` matches the hard-coded EMA in ``shepherd_dog.py`` — use this
    when training so the policy learns to act through the same filter it
    sees at deployment.
    """

    strafe_efficiency: float = 1.0
    """Mecanum strafe magnitude as a fraction of textbook X-pattern.

    ``1.0`` (default) is the ideal kinematic mecanum.  Values below 1
    model strafe slip; the Webots controller reads the same value and
    applies it in the Supervisor velocity injection, so gym training
    and Webots deployment see identical body motion.  No effect on
    differential drive.
    """

    strafe_to_forward_bleed: float = 0.0
    """Fraction of ideal strafe magnitude that bleeds into body-frame x.

    ``0.0`` (default) = no bleed.  Non-zero values add
    ``strafe_to_forward_bleed * |vy_body_ideal|`` to ``vx_body`` to
    model the consistent forward (or backward) drift that some
    mecanum chassis exhibit during pure-strafe commands.  The sign is
    therefore meaningful and negative values are allowed (the mecanum
    presets use ``-0.40``).  No effect on differential drive.
    """

    def __post_init__(self) -> None:
        """Validate field ranges (read-only: the dataclass is frozen)."""
        # Wheel geometry and the speed limit are physical magnitudes; a
        # zero or negative value would make ``max_linear`` meaningless.
        for name in (
            "wheel_radius",
            "wheel_base",
            "wheel_base_x",
            "wheel_base_y",
            "max_wheel_omega",
        ):
            value = getattr(self, name)
            if value <= 0.0:
                raise ValueError(f"{name} must be > 0, got {value}")
        if not (0.0 <= self.action_smooth < 1.0):
            raise ValueError(
                f"action_smooth must be in [0, 1), got {self.action_smooth}"
            )
        if not (0.0 < self.strafe_efficiency <= 1.0):
            raise ValueError(
                f"strafe_efficiency must be in (0, 1], got {self.strafe_efficiency}"
            )

    @property
    def max_linear(self) -> float:
        """Maximum achievable linear speed (m/s): wheel radius × max wheel ω."""
        return self.wheel_radius * self.max_wheel_omega
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Domain randomisation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class DomainRandomConfig:
    """Parameters that inject physics / sensor noise for domain randomisation.

    The three noise sources (``fp_rate``, ``wheel_slip_std``,
    ``compass_noise_std``) default to 0 (disabled) so the base env is
    deterministic and backwards-compatible.  ``fp_std_pos`` has a non-zero
    default (0.3 m) but describes the injected false positives, so it
    should have no effect while ``fp_rate`` is 0.  Enable the noise
    sources gradually to close the sim-to-real gap.

    Raises:
        ValueError: if any field is negative.
    """

    fp_rate: float = 0.0
    """Mean number of false-positive detections injected per step (Poisson λ).

    FPs are placed near static features (walls, posts) with positional
    noise ``fp_std_pos``, mimicking the spurious clusters Webots' physical
    LiDAR returns from 3D geometry.
    """

    fp_std_pos: float = 0.3
    """Positional standard deviation (metres) of injected false-positive clusters."""

    wheel_slip_std: float = 0.0
    """Gaussian noise standard deviation (rad/s) added to each wheel speed
    before kinematic integration.  Models real-world wheel slip and motor
    variation.  Suggested starting value: 0.05.
    """

    compass_noise_std: float = 0.0
    """Gaussian noise standard deviation (radians) added to the heading
    reading each step.  Models magnetometer drift in Webots.
    Suggested starting value: 0.02.
    """

    def __post_init__(self) -> None:
        """Reject negative rates / standard deviations (dataclass is frozen)."""
        # ``fp_rate`` first, then the remaining fields, so that existing
        # error messages and their precedence are unchanged.
        for name in ("fp_rate", "wheel_slip_std", "compass_noise_std", "fp_std_pos"):
            value = getattr(self, name)
            if value < 0.0:
                raise ValueError(f"{name} must be ≥ 0, got {value}")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Aggregate config
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class HerdingConfig:
    """Top-level configuration handed to :class:`~training.herding_env.HerdingEnv`.

    Each sub-config falls back to its own defaults (the original
    simulation parameters), which is why ``HerdingEnv()`` and
    ``HerdingEnv(herding_cfg=HerdingConfig())`` behave identically.
    """

    lidar: LidarConfig = field(default_factory=LidarConfig)
    detection: DetectionConfig = field(default_factory=DetectionConfig)
    tracker: TrackerConfig = field(default_factory=TrackerConfig)
    robot: RobotConfig = field(default_factory=RobotConfig)
    domain_random: DomainRandomConfig = field(default_factory=DomainRandomConfig)

    def replace(self, **kwargs) -> "HerdingConfig":
        """Build a copy of this config with some top-level sub-configs swapped.

        Keyword names are the field names of this class; the instance
        itself is frozen and is left untouched.

        Example::

            cfg = HERDING_WEBOTS.replace(
                domain_random=DomainRandomConfig(fp_rate=2.0, wheel_slip_std=0.05)
            )
        """
        # Inside the method body the bare name ``replace`` is looked up in
        # module scope, i.e. it is ``dataclasses.replace`` — not this method.
        updated = replace(self, **kwargs)
        return updated
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Named full-pipeline presets
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
HERDING_DEFAULT = HerdingConfig()
"""Original simulation defaults — zero behaviour change."""

HERDING_WEBOTS = HerdingConfig(
    lidar=LIDAR_WEBOTS,
    detection=DetectionConfig(wall_reject=0.5, static_reject=1.2),
    tracker=TrackerConfig(
        forget_steps=300,
        max_new_tracks_per_step=1,
        pen_latch_depth=2.0,
        predict_steps=180,
        consensus_k=3,
        consensus_radius_m=0.3,
        consensus_max_age=20,
    ),
    robot=RobotConfig(action_smooth=0.55),
)
"""Webots-matched training preset.

Changes vs HERDING_DEFAULT:
  * LiDAR: 180 rays / 140° FOV matching ShepherdDog.proto hardware
  * Detection: wall_reject kept at 0.5 m (original default; static_reject
    handles post FPs; 1.0 m was too aggressive near the south gate);
    static_reject raised 0.8 → 1.2 m
  * Tracker:
      - consensus_k=3, radius=0.3 m, max_age=20 (~320 ms window): a new
        detection must be confirmed by two more nearby detections within
        a tight 0.3 m radius to promote.  Real sheep barely move
        frame-to-frame (≪0.05 m/step) so they easily self-confirm while
        the dog is rotating across them; wall-return phantoms whose
        cluster centroid jitters by more than 0.3 m as the dog moves
        can't accumulate three nearby hits and decay as separate
        candidates.
      - forget_steps=300 (~4.8 s) + predict_steps=180 (~2.9 s): once a
        real sheep is confirmed, it lives in tracker memory long enough
        for the policy — trained on 360° full-visibility obs — to plan
        while the dog sweeps a sparse cone across the field.  Set short
        enough that any phantom that does leak through promotion dies
        after the dog walks away from the wall that created it.
      - max_new_tracks_per_step=1 still rate-caps spawn bursts.
      - pen_latch_depth 0.0 → 2.0 m: a track must be this far past the
        gate line before it is latched as penned.
  * Robot: action_smooth 0.0 → 0.55 (matches Webots controller EMA)
"""

HERDING_MEC_WEBOTS = HerdingConfig(
    lidar=LIDAR_WEBOTS,
    detection=DetectionConfig(wall_reject=0.5, static_reject=1.2),
    tracker=TrackerConfig(
        forget_steps=300,
        max_new_tracks_per_step=1,
        pen_latch_depth=2.0,
        predict_steps=180,
        consensus_k=3,
        consensus_radius_m=0.3,
        consensus_max_age=20,
    ),
    robot=RobotConfig(
        action_smooth=0.55,
        strafe_efficiency=0.26,
        strafe_to_forward_bleed=-0.40,
    ),
)
"""Mecanum + 140° LiDAR preset.

Mirrors HERDING_WEBOTS but with mecanum-specific kinematic scaling
(``strafe_efficiency`` and ``strafe_to_forward_bleed``) applied to
the gym forward-kinematics formula.  The Webots controller reads
these same values via ``RobotConfig`` and feeds them through the
Supervisor velocity injection, so gym and Webots produce identical
body motion.  Diff-drive ignores both fields.
"""

HERDING_MEC_WEBOTS_360 = HerdingConfig(
    lidar=LIDAR_WEBOTS_360,
    # Looser detection thresholds for the wider FOV — the 360° scan
    # catches far walls, gate posts and pen rails the 140° front cone
    # never sees, so the cluster/feature filters need slightly more
    # margin to keep promotion rates similar.
    detection=DetectionConfig(wall_reject=0.6, static_reject=1.2),
    tracker=TrackerConfig(
        forget_steps=300,
        max_new_tracks_per_step=2,   # 360° gives more candidates per step
        pen_latch_depth=3.0,
        predict_steps=180,
        consensus_k=3,
        consensus_radius_m=0.3,
        consensus_max_age=20,
    ),
    robot=RobotConfig(
        action_smooth=0.55,
        strafe_efficiency=0.26,
        strafe_to_forward_bleed=-0.40,
    ),
)
"""Mecanum + 360° LiDAR preset (the deployable mecanum target).

The 360° FOV gives the policy perception coverage in every direction,
which matches the omnidirectional motion the mecanum chassis can
produce.  Used for both gym training and Webots deployment so the
trained policy sees the same observation geometry it will face at
deploy time.
"""
|
||||||
@@ -0,0 +1,122 @@
|
|||||||
|
"""Active-perception wrapper for the analytic shepherd teachers.
|
||||||
|
|
||||||
|
Under partial-observability LiDAR perception the tracker starts empty
|
||||||
|
— a naive analytic teacher returns ``(0, 0, "idle")`` and the dog
|
||||||
|
stops. This wrapper interleaves the underlying teacher with two
|
||||||
|
exploration behaviours:
|
||||||
|
|
||||||
|
* opening in-place rotation for the first ``INITIAL_SCAN_STEPS``,
|
||||||
|
guaranteeing the LiDAR sweeps a full circle before driving;
|
||||||
|
* walk-to-centre when the tracker has been empty for at least
|
||||||
|
``EMPTY_DEBOUNCE_STEPS`` consecutive frames (corners can sit
|
||||||
|
beyond the 12 m LiDAR range).
|
||||||
|
|
||||||
|
When the tracker has detections the base teacher's action is used,
|
||||||
|
post-processed by ``modulate_speed`` so the dog doesn't
|
||||||
|
charge the flock.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
from herding.control.modulation import modulate_speed
|
||||||
|
|
||||||
|
|
||||||
|
INITIAL_SCAN_STEPS = 80     # opening in-place rotation length, in steps (≈1.3 s)
EXPLORE_SPEED = 0.7         # action norm while walking blind toward the centre
EMPTY_DEBOUNCE_STEPS = 8    # consecutive empty frames before exploring


class ActiveScanTeacher:
    """Stateful active-perception wrapper around an analytic teacher.

    Construct one per episode, or call :meth:`reset` between episodes.

    Call signature::

        vx, vy, omega, mode = teacher(dog_xy, dog_heading, sheep_positions,
                                      pen_target, drive_mode="differential")

    ``omega`` is the yaw-rate intent (mecanum only); it is 0.0 for
    differential drive, during the scan / explore / hold phases, and whenever
    the wrapped teacher returns a three-element ``(vx, vy, mode)`` result.

    ``mode`` is ``"scan_initial"``, ``"explore"``, ``"scan_at_centre"``,
    ``"hold"``, or the label reported by the wrapped teacher.
    """

    def __init__(self, base_action_fn, initial_scan_steps: int = INITIAL_SCAN_STEPS):
        """Wrap ``base_action_fn`` and start a fresh episode.

        ``base_action_fn`` may use any of the three supported conventions:
        ``(dog_xy, dog_heading, sheep, pen, drive_mode)``,
        ``(dog_xy, dog_heading, sheep, pen)`` or ``(dog_xy, sheep, pen)``.
        """
        self.base = base_action_fn
        self.initial_scan = int(initial_scan_steps)
        self.reset()

    def reset(self) -> None:
        """Clear the per-episode counters and the remembered action."""
        self.step = 0
        self.empty_streak = 0
        self.last_action: tuple[float, float] = (0.0, 0.0)

    @staticmethod
    def _scan_action(dog_heading: float) -> tuple[float, float]:
        """Unit intent pointing directly behind the dog.

        Per the original author's note, ``velocity_to_wheels``' cos(err)
        clamp drives forward speed to ~0 for such a target, which yields an
        in-place rotation.
        """
        target = dog_heading + math.pi
        return math.cos(target), math.sin(target)

    @staticmethod
    def _explore_action(dog_xy) -> tuple[float, float]:
        """Walk toward (0, 0) while the LiDAR keeps sweeping.

        Returns ``(0.0, 0.0)`` once the dog is within 0.5 m of the origin;
        the caller treats that as "arrived at centre".
        """
        dx, dy = -dog_xy[0], -dog_xy[1]
        d = math.hypot(dx, dy)
        if d < 0.5:
            return 0.0, 0.0
        return EXPLORE_SPEED * dx / d, EXPLORE_SPEED * dy / d

    def _call_base(self, dog_xy, dog_heading, sheep_positions, pen_target,
                   drive_mode):
        """Invoke the wrapped teacher using the first convention it accepts.

        The convention is chosen by binding the candidate argument lists
        against the teacher's signature, so the teacher itself runs exactly
        once and any exception it raises (including ``TypeError``)
        propagates unchanged instead of being mistaken for an arity
        mismatch.
        """
        import inspect  # local import: only this helper needs it

        candidates = (
            (dog_xy, dog_heading, sheep_positions, pen_target, drive_mode),
            (dog_xy, dog_heading, sheep_positions, pen_target),
            (dog_xy, sheep_positions, pen_target),
        )

        # Cache the signature per wrapped callable; re-resolve if ``base``
        # has been reassigned since the last call.
        cached = getattr(self, "_base_signature", None)
        if cached is None or cached[0] is not self.base:
            try:
                signature = inspect.signature(self.base)
            except (TypeError, ValueError):
                signature = None  # e.g. some builtins expose no signature
            cached = (self.base, signature)
            self._base_signature = cached
        signature = cached[1]

        if signature is None:
            # No introspection available: fall back to best-effort probing.
            try:
                return self.base(*candidates[0])
            except TypeError:
                try:
                    return self.base(*candidates[1])
                except TypeError:
                    return self.base(*candidates[2])

        for args in candidates:
            try:
                signature.bind(*args)
            except TypeError:
                continue
            return self.base(*args)
        # Nothing binds: make the call anyway so the natural TypeError
        # describing the mismatch is raised to the caller.
        return self.base(*candidates[-1])

    def __call__(self, dog_xy, dog_heading, sheep_positions, pen_target,
                 drive_mode="differential"):
        """Return ``(vx, vy, omega, mode)`` for the current control step."""
        self.step += 1
        n_visible = len(sheep_positions)

        if n_visible == 0:
            self.empty_streak += 1
        else:
            self.empty_streak = 0

        # Phase 1: opening rotation.
        if self.step <= self.initial_scan:
            vx, vy = self._scan_action(dog_heading)
            self.last_action = (vx, vy)
            return vx, vy, 0.0, "scan_initial"

        # Phase 2: walk-to-centre after a sustained empty tracker.
        if self.empty_streak >= EMPTY_DEBOUNCE_STEPS:
            ex, ey = self._explore_action(dog_xy)
            if ex == 0.0 and ey == 0.0:
                # Already at the centre: keep rotating to sweep the LiDAR.
                vx, vy = self._scan_action(dog_heading)
                mode = "scan_at_centre"
            else:
                vx, vy = ex, ey
                mode = "explore"
            self.last_action = (vx, vy)
            return vx, vy, 0.0, mode

        # Phase 2b: brief tracker blink — hold the previous action.
        if n_visible == 0:
            vx, vy = self.last_action
            return vx, vy, 0.0, "hold"

        # Phase 3: hand off to the underlying analytic teacher, then apply
        # the shared near-sheep speed modulation.
        result = self._call_base(dog_xy, dog_heading, sheep_positions,
                                 pen_target, drive_mode)
        if len(result) == 4:
            vx, vy, omega, mode = result
        else:
            # Old-style teachers return (vx, vy, mode) with no yaw channel.
            vx, vy, mode = result
            omega = 0.0
        vx, vy = modulate_speed(vx, vy, dog_xy, sheep_positions)
        self.last_action = (vx, vy)
        return vx, vy, omega, mode
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
"""Shared action post-processing.
|
||||||
|
|
||||||
|
Every dog mode routes its action through ``modulate_speed``
|
||||||
|
so the magnitude is reduced near sheep — direction (intent) is
|
||||||
|
preserved.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
|
||||||
|
SLOW_NEAR_SHEEP = 2.5   # m — below this dog-to-sheep distance the action norm is reduced
MIN_SPEED = 0.30        # action-norm scale applied at zero distance


def modulate_speed(
    vx: float, vy: float,
    dog_xy: tuple[float, float],
    sheep_positions,
    slow_dist: float = SLOW_NEAR_SHEEP,
    min_scale: float = MIN_SPEED,
) -> tuple[float, float]:
    """Scale ``(vx, vy)`` down when the nearest sheep is closer than ``slow_dist``.

    The scale rises linearly from ``min_scale`` at distance 0 to 1.0 at
    ``slow_dist``; the direction of the action is untouched.
    ``sheep_positions`` may be a ``{name: (x, y)}`` dict or an iterable of
    ``(x, y)`` tuples. With no sheep, or none within ``slow_dist``, the
    action is returned unchanged.
    """
    if not sheep_positions:
        return vx, vy

    coords = (sheep_positions.values()
              if hasattr(sheep_positions, "values")
              else sheep_positions)
    gaps = [math.hypot(sx - dog_xy[0], sy - dog_xy[1]) for sx, sy in coords]
    # Seeding with +inf keeps the "nothing seen" sentinel when gaps is empty.
    nearest = min([math.inf, *gaps])

    if nearest >= slow_dist or nearest == math.inf:
        return vx, vy

    scale = min_scale + (1.0 - min_scale) * (nearest / slow_dist)
    return vx * scale, vy * scale
|
||||||
@@ -0,0 +1,82 @@
|
|||||||
|
"""Adaptive sequential shepherd-dog controller.
|
||||||
|
|
||||||
|
Three-phase strategy:
|
||||||
|
|
||||||
|
1. **Collect** (flock scattered): Strömbom collect — park behind the
|
||||||
|
furthest sheep and push it toward the CoM. Identical to the
|
||||||
|
Strömbom heuristic; keeps the flock together.
|
||||||
|
|
||||||
|
2. **Drive** (flock compact, >STRAGGLER_THRESHOLD active): Strömbom
|
||||||
|
drive — park behind the CoM relative to the pen and push the whole
|
||||||
|
group through the gate.
|
||||||
|
|
||||||
|
3. **Targeted** (≤STRAGGLER_THRESHOLD sheep remain active): single-
|
||||||
|
target push on the sheep closest to the pen entry. Safe to isolate
|
||||||
|
individual sheep once the flock is nearly exhausted.
|
||||||
|
|
||||||
|
The original pure pin-and-push (Phase 3 only) caused flock scatter in
|
||||||
|
Webots physics whenever the dog tried to isolate a sheep while others
|
||||||
|
were still spread across the field. Phases 1–2 handle the bulk of
|
||||||
|
herding with flock-aware Strömbom logic; Phase 3 cleans up stragglers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
from herding.world.geometry import GATE_Y, PEN_ENTRY, in_pen
|
||||||
|
|
||||||
|
|
||||||
|
# Tuning constants for the three-phase controller.
F_FACTOR = 4.0             # flock counts as scattered when radius > F_FACTOR·√n
DELTA_COLLECT = 1.5        # collect: standoff behind the furthest sheep
DELTA_DRIVE = 2.0          # drive: standoff behind the flock CoM
DELTA_TARGET = 1.5         # targeted: standoff behind the single target sheep
STRAGGLER_THRESHOLD = 2    # targeted push starts once ≤ this many sheep are active
|
||||||
|
|
||||||
|
|
||||||
|
def _unit(x: float, y: float):
|
||||||
|
d = math.hypot(x, y)
|
||||||
|
if d < 1e-6:
|
||||||
|
return 0.0, 0.0
|
||||||
|
return x / d, y / d
|
||||||
|
|
||||||
|
|
||||||
|
def _is_active(x: float, y: float) -> bool:
    """True when the sheep is not in the pen and lies north of ``GATE_Y``."""
    if in_pen(x, y):
        return False
    return y > GATE_Y
|
||||||
|
|
||||||
|
|
||||||
|
def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Return ``(vx, vy, mode)`` — same signature as Strömbom.

    ``mode`` is ``"idle"`` (no active sheep), ``"targeted"``, ``"collect"``
    or ``"drive"``. ``(vx, vy)`` is a unit vector from the dog toward the
    chosen standoff point, or ``(0.0, 0.0)`` when idle or already there.
    """
    active = []
    for x, y in sheep_positions.values():
        if _is_active(x, y):
            active.append((x, y))
    if not active:
        return 0.0, 0.0, "idle"

    count = len(active)
    com_x = sum(x for x, _ in active) / count
    com_y = sum(y for _, y in active) / count
    spread = [math.hypot(x - com_x, y - com_y) for x, y in active]

    # Every phase picks an anchor point, the direction to stand off along,
    # and a standoff distance; the target is then anchor + standoff * dir.
    if count <= STRAGGLER_THRESHOLD:
        # Targeted: push the sheep closest to the pen entry individually.
        def _range_to_pen(p):
            return math.hypot(p[0] - pen_target[0], p[1] - pen_target[1])

        anchor_x, anchor_y = min(active, key=_range_to_pen)
        away_x, away_y = _unit(anchor_x - pen_target[0], anchor_y - pen_target[1])
        standoff, mode = DELTA_TARGET, "targeted"
    elif max(spread) > F_FACTOR * math.sqrt(count):
        # Collect: aim behind the sheep furthest from the CoM.
        far = max(range(count), key=spread.__getitem__)
        anchor_x, anchor_y = active[far]
        away_x, away_y = _unit(anchor_x - com_x, anchor_y - com_y)
        standoff, mode = DELTA_COLLECT, "collect"
    else:
        # Drive: push the whole compact flock toward the gate.
        anchor_x, anchor_y = com_x, com_y
        away_x, away_y = _unit(com_x - pen_target[0], com_y - pen_target[1])
        standoff, mode = DELTA_DRIVE, "drive"

    tx = anchor_x + standoff * away_x
    ty = anchor_y + standoff * away_y
    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    return ax, ay, mode
|
||||||
@@ -0,0 +1,78 @@
|
|||||||
|
"""Strömbom (2014) collect/drive heuristic for the shepherd dog.
|
||||||
|
|
||||||
|
When the flock is scattered (max radius > F_FACTOR · √n) the dog moves
|
||||||
|
to a point behind the furthest sheep and pushes it back toward the
|
||||||
|
flock CoM. Otherwise it drives, parking behind the CoM relative to
|
||||||
|
the pen target. Returns a unit-vector intent ``(vx, vy, mode)``.
|
||||||
|
|
||||||
|
Reference: Strömbom et al. 2014, "Solving the shepherding problem."
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
from herding.world.geometry import (
|
||||||
|
FIELD_ROUND_R, FIELD_SHAPE,
|
||||||
|
PEN_ENTRY, GATE_Y, in_pen,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Strömbom heuristic tuning.
F_FACTOR = 4.0         # scattered when max radius > F_FACTOR·√n (collect vs. drive)
DELTA_COLLECT = 1.5    # collect: offset of the dog's target behind the furthest sheep
DELTA_DRIVE = 2.0      # drive: offset of the dog's target behind the flock CoM
|
||||||
|
|
||||||
|
|
||||||
|
def _unit(x, y):
|
||||||
|
d = math.hypot(x, y)
|
||||||
|
if d < 1e-6:
|
||||||
|
return 0.0, 0.0
|
||||||
|
return x / d, y / d
|
||||||
|
|
||||||
|
|
||||||
|
def _is_active(x, y) -> bool:
    """Whether a sheep still needs herding.

    A sheep inside the pen, or at/below ``GATE_Y``, no longer counts.
    """
    if in_pen(x, y):
        return False
    return y > GATE_Y
|
||||||
|
|
||||||
|
|
||||||
|
def compute_action(dog_xy, sheep_positions, pen_target=PEN_ENTRY):
    """Return ``(vx, vy, mode)`` — mode in {idle, collect, drive}.

    ``(vx, vy)`` is the unit vector from the dog to the chosen standoff
    point (zero when idle or when the dog is already on it).
    """
    active = []
    for x, y in sheep_positions.values():
        if _is_active(x, y):
            active.append((x, y))
    if not active:
        return 0.0, 0.0, "idle"

    count = len(active)
    com_x = sum(x for x, _ in active) / count
    com_y = sum(y for _, y in active) / count
    spread = [math.hypot(x - com_x, y - com_y) for x, y in active]
    far = max(range(count), key=spread.__getitem__)

    if spread[far] > F_FACTOR * math.sqrt(count):
        # Collect: stand behind the furthest sheep, on the side away from the CoM.
        mode = "collect"
        sx, sy = active[far]
        ux, uy = _unit(sx - com_x, sy - com_y)
        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
    else:
        # Drive: stand behind the CoM, on the side away from the pen.
        mode = "drive"
        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy

    # Round-field wall fallback. If the drive target falls outside the
    # curved boundary (less a 1 m margin), stand radially outside the CoM
    # instead so the flock is pushed inward off the wall; otherwise the dog
    # ends up tangent to the wall and the flock circles indefinitely.
    if mode == "drive" and FIELD_SHAPE == "field_round":
        limit = FIELD_ROUND_R - 1.0
        if math.hypot(tx, ty) > limit:
            r_com = math.hypot(com_x, com_y)
            if r_com > 1e-3:
                out_x, out_y = com_x / r_com, com_y / r_com
                tx = com_x + DELTA_DRIVE * out_x
                ty = com_y + DELTA_DRIVE * out_y
                r_t = math.hypot(tx, ty)
                if r_t > limit:
                    # Pull the target back inside the reachable radius.
                    shrink = limit / r_t
                    tx *= shrink
                    ty *= shrink

    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
    return ax, ay, mode
|
||||||
@@ -0,0 +1,209 @@
|
|||||||
|
"""Universal shepherd teacher — Strömbom core + mecanum omega + straggler recovery.
|
||||||
|
|
||||||
|
The core collect/drive logic is **identical** to :mod:`strombom` (same
|
||||||
|
``F_FACTOR``, ``DELTA_COLLECT``, ``DELTA_DRIVE`` thresholds and target
|
||||||
|
computation) so it inherits the proven ~100 % success rate at n ≤ 8.
|
||||||
|
Two additions make it useful as a universal teacher:
|
||||||
|
|
||||||
|
1. **Omega for mecanum.** When ``drive_mode="mecanum"``, the teacher
|
||||||
|
outputs a non-zero ``omega`` channel so the dog **faces the
|
||||||
|
direction of travel**. During collect the dog faces the target
|
||||||
|
sheep; during drive it faces the pen. This gives the BC student a
|
||||||
|
real rotation signal to learn from.
|
||||||
|
|
||||||
|
2. **Small-flock recovery.** When at most ``RECOVERY_MAX_N`` sheep remain
|
||||||
|
active and the one nearest the gate is within ``RECOVERY_GATE_DIST`` of it,
|
||||||
|
the dog positions itself behind that sheep (opposite the gate) and pushes
|
||||||
|
it straight through. This handles the last few sheep circling the gate posts.
|
||||||
|
|
||||||
|
Call signature::
|
||||||
|
|
||||||
|
vx, vy, omega, mode = compute_action(
|
||||||
|
dog_xy, dog_heading, sheep_positions, pen_target,
|
||||||
|
drive_mode="differential",
|
||||||
|
)
|
||||||
|
|
||||||
|
For differential drive ``omega`` is always 0.0 and can be ignored.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
from herding.world.geometry import (
|
||||||
|
FIELD_ROUND_R, FIELD_SHAPE,
|
||||||
|
PEN_ENTRY, GATE_X, GATE_Y, in_pen,
|
||||||
|
)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Tuning constants — match Strömbom exactly for proven success rates.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
F_FACTOR = 4.0         # scattered when max radius > F_FACTOR·√n (collect vs. drive)
DELTA_COLLECT = 1.5    # collect: standoff behind the furthest sheep
DELTA_DRIVE = 2.0      # drive: standoff behind the flock CoM

# Mecanum only: gain on the heading error when turning to face the target.
OMEGA_GAIN = 0.6

# Small-flock recovery. Flocks of at most RECOVERY_MAX_N sheep are pushed
# through the gate one sheep at a time. Handling n=1 alone is not enough:
# at n=2..3 on the round field the flock is too small to self-cohere
# through the 3 m gate, and the standard collect/drive standoff just orbits
# them. The sheep nearest the gate goes first; once it pens, the same rule
# applies to the next one.
RECOVERY_MAX_N = 3
RECOVERY_GATE_DIST = 8.0   # recovery only when the target sheep is this close to the gate
RECOVERY_PUSH_DIST = 1.2   # standoff behind the sheep, on the side away from the gate
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _unit(x, y):
|
||||||
|
d = math.hypot(x, y)
|
||||||
|
if d < 1e-6:
|
||||||
|
return 0.0, 0.0
|
||||||
|
return x / d, y / d
|
||||||
|
|
||||||
|
|
||||||
|
def _is_active(x, y) -> bool:
    """True for a sheep that is outside the pen and north of ``GATE_Y``."""
    penned = in_pen(x, y)
    return False if penned else y > GATE_Y
|
||||||
|
|
||||||
|
|
||||||
|
def _angle_diff(a, b):
|
||||||
|
"""Signed shortest angular difference a - b, in [-π, π]."""
|
||||||
|
return math.atan2(math.sin(a - b), math.cos(a - b))
|
||||||
|
|
||||||
|
|
||||||
|
def _gate_center():
    """Midpoint of the gate opening as ``(x, y)``."""
    left, right = GATE_X[0], GATE_X[1]
    return (0.5 * (left + right), GATE_Y)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Core teacher
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _yaw_intent(dog_xy, dog_heading, face_target, drive_mode):
    """Yaw command in [-1, 1] turning the dog toward ``face_target``.

    Always 0.0 unless ``drive_mode`` is ``"mecanum"``. The heading error is
    normalised by π and scaled by ``OMEGA_GAIN`` before clamping.
    """
    if drive_mode == "mecanum":
        bearing = math.atan2(face_target[1] - dog_xy[1],
                             face_target[0] - dog_xy[0])
        err = _angle_diff(bearing, dog_heading)
        return max(-1.0, min(1.0, OMEGA_GAIN * err / math.pi))
    return 0.0


def compute_action(dog_xy, dog_heading, sheep_positions,
                   pen_target=PEN_ENTRY, drive_mode="differential"):
    """Return ``(vx, vy, omega, mode)``.

    Parameters
    ----------
    dog_xy : (float, float)
        Dog position in world frame.
    dog_heading : float
        Dog heading in world frame (rad), 0 = +x axis.
    sheep_positions : dict[str, (float, float)]
        Visible sheep positions.
    pen_target : (float, float)
        Centre of the pen gate (defaults to geometry.PEN_ENTRY).
    drive_mode : str
        ``"differential"`` or ``"mecanum"``.

    Returns
    -------
    vx, vy : float
        Velocity intent in [-1, 1].
    omega : float
        Yaw intent in [-1, 1] (0 for differential).
    mode : str
        Phase label: ``"idle"``, ``"collect"``, ``"drive"``, ``"recovery"``.
    """
    active = []
    for x, y in sheep_positions.values():
        if _is_active(x, y):
            active.append((x, y))
    if not active:
        return 0.0, 0.0, 0.0, "idle"

    count = len(active)
    com_x = sum(x for x, _ in active) / count
    com_y = sum(y for _, y in active) / count
    spread = [math.hypot(x - com_x, y - com_y) for x, y in active]
    far = max(range(count), key=spread.__getitem__)

    # ---- Small-flock recovery (push sheep through the gate one by one) ----
    # Applies when at most RECOVERY_MAX_N sheep are active and the one
    # nearest the gate is within RECOVERY_GATE_DIST; otherwise control
    # falls through to the standard collect/drive logic below.
    gate = _gate_center()
    if count <= RECOVERY_MAX_N:
        # Finishing the sheep closest to the gate first lowers the active
        # count, so the remaining sheep get their own recovery turn.
        gate_dists = [math.hypot(x - gate[0], y - gate[1]) for x, y in active]
        nearest = min(range(count), key=gate_dists.__getitem__)
        sx, sy = active[nearest]
        d_gate = gate_dists[nearest]
        if d_gate < RECOVERY_GATE_DIST:
            if d_gate > 0.3:
                ux, uy = (sx - gate[0]) / d_gate, (sy - gate[1]) / d_gate
            else:
                # Sheep is essentially on the gate centre: stand off along +y.
                ux, uy = 0.0, 1.0
            tx = sx + RECOVERY_PUSH_DIST * ux
            ty = sy + RECOVERY_PUSH_DIST * uy
            ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])
            omega = _yaw_intent(dog_xy, dog_heading, (sx, sy), drive_mode)
            return ax, ay, omega, "recovery"

    # ---- Standard Strömbom collect/drive (proven core) ----
    if spread[far] > F_FACTOR * math.sqrt(count):
        # Collect: stand behind the furthest sheep, away from the CoM,
        # and face that sheep.
        mode = "collect"
        sx, sy = active[far]
        ux, uy = _unit(sx - com_x, sy - com_y)
        tx, ty = sx + DELTA_COLLECT * ux, sy + DELTA_COLLECT * uy
        face_target = (sx, sy)
    else:
        # Drive: stand behind the CoM, away from the pen, and face the pen.
        mode = "drive"
        ux, uy = _unit(com_x - pen_target[0], com_y - pen_target[1])
        tx, ty = com_x + DELTA_DRIVE * ux, com_y + DELTA_DRIVE * uy
        face_target = pen_target

    # Round field: when the flock CoM is close to the curved wall, the
    # natural "behind the flock" point can lie outside it. The dog then
    # chases an unreachable target, ends up tangent to the wall, and the
    # flock circles indefinitely. In that case stand radially outside the
    # CoM so the flock is pushed inward toward the centre; normal
    # pen-direction drive resumes once the flock is back in the interior.
    if mode == "drive" and FIELD_SHAPE == "field_round":
        limit = FIELD_ROUND_R - 1.0
        if math.hypot(tx, ty) > limit:
            r_com = math.hypot(com_x, com_y)
            if r_com > 1e-3:
                out_x, out_y = com_x / r_com, com_y / r_com
                tx = com_x + DELTA_DRIVE * out_x
                ty = com_y + DELTA_DRIVE * out_y
                # Clamp to the inside-field radius so the target is reachable.
                r_t = math.hypot(tx, ty)
                if r_t > limit:
                    shrink = limit / r_t
                    tx *= shrink
                    ty *= shrink

    ax, ay = _unit(tx - dog_xy[0], ty - dog_xy[1])

    # ---- Omega (mecanum only) ----
    omega = _yaw_intent(dog_xy, dog_heading, face_target, drive_mode)
    return ax, ay, omega, mode
|
||||||
@@ -0,0 +1,242 @@
|
|||||||
|
"""Cluster a 2D LiDAR scan into world-frame sheep position estimates.
|
||||||
|
|
||||||
|
Pipeline:
|
||||||
|
|
||||||
|
ranges (N,) → hit mask → world-frame points
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
adjacency clustering (gap > GAP_THRESHOLD
|
||||||
|
starts a new cluster, walking rays in
|
||||||
|
angular order)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
centroid + span + region + structure filters
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
list of (x, y) detections
|
||||||
|
|
||||||
|
The downstream tracker handles association across frames.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from herding.config import DetectionConfig, LidarConfig
|
||||||
|
|
||||||
|
from herding.world.geometry import (
|
||||||
|
FIELD_SHAPE, FIELD_ROUND_R,
|
||||||
|
FIELD_X, FIELD_Y, GATE_X, GATE_Y,
|
||||||
|
PEN_X, PEN_Y,
|
||||||
|
)
|
||||||
|
from herding.perception.lidar_sim import (
|
||||||
|
LIDAR_FOV, LIDAR_MAX_RANGE, LIDAR_N_RAYS, SHEEP_RADIUS, POST_RADIUS,
|
||||||
|
ray_angles,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
GAP_THRESHOLD = 0.6      # m — consecutive hit points farther apart than this start a new cluster
MAX_CLUSTER_SPAN = 1.5   # m — clusters wider than this are treated as walls / structures
RANGE_HIT_EPS = 0.05     # m — a ray counts as a hit when range < max_range - eps
WALL_REJECT = 0.5        # m — detections this close to a known wall line are dropped

# Multi-peak splitting: when the range profile inside one cluster rises to
# a local peak (range increases, then decreases again) and the overall
# range variation is at least SPLIT_RANGE_GAP, the cluster is split there
# into two detections.
SPLIT_RANGE_GAP = 0.20   # m — range variation that triggers a split

# Sheep-sized static features. A cluster centred within STATIC_REJECT of
# any of these is never reported as a sheep.
# NOTE(review): the rectangular-field entries are hard-coded while the
# round-field ones come from GATE_X / GATE_Y — confirm they match geometry.
_STATIC_FEATURES_RECT = (
    # gate posts
    (10.0, -15.0),
    (13.0, -15.0),
    # field corners
    (15.0, 15.0),
    (15.0, -15.0),
    (-15.0, 15.0),
    (-15.0, -15.0),
)

_STATIC_FEATURES_ROUND = (
    (GATE_X[0], GATE_Y),
    (GATE_X[1], GATE_Y),
)

STATIC_REJECT = 0.8
|
||||||
|
|
||||||
|
|
||||||
|
def _get_static_features():
    """Pick the static-feature table that matches ``FIELD_SHAPE``."""
    return (_STATIC_FEATURES_ROUND
            if FIELD_SHAPE == "field_round"
            else _STATIC_FEATURES_RECT)


# Resolved once, when the module is imported.
_STATIC_FEATURES = _get_static_features()
|
||||||
|
|
||||||
|
|
||||||
|
def _in_field_region(cx: float, cy: float) -> bool:
    """Whether ``(cx, cy)`` lies inside the field, allowing a 0.2 m margin."""
    if FIELD_SHAPE == "field_round":
        return math.hypot(cx, cy) < FIELD_ROUND_R + 0.2
    x_inside = FIELD_X[0] - 0.2 < cx < FIELD_X[1] + 0.2
    y_inside = FIELD_Y[0] - 0.2 < cy < FIELD_Y[1] + 0.2
    return x_inside and y_inside
|
||||||
|
|
||||||
|
|
||||||
|
def _near_wall(cx: float, cy: float, wall_reject: float = WALL_REJECT) -> bool:
    """Whether ``(cx, cy)`` is within ``wall_reject`` of a wall line.

    On the rectangular field the ``FIELD_Y[0]`` wall is ignored for x
    within ``PEN_X``, so detections in front of the pen are not rejected.
    """
    if FIELD_SHAPE == "field_round":
        return math.hypot(cx, cy) > FIELD_ROUND_R - wall_reject

    past_x_max = cx > FIELD_X[1] - wall_reject
    past_x_min = cx < FIELD_X[0] + wall_reject
    past_y_max = cy > FIELD_Y[1] - wall_reject
    past_y_min = cy < FIELD_Y[0] + wall_reject
    over_pen = PEN_X[0] <= cx <= PEN_X[1]
    return (past_x_max or past_x_min or past_y_max
            or (past_y_min and not over_pen))
|
||||||
|
|
||||||
|
|
||||||
|
def _split_cluster_by_range(
    points: list[tuple[float, float]],
    range_vals: list[float],
    split_range_gap: float = SPLIT_RANGE_GAP,
) -> list[list[tuple[float, float]]]:
    """Split a cluster at the farthest-range ray between two sheep.

    Two sheep standing close together show up as one arc, but the range
    profile rises where the rays pass between the two discs. Each piece is
    cut at its maximum-range ray (that ray is dropped) and both halves are
    examined again. A piece is left whole when it has fewer than 4 points,
    when its range variation is below ``split_range_gap``, or when the
    maximum sits within two rays of either end.

    NOTE(review): only the single largest range in a piece is considered,
    so an interior peak is missed when an edge ray is farther away.

    Returns the pieces in the original angular order.
    """
    pieces: list[list[tuple[float, float]]] = []
    # Explicit work stack; the left half is pushed last so it is handled first.
    pending = [(points, range_vals)]
    while pending:
        pts, rngs = pending.pop()
        size = len(pts)
        if size < 4:
            pieces.append(pts)
            continue
        farthest = max(rngs)
        if farthest - min(rngs) < split_range_gap:
            # Small range variation: a single target.
            pieces.append(pts)
            continue
        cut = rngs.index(farthest)
        if cut <= 1 or cut >= size - 2:
            # Peak is at the rim of the arc, not between two targets.
            pieces.append(pts)
            continue
        pending.append((pts[cut + 1:], rngs[cut + 1:]))
        pending.append((pts[:cut], rngs[:cut]))
    return pieces
|
||||||
|
|
||||||
|
|
||||||
|
def detections_from_scan(
    ranges: np.ndarray,
    dog_x: float, dog_y: float, dog_heading: float,
    max_range: float = LIDAR_MAX_RANGE,
    detection_cfg: "DetectionConfig | None" = None,
    lidar_cfg: "LidarConfig | None" = None,
) -> list[tuple[float, float]]:
    """Return list of (x, y) world-frame sheep position estimates.

    Pass ``detection_cfg`` to override clustering/filtering thresholds, or
    ``lidar_cfg`` to inform the function of a non-default FOV (the number of
    rays is inferred from the length of ``ranges``; FOV, sheep radius and
    max range are read from ``lidar_cfg``, which takes precedence over the
    ``max_range`` argument).

    Steps: mark hit rays, project them to world frame, cluster consecutive
    hits whose points are within the gap threshold, split multi-target
    clusters by range profile, then drop clusters that are too wide,
    outside the field / gate strip, on a static feature, or near a wall.

    For a full-circle scan the first and last rays are angular neighbours,
    so a target straddling that seam would otherwise be reported as two
    detections; the two seam clusters are merged when their facing end
    points satisfy the same gap rule.
    """
    # Resolve parameters — fall back to module-level constants when no cfg.
    if detection_cfg is not None:
        gap_thr = detection_cfg.gap_threshold
        max_span = detection_cfg.max_cluster_span
        hit_eps = detection_cfg.range_hit_eps
        split_gap = detection_cfg.split_range_gap
        wall_rej = detection_cfg.wall_reject
        static_rej = detection_cfg.static_reject
    else:
        gap_thr = GAP_THRESHOLD
        max_span = MAX_CLUSTER_SPAN
        hit_eps = RANGE_HIT_EPS
        split_gap = SPLIT_RANGE_GAP
        wall_rej = WALL_REJECT
        static_rej = STATIC_REJECT

    sheep_r = lidar_cfg.sheep_radius if lidar_cfg is not None else SHEEP_RADIUS
    fov = lidar_cfg.fov_rad if lidar_cfg is not None else LIDAR_FOV
    if lidar_cfg is not None:
        max_range = lidar_cfg.max_range

    ranges = np.asarray(ranges, dtype=np.float32)
    n_rays = ranges.shape[0]
    if n_rays == 0:
        return []
    angles = ray_angles(n_rays, fov)
    hit = ranges < max_range - hit_eps

    world_a = dog_heading + angles
    px = dog_x + ranges * np.cos(world_a)
    py = dog_y + ranges * np.sin(world_a)

    # Walk rays in angular order; a miss, or a large jump between
    # consecutive world-frame hit points, closes the current cluster.
    # Store (x, y, range) per hit ray for multi-peak splitting.
    clusters: list[list[tuple[float, float, float]]] = []
    current: list[tuple[float, float, float]] = []
    prev_xy: tuple[float, float] | None = None
    for i in range(n_rays):
        if not bool(hit[i]):
            if current:
                clusters.append(current)
                current = []
            prev_xy = None
            continue
        pt = (float(px[i]), float(py[i]), float(ranges[i]))
        if prev_xy is not None and math.hypot(pt[0] - prev_xy[0], pt[1] - prev_xy[1]) > gap_thr:
            clusters.append(current)
            current = []
        current.append(pt)
        prev_xy = (pt[0], pt[1])
    if current:
        clusters.append(current)

    # Seam merge for full-circle scans: ray n-1 is adjacent to ray 0. When
    # both hit and their points are within the gap threshold, the last and
    # first clusters are one arc; join them in angular order (tail + head).
    if (len(clusters) >= 2
            and fov >= 2.0 * math.pi - 1e-6
            and bool(hit[0]) and bool(hit[n_rays - 1])):
        head, tail = clusters[0], clusters[-1]
        seam_gap = math.hypot(head[0][0] - tail[-1][0], head[0][1] - tail[-1][1])
        if seam_gap <= gap_thr:
            clusters[0] = tail + head
            clusters.pop()

    detections: list[tuple[float, float]] = []
    for cluster in clusters:
        points_xy = [(p[0], p[1]) for p in cluster]
        range_vals = [p[2] for p in cluster]

        # Multi-peak splitting (clusters under 4 points come back unchanged).
        sub_clusters = _split_cluster_by_range(points_xy, range_vals, split_gap)

        for sub in sub_clusters:
            if len(sub) < 1:
                continue
            xs = [p[0] for p in sub]
            ys = [p[1] for p in sub]
            cx, cy = sum(xs) / len(xs), sum(ys) / len(ys)
            # Bounding-box diagonal as a cheap size estimate.
            span = math.hypot(max(xs) - min(xs), max(ys) - min(ys))
            if span > max_span:
                continue
            # Rays hit the front edge of the sheep; offset outward by
            # sheep_radius along the dog→cluster direction.
            dx, dy = cx - dog_x, cy - dog_y
            d = math.hypot(dx, dy)
            if d > 1e-3:
                cx += sheep_r * dx / d
                cy += sheep_r * dy / d
            in_main = _in_field_region(cx, cy)
            # Also accept the strip from 1 m below GATE_Y, spanning PEN_X.
            in_gate_strip = (PEN_X[0] - 0.2 < cx < PEN_X[1] + 0.2 and
                             GATE_Y - 1.0 < cy < GATE_Y + 0.2)
            if not (in_main or in_gate_strip):
                continue
            if any(math.hypot(cx - fx, cy - fy) < static_rej
                   for fx, fy in _STATIC_FEATURES):
                continue
            if _near_wall(cx, cy, wall_rej):
                continue
            detections.append((cx, cy))
    return detections
|
||||||
@@ -0,0 +1,255 @@
|
|||||||
|
"""Fast 2D LiDAR simulator for the Gymnasium env.
|
||||||
|
|
||||||
|
Raycasts against sheep (discs) and static world geometry. For rectangular
|
||||||
|
fields this is axis-aligned walls + gate posts; for round fields it is a
|
||||||
|
circular wall + gate posts.
|
||||||
|
|
||||||
|
The module-level constants (``LIDAR_N_RAYS``, ``LIDAR_FOV``, etc.) reflect
|
||||||
|
the original 360°/360-ray oracle configuration. Pass a
|
||||||
|
:class:`~herding.config.LidarConfig` to :func:`simulate_scan` to use a
|
||||||
|
different spec (e.g. :data:`~herding.config.LIDAR_WEBOTS` for 180-ray/140°
|
||||||
|
matching the ShepherdDog.proto hardware).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from herding.config import LidarConfig
|
||||||
|
|
||||||
|
from herding.world.geometry import (
|
||||||
|
FIELD_SHAPE, FIELD_ROUND_R,
|
||||||
|
FIELD_X, FIELD_Y,
|
||||||
|
GATE_X, GATE_Y,
|
||||||
|
PEN_X, PEN_Y,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Default ("oracle") LiDAR specification.
#
# The Lidar device in protos/ShepherdDog.proto is 140° FOV / 180 rays; these
# module defaults widen it to a full circle sampled with 360 rays so the
# simulated dog has full situational awareness.
LIDAR_N_RAYS = 360
LIDAR_FOV = math.tau        # 360° full circle (== 2π rad)
LIDAR_MAX_RANGE = 12.0
LIDAR_NOISE = 0.005         # m, gaussian std

# Radii of the discs that stand in for sheep (horizontal cylinder
# approximation in the LiDAR plane) and for the posts.
SHEEP_RADIUS = 0.30
POST_RADIUS = 0.25
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Rectangular-field static geometry
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
_VERTICAL_WALLS_RECT = (
|
||||||
|
( 15.0, -15.0, 15.0), # field east
|
||||||
|
(-15.0, -15.0, 15.0), # field west
|
||||||
|
( 10.0, -22.0, -15.0), # pen west
|
||||||
|
( 13.0, -22.0, -15.0), # pen east
|
||||||
|
)
|
||||||
|
|
||||||
|
_HORIZONTAL_WALLS_RECT = (
|
||||||
|
( 15.0, -15.0, 15.0), # field north
|
||||||
|
(-15.0, -15.0, 10.0), # field south-west of gate
|
||||||
|
(-15.0, 13.0, 15.0), # field south-east of gate
|
||||||
|
(-22.0, 10.0, 13.0), # pen south
|
||||||
|
)
|
||||||
|
|
||||||
|
_POSTS_RECT = np.array([
|
||||||
|
( 10.0, -15.0), ( 13.0, -15.0),
|
||||||
|
( 15.0, 15.0), ( 15.0, -15.0),
|
||||||
|
(-15.0, 15.0), (-15.0, -15.0),
|
||||||
|
], dtype=np.float64)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Round-field static geometry
# ---------------------------------------------------------------------------
# The round field is a circular wall with a gate gap; the circle itself is
# handled analytically in ``_raycast_static``.  Only the posts at the two
# edges of the gap and the pen walls are tabulated here.
_gate_cx = (GATE_X[0] + GATE_X[1]) * 0.5   # NOTE(review): not referenced in the visible code

_POSTS_ROUND = np.array(
    [[gate_x, GATE_Y] for gate_x in (GATE_X[0], GATE_X[1])],
    dtype=np.float64,
)

# Pen walls for the round field.  The side walls sit on the gate edges and run
# from PEN_Y[0] up to the gate line; entries are (x, y_min, y_max).
_VERTICAL_WALLS_ROUND = tuple(
    (gate_x, PEN_Y[0], GATE_Y)             # pen west, then pen east
    for gate_x in (GATE_X[0], GATE_X[1])
)

# Single closing wall of the pen; entries are (y, x_min, x_max).
_HORIZONTAL_WALLS_ROUND = (
    (PEN_Y[0], GATE_X[0], GATE_X[1]),      # pen south
)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_static_geometry():
    """Pick the static-geometry tables that match ``FIELD_SHAPE``.

    Returns
    -------
    tuple
        ``(vertical_walls, horizontal_walls, posts, circle_radius)``.
        ``circle_radius`` is ``FIELD_ROUND_R`` for ``"field_round"`` and
        ``None`` for every other shape (no circular wall).
    """
    if FIELD_SHAPE != "field_round":
        # Any shape other than the round one uses the rectangular tables.
        return (_VERTICAL_WALLS_RECT, _HORIZONTAL_WALLS_RECT, _POSTS_RECT, None)
    return (_VERTICAL_WALLS_ROUND, _HORIZONTAL_WALLS_ROUND, _POSTS_ROUND,
            FIELD_ROUND_R)


# Resolved once at import time; ``_raycast_static`` reads these globals.
_VERTS, _HORIZS, _POSTS, _CIRC_R = _build_static_geometry()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Ray helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def ray_angles(n: int = LIDAR_N_RAYS, fov: float = LIDAR_FOV) -> np.ndarray:
    """Local-frame ray angles, CCW from forward, sweeping +fov/2 → -fov/2.

    Returns ``n`` evenly spaced float64 angles in radians.  Both endpoints are
    included, so with a full-circle ``fov`` the first and last ray point in
    the same direction (+π and -π).
    """
    half_fov = fov / 2.0
    return np.linspace(half_fov, -half_fov, num=n, dtype=np.float64)


# Direction tables for the default spec, computed once so that
# ``simulate_scan`` does not redo the trigonometry on every call.
_ANGLES = ray_angles()
_COS, _SIN = np.cos(_ANGLES), np.sin(_ANGLES)
|
||||||
|
|
||||||
|
|
||||||
|
def _raycast_static(
|
||||||
|
ox: float, oy: float, cos_w: np.ndarray, sin_w: np.ndarray,
|
||||||
|
) -> np.ndarray:
|
||||||
|
"""Per-ray distance to the nearest wall or post hit (∞ if none)."""
|
||||||
|
n_rays = cos_w.shape[0]
|
||||||
|
best = np.full(n_rays, np.inf, dtype=np.float64)
|
||||||
|
|
||||||
|
EPS = 1e-3
|
||||||
|
safe_cos = np.where(np.abs(cos_w) < 1e-9, 1e-9, cos_w)
|
||||||
|
safe_sin = np.where(np.abs(sin_w) < 1e-9, 1e-9, sin_w)
|
||||||
|
|
||||||
|
# Vertical walls (x = const)
|
||||||
|
for wx, ymin, ymax in _VERTS:
|
||||||
|
t = (wx - ox) / safe_cos
|
||||||
|
y_at = oy + t * sin_w
|
||||||
|
valid = (t > EPS) & (y_at >= ymin - EPS) & (y_at <= ymax + EPS)
|
||||||
|
cand = np.where(valid, t, np.inf)
|
||||||
|
np.minimum(best, cand, out=best)
|
||||||
|
|
||||||
|
# Horizontal walls (y = const)
|
||||||
|
for wy, xmin, xmax in _HORIZS:
|
||||||
|
t = (wy - oy) / safe_sin
|
||||||
|
x_at = ox + t * cos_w
|
||||||
|
valid = (t > EPS) & (x_at >= xmin - EPS) & (x_at <= xmax + EPS)
|
||||||
|
cand = np.where(valid, t, np.inf)
|
||||||
|
np.minimum(best, cand, out=best)
|
||||||
|
|
||||||
|
# Circular wall (round field only)
|
||||||
|
if _CIRC_R is not None:
|
||||||
|
# Ray: P(t) = O + t·D. ||P(t)||² = R²
|
||||||
|
# t² - 2t(O·D) + (||O||² - R²) = 0
|
||||||
|
# a = 1 (rays are unit), b = -2(O·D), c = ||O||² - R²
|
||||||
|
a = 1.0 # cos_w² + sin_w² = 1
|
||||||
|
b = -(ox * cos_w + oy * sin_w)
|
||||||
|
c = ox * ox + oy * oy - _CIRC_R * _CIRC_R
|
||||||
|
disc = b * b - a * c
|
||||||
|
valid_disc = disc >= 0.0
|
||||||
|
sqrt_disc = np.sqrt(np.maximum(disc, 0.0))
|
||||||
|
# Two intersection candidates: t = (-b ± sqrt(disc)) / a
|
||||||
|
t1 = -b - sqrt_disc
|
||||||
|
t2 = -b + sqrt_disc
|
||||||
|
# We want the smallest positive t.
|
||||||
|
t1_valid = valid_disc & (t1 > EPS)
|
||||||
|
t2_valid = valid_disc & (t2 > EPS)
|
||||||
|
t_circ = np.where(t1_valid, t1, np.where(t2_valid, t2, np.inf))
|
||||||
|
|
||||||
|
# Exclude rays that hit the gate gap: the hit point must not lie
|
||||||
|
# in the gate column (between GATE_X and above GATE_Y).
|
||||||
|
hx = ox + t_circ * cos_w
|
||||||
|
hy = oy + t_circ * sin_w
|
||||||
|
in_gate = ((hx > GATE_X[0]) & (hx < GATE_X[1]) &
|
||||||
|
(hy > GATE_Y - 2.0) & (hy < GATE_Y + 2.0))
|
||||||
|
t_circ = np.where(in_gate, np.inf, t_circ)
|
||||||
|
np.minimum(best, t_circ, out=best)
|
||||||
|
|
||||||
|
# Posts (treat as discs)
|
||||||
|
if _POSTS.size:
|
||||||
|
px = _POSTS[:, 0] - ox
|
||||||
|
py = _POSTS[:, 1] - oy
|
||||||
|
t_post = np.outer(px, cos_w) + np.outer(py, sin_w)
|
||||||
|
d2 = (px ** 2 + py ** 2)[:, None]
|
||||||
|
perp2 = d2 - t_post ** 2
|
||||||
|
R2 = POST_RADIUS ** 2
|
||||||
|
hit = (perp2 < R2) & (t_post > 0.0)
|
||||||
|
half = np.sqrt(np.clip(R2 - perp2, 0.0, None))
|
||||||
|
cand = np.where(hit, t_post - half, np.inf)
|
||||||
|
nearest = cand.min(axis=0)
|
||||||
|
np.minimum(best, nearest, out=best)
|
||||||
|
|
||||||
|
return best
|
||||||
|
|
||||||
|
|
||||||
|
def simulate_scan(
    dog_x: float, dog_y: float, dog_heading: float,
    sheep_xy: list[tuple[float, float]],
    noise: float = LIDAR_NOISE,
    max_range: float = LIDAR_MAX_RANGE,
    rng: np.random.Generator | None = None,
    lidar_cfg: "LidarConfig | None" = None,
) -> np.ndarray:
    """Return a (N,) float32 range array. No-hit entries equal ``max_range``.

    Parameters
    ----------
    dog_x, dog_y, dog_heading
        Sensor origin and heading (rad) in the world frame.
    sheep_xy
        Every sheep (penned or active) in the scene, as ``(x, y)`` pairs.
        ``None`` or an empty sequence means "no sheep".
    noise, max_range
        Gaussian range-noise std and range cap.  Both are ignored when
        ``lidar_cfg`` is given.
    rng
        Generator used for the noise; a fresh one is created when ``None``.
    lidar_cfg
        Overrides the module-level defaults for a single call (e.g. to use
        :data:`~herding.config.LIDAR_WEBOTS`).  Its ``n_rays``, ``fov_rad``,
        ``max_range``, ``noise_std`` and ``sheep_radius`` are read.

    Returns
    -------
    np.ndarray
        Ranges in ``[0, max_range]``.  The lower clamp is applied even when
        noise is disabled, so an origin that overlaps a sheep or post disc can
        never yield a negative range.
    """
    if lidar_cfg is not None:
        max_range = lidar_cfg.max_range
        noise = lidar_cfg.noise_std
        sheep_r2 = lidar_cfg.sheep_radius ** 2
        angles = ray_angles(lidar_cfg.n_rays, lidar_cfg.fov_rad)
        local_cos, local_sin = np.cos(angles), np.sin(angles)
    else:
        sheep_r2 = SHEEP_RADIUS ** 2
        # Default spec: reuse the tables cached at import time.
        local_cos, local_sin = _COS, _SIN

    # Rotate the local-frame ray directions into the world frame.
    ch, sh = math.cos(dog_heading), math.sin(dog_heading)
    cos_w = ch * local_cos - sh * local_sin
    sin_w = sh * local_cos + ch * local_sin

    best = _raycast_static(dog_x, dog_y, cos_w, sin_w)

    # ``len`` rather than truthiness so array-like inputs are accepted too.
    if sheep_xy is not None and len(sheep_xy) > 0:
        sx = np.asarray([p[0] for p in sheep_xy], dtype=np.float64) - dog_x
        sy = np.asarray([p[1] for p in sheep_xy], dtype=np.float64) - dog_y
        # Projection of each sheep centre onto each ray: (n_sheep, n_rays).
        t = np.outer(sx, cos_w) + np.outer(sy, sin_w)
        s_dist2 = (sx ** 2 + sy ** 2)[:, None]
        perp2 = s_dist2 - t ** 2
        hit = (perp2 < sheep_r2) & (t > 0.0)
        half = np.sqrt(np.clip(sheep_r2 - perp2, 0.0, None))
        candidate = np.where(hit, t - half, np.inf)   # near-side intersection
        nearest = candidate.min(axis=0)
        np.minimum(best, nearest, out=best)

    # ``t - half`` goes negative when the origin lies inside a disc; clamp here
    # because ``_add_noise`` only clips when noise is actually applied.
    ranges = np.clip(best, 0.0, max_range).astype(np.float32)
    return _add_noise(ranges, noise, rng, max_range)
|
||||||
|
|
||||||
|
|
||||||
|
def _add_noise(ranges: np.ndarray, sigma: float,
|
||||||
|
rng: np.random.Generator | None, max_range: float) -> np.ndarray:
|
||||||
|
if sigma <= 0.0:
|
||||||
|
return ranges
|
||||||
|
if rng is None:
|
||||||
|
rng = np.random.default_rng()
|
||||||
|
hit_mask = ranges < max_range - 1e-3
|
||||||
|
n_hit = int(hit_mask.sum())
|
||||||
|
if n_hit:
|
||||||
|
ranges = ranges.copy()
|
||||||
|
ranges[hit_mask] += rng.normal(0.0, sigma, size=n_hit).astype(np.float32)
|
||||||
|
np.clip(ranges, 0.0, max_range, out=ranges)
|
||||||
|
return ranges
|
||||||
@@ -0,0 +1,122 @@
|
|||||||
|
"""Observation builder for the shepherd-dog policy.
|
||||||
|
|
||||||
|
Order-invariant 32-D feature vector. Sheep never appear by index in
|
||||||
|
the observation, only via summary statistics, a polar histogram, and
|
||||||
|
two "named" channels (closest-to-pen, rearmost-from-pen) — so the
|
||||||
|
policy generalises across flock sizes 1..MAX_SHEEP.
|
||||||
|
|
||||||
|
Layout (all components normalised so values stay roughly in [-1, 1]):
|
||||||
|
|
||||||
|
idx field
|
||||||
|
----- ----------------------------------------------------------
|
||||||
|
0..3 dog pose: x/15, y/15, cos(h), sin(h)
|
||||||
|
4..5 active-sheep CoM x/15, y/15
|
||||||
|
6..8 flock dispersion: max_radius/15, std_x/15, std_y/15
|
||||||
|
9..11 dog → CoM: dx/30, dy/30, dist/30
|
||||||
|
12..14 dog → pen entry: dx/30, dy/30, dist/30
|
||||||
|
15..16 furthest sheep → CoM: dx/15, dy/15
|
||||||
|
17..18 min sheep-to-wall, min dog-to-wall (both /15)
|
||||||
|
19 active sheep count / MAX_SHEEP
|
||||||
|
20..27 8-bin polar histogram of active sheep in the dog's body frame
|
||||||
|
28..29 dog → closest-to-pen sheep: dx/15, dy/15
|
||||||
|
30..31 dog → rearmost (furthest-from-pen) sheep: dx/15, dy/15
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from herding.world.geometry import (
|
||||||
|
PEN_ENTRY, MAX_SHEEP, distance_to_wall,
|
||||||
|
)
|
||||||
|
|
||||||
|
OBS_DIM = 32


def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
              n_max: int = MAX_SHEEP,
              n_expected: int | None = None) -> np.ndarray:
    """Build the order-invariant ``OBS_DIM``-long observation for the dog policy.

    Parameters
    ----------
    dog_xy : tuple (x, y) of the dog's GPS position (m)
    dog_heading : dog heading in rad
    sheep_xy_list : iterable of (x, y) for ALL known sheep
    sheep_penned_list : parallel iterable of bool — True if sheep is penned
    n_max : maximum supported flock size used for the count normaliser
    n_expected : unused, kept for API compatibility.

    Returns
    -------
    np.ndarray
        float32 vector laid out as in the module docstring.  When no sheep is
        active only the dog pose (0..3) and the dog → pen-entry channels
        (12..14) are filled; every other slot stays 0.
    """
    dog_x, dog_y = dog_xy
    obs = np.zeros(OBS_DIM, dtype=np.float32)

    # 0..3 — dog pose.
    obs[0] = dog_x / 15.0
    obs[1] = dog_y / 15.0
    obs[2] = math.cos(dog_heading)
    obs[3] = math.sin(dog_heading)

    # 12..14 — dog → pen entry (does not depend on the flock).
    to_pen_x = PEN_ENTRY[0] - dog_x
    to_pen_y = PEN_ENTRY[1] - dog_y
    obs[12] = to_pen_x / 30.0
    obs[13] = to_pen_y / 30.0
    obs[14] = math.hypot(to_pen_x, to_pen_y) / 30.0

    # Only sheep that are not penned contribute to the remaining features.
    active = []
    for (sheep_x, sheep_y), penned in zip(sheep_xy_list, sheep_penned_list):
        if not penned:
            active.append((sheep_x, sheep_y))
    n = len(active)
    if n == 0:
        # The count slot (19) and all flock channels are already zero.
        return obs

    pts = np.asarray(active, dtype=np.float32)
    xs = pts[:, 0]
    ys = pts[:, 1]

    # 4..8 — centre of mass and dispersion.
    com_x = float(xs.mean())
    com_y = float(ys.mean())
    offsets = pts - np.array([com_x, com_y], dtype=np.float32)
    spread = np.hypot(offsets[:, 0], offsets[:, 1])
    obs[4] = com_x / 15.0
    obs[5] = com_y / 15.0
    obs[6] = float(spread.max()) / 15.0
    obs[7] = float(xs.std()) / 15.0
    obs[8] = float(ys.std()) / 15.0

    # 9..11 — dog → CoM.
    to_com_x = com_x - dog_x
    to_com_y = com_y - dog_y
    obs[9] = to_com_x / 30.0
    obs[10] = to_com_y / 30.0
    obs[11] = math.hypot(to_com_x, to_com_y) / 30.0

    # 15..16 — offset of the sheep furthest from the CoM.
    outlier = int(np.argmax(spread))
    obs[15] = float(offsets[outlier, 0]) / 15.0
    obs[16] = float(offsets[outlier, 1]) / 15.0

    # 17..19 — wall clearances and normalised active count.
    nearest_sheep_wall = float(min(distance_to_wall(sx, sy) for sx, sy in active))
    dog_wall = distance_to_wall(dog_x, dog_y)
    obs[17] = nearest_sheep_wall / 15.0
    obs[18] = float(dog_wall) / 15.0
    obs[19] = n / n_max

    # 20..27 — 8-bin polar histogram in the dog's body frame.  Bearings are
    # wrapped to [-π, π]; a bearing of exactly +π would land in bin 8, hence
    # the clip to 0..7.
    bearing = np.arctan2(ys - dog_y, xs - dog_x) - dog_heading
    bearing = np.arctan2(np.sin(bearing), np.cos(bearing))
    sector = np.floor((bearing + math.pi) / (2 * math.pi) * 8).astype(int)
    sector = np.clip(sector, 0, 7)
    hist = np.bincount(sector, minlength=8).astype(np.float32)
    hist /= max(1, n)
    obs[20:28] = hist

    # 28..31 — the two "named" sheep: closest to and furthest from the pen
    # entry.  Without them the observation cannot tell which sheep the teacher
    # is steering toward, and BC fails to mimic it.
    pen_dists = np.hypot(xs - PEN_ENTRY[0], ys - PEN_ENTRY[1])
    closest = int(np.argmin(pen_dists))
    rearmost = int(np.argmax(pen_dists))
    obs[28] = (float(pts[closest, 0]) - dog_x) / 15.0
    obs[29] = (float(pts[closest, 1]) - dog_y) / 15.0
    obs[30] = (float(pts[rearmost, 0]) - dog_x) / 15.0
    obs[31] = (float(pts[rearmost, 1]) - dog_y) / 15.0

    return obs
|
||||||
@@ -0,0 +1,413 @@
|
|||||||
|
"""Multi-target tracker for LiDAR-detected sheep.
|
||||||
|
|
||||||
|
Three-stage greedy nearest-neighbour data association:
|
||||||
|
|
||||||
|
1. **Consensus promotion**. New detections start as *candidate* tracks
|
||||||
|
invisible to ``get_positions``. They must accumulate ``consensus_k``
|
||||||
|
matches within ``consensus_radius_m`` to promote; candidates that
|
||||||
|
fail to re-confirm within ``consensus_max_age`` steps die. This
|
||||||
|
filters one-shot LiDAR phantoms — wall returns, multi-cluster sheep
|
||||||
|
splits, fast-moving sheep position jumps — at the cost of a small
|
||||||
|
acquisition latency (~50 ms at the default ``consensus_k=3``).
|
||||||
|
``consensus_k=1`` disables the stage.
|
||||||
|
2. **Constant-velocity prediction**. Each track carries a smoothed
|
||||||
|
``(vx, vy)``. While a track is occluded its position is
|
||||||
|
extrapolated for up to ``PREDICT_STEPS`` frames, then falls back to
|
||||||
|
last-seen static memory until ``FORGET_STEPS`` deletes it.
|
||||||
|
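3. **Pen latching**. A promoted track whose estimated position passes
   the pen test (``in_pen`` or ``is_penned``, and at least
   ``pen_latch_depth`` metres beyond ``GATE_Y``) is marked penned and
   excluded from ``get_positions``. Penned tracks are forgotten 8× more
   slowly than active ones (after ``8 * forget_steps`` steps without a
   detection) rather than being kept forever.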
|
||||||
|
|
||||||
|
Output of :meth:`SheepTracker.get_positions` is ``{name: (x, y)}`` —
|
||||||
|
Strömbom, Sequential and the BC observation builder consume it
|
||||||
|
directly.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from herding.config import TrackerConfig
|
||||||
|
|
||||||
|
from herding.world.geometry import MAX_SHEEP, in_pen, is_penned
|
||||||
|
|
||||||
|
|
||||||
|
# --- Data-association gates (metres) ---------------------------------------
GATE_M = 2.5              # primary NN gate (recently observed tracks)
REACQUIRE_GATE_M = 4.5    # wider gate for re-binding stale tracks
PENNED_GATE_M = 4.0       # gate for matching detections to existing penned tracks

# --- Track lifetime (tracker steps; 1 step is treated as 0.016 s) ----------
REACQUIRE_MIN_AGE = 20    # track must be this stale to use the wider gate
FORGET_STEPS = 200        # ~3.2 s — delete stale active tracks (penned tracks get 8× this)
MAX_ACTIVE_TRACKS = MAX_SHEEP

# --- Predictive tracking ----------------------------------------------------
PREDICT_STEPS = 120       # ~1.9 s — extrapolate velocity this many frames
VELOCITY_CLAMP = 1.0      # m/s — max predicted speed (sheep max is ~0.78 m/s)
|
||||||
|
|
||||||
|
|
||||||
|
class Track:
    """One tracked target: last observed position, smoothed velocity, age.

    Attributes
    ----------
    x, y
        Position of the most recently absorbed detection.
    vx, vy
        Exponentially smoothed velocity estimate (m/s); zero at spawn.
    last_seen
        Tracker step at which a detection was last absorbed.
    penned
        ``True`` once the track has been latched as penned.
    candidate
        ``True`` while the track has not yet accumulated enough
        consensus matches to be visible (``hit_count < consensus_k``).
        Candidates are excluded from :meth:`SheepTracker.get_positions`
        and from the active/penned counters.
    hit_count
        Number of detections this track has absorbed since spawn,
        used by the consensus filter.  Starts at 1; the tracker increments it.
    """

    __slots__ = ("x", "y", "vx", "vy", "last_seen", "penned",
                 "candidate", "hit_count")

    def __init__(
        self,
        x: float,
        y: float,
        step: int,
        penned: bool = False,
        candidate: bool = False,
    ):
        self.x, self.y = x, y
        self.vx = self.vy = 0.0
        self.last_seen = step
        self.penned = penned
        self.candidate = candidate
        self.hit_count = 1       # the spawning detection counts as a hit

    @property
    def age(self) -> int:
        """Intentionally unimplemented — callers compute age from the current step."""
        raise NotImplementedError

    def predicted_position(
        self,
        current_step: int,
        predict_steps: int = PREDICT_STEPS,
        velocity_clamp: float = VELOCITY_CLAMP,
    ) -> tuple[float, float]:
        """Constant-velocity extrapolation of the track to ``current_step``.

        Falls back to the last observed position when the track was seen this
        step (or later), when it has been unseen for more than
        ``predict_steps`` steps, or when its speed is below 1e-4 m/s.  The
        travelled distance is capped at ``velocity_clamp`` m/s.
        """
        elapsed = current_step - self.last_seen
        if not (0 < elapsed <= predict_steps):
            return self.x, self.y

        speed = math.hypot(self.vx, self.vy)
        if speed < 1e-4:
            return self.x, self.y

        # 0.016 converts steps → seconds.
        reach = velocity_clamp * elapsed * 0.016
        travel = min(speed * elapsed * 0.016, reach)
        return (
            self.x + travel * (self.vx / speed),
            self.y + travel * (self.vy / speed),
        )

    def update(self, x: float, y: float, step: int) -> None:
        """Move the track onto a new detection and refresh its velocity.

        The velocity is only re-estimated when ``step`` is later than the
        previous observation; position and ``last_seen`` are always replaced.
        """
        elapsed = step - self.last_seen
        if elapsed > 0:
            seconds = elapsed * 0.016    # steps → seconds
            raw_vx = (x - self.x) / seconds
            raw_vy = (y - self.y) / seconds
            # Exponential smoothing: 60 % new measurement, 40 % history.
            alpha = 0.6
            self.vx = alpha * raw_vx + (1.0 - alpha) * self.vx
            self.vy = alpha * raw_vy + (1.0 - alpha) * self.vy
        self.x, self.y = x, y
        self.last_seen = step
|
||||||
|
|
||||||
|
|
||||||
|
class SheepTracker:
|
||||||
|
"""Online tracker with NN association, prediction, and forgetful memory.
|
||||||
|
|
||||||
|
Each track is a :class:`Track` with position, velocity estimate,
|
||||||
|
last-seen step, and penned flag.
|
||||||
|
|
||||||
|
Pass a :class:`~herding.config.TrackerConfig` to override any
|
||||||
|
module-level defaults without changing this file.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
    self,
    gate: float = GATE_M,
    tracker_cfg: "TrackerConfig | None" = None,
):
    """Create an empty tracker.

    ``gate`` is the primary association gate and is only used when
    ``tracker_cfg`` is ``None``.  Without a config the module-level constants
    apply and the consensus stage is disabled (``consensus_k = 1``); with a
    config every tunable, including the gate, is read from it.
    """
    if tracker_cfg is None:
        self.gate = gate
        self._reacquire_gate = REACQUIRE_GATE_M
        self._reacquire_min_age = REACQUIRE_MIN_AGE
        self._penned_gate = PENNED_GATE_M
        self._forget_steps = FORGET_STEPS
        self._predict_steps = PREDICT_STEPS
        self._velocity_clamp = VELOCITY_CLAMP
        self._max_new_per_step = MAX_ACTIVE_TRACKS
        self._pen_latch_depth = 0.0
        self._consensus_k = 1
        self._consensus_radius = 0.5
        self._consensus_max_age = 8
    else:
        cfg = tracker_cfg
        self.gate = cfg.gate_m
        self._reacquire_gate = cfg.reacquire_gate_m
        self._reacquire_min_age = cfg.reacquire_min_age
        self._penned_gate = cfg.penned_gate_m
        self._forget_steps = cfg.forget_steps
        self._predict_steps = cfg.predict_steps
        self._velocity_clamp = cfg.velocity_clamp
        self._max_new_per_step = cfg.max_new_tracks_per_step
        self._pen_latch_depth = cfg.pen_latch_depth
        self._consensus_k = cfg.consensus_k
        self._consensus_radius = cfg.consensus_radius_m
        self._consensus_max_age = cfg.consensus_max_age

    # Mutable state: tracks keyed by integer id, next id to hand out, and the
    # step counter advanced by ``update``.
    self._tracks: dict[int, Track] = {}
    self._next_id = 0
    self.step = 0
|
||||||
|
|
||||||
|
def reset(self) -> None:
    """Drop every track and restart the id and step counters at zero."""
    self.step = 0
    self._next_id = 0
    self._tracks.clear()
|
||||||
|
|
||||||
|
def _absorb_nearest(self, track, ref_x, ref_y, gate, detections, det_used) -> bool:
    """Bind the closest unused detection strictly within ``gate`` to ``track``.

    Distances are measured from ``(ref_x, ref_y)``.  On success the track
    absorbs the detection, its ``hit_count`` is bumped, the detection index is
    added to ``det_used`` and ``True`` is returned; otherwise nothing changes.
    """
    winner, winner_dist = -1, gate
    for j, (det_x, det_y) in enumerate(detections):
        if j in det_used:
            continue
        dist = math.hypot(det_x - ref_x, det_y - ref_y)
        if dist < winner_dist:
            winner, winner_dist = j, dist
    if winner < 0:
        return False
    det_x, det_y = detections[winner]
    track.update(det_x, det_y, self.step)
    track.hit_count += 1
    det_used.add(winner)
    return True

def update(self, detections: list[tuple[float, float]]) -> dict[str, tuple[float, float]]:
    """Advance one step, fold ``detections`` in and return active positions.

    Association is greedy nearest-neighbour in a fixed order: promoted active
    tracks (primary gate), stale active tracks (wider re-acquisition gate),
    candidates (tight consensus radius), then penned tracks.  Leftover
    detections spawn new tracks.  Afterwards candidates are promoted, tracks
    inside the pen are latched, stale tracks are forgotten and the visible
    set is capped at ``MAX_ACTIVE_TRACKS``.
    """
    self.step += 1

    det_used: set[int] = set()
    updated_tids: set[int] = set()

    def by_last_seen(tid: int) -> int:
        return self._tracks[tid].last_seen

    # Pass 1 — promoted active tracks against the primary gate, matched at
    # their predicted positions, least-recently-seen first.  Candidates are
    # handled separately (pass 1c) with a tighter radius so a stray detection
    # cannot rescue an already-stale candidate.
    active_tids = sorted(
        (tid for tid, t in self._tracks.items()
         if not t.penned and not t.candidate),
        key=by_last_seen,
    )
    for tid in active_tids:
        track = self._tracks[tid]
        ref_x, ref_y = track.predicted_position(
            self.step, self._predict_steps, self._velocity_clamp)
        if self._absorb_nearest(track, ref_x, ref_y, self.gate,
                                detections, det_used):
            updated_tids.add(tid)

    # Pass 1b — re-acquisition: still-unmatched tracks that have been unseen
    # for at least ``reacquire_min_age`` steps get the wider gate.
    for tid in active_tids:
        if tid in updated_tids:
            continue
        track = self._tracks[tid]
        if (self.step - track.last_seen) < self._reacquire_min_age:
            continue
        ref_x, ref_y = track.predicted_position(
            self.step, self._predict_steps, self._velocity_clamp)
        if self._absorb_nearest(track, ref_x, ref_y, self._reacquire_gate,
                                detections, det_used):
            updated_tids.add(tid)

    # Pass 1c — candidates, matched at their last observed position within
    # the consensus radius.  Promotion itself happens further down.
    candidate_tids = sorted(
        (tid for tid, t in self._tracks.items() if t.candidate),
        key=by_last_seen,
    )
    for tid in candidate_tids:
        track = self._tracks[tid]
        self._absorb_nearest(track, track.x, track.y, self._consensus_radius,
                             detections, det_used)

    # Pass 2 — penned tracks, matched at their last observed position.
    penned_tids = [tid for tid, t in self._tracks.items() if t.penned]
    for tid in penned_tids:
        track = self._tracks[tid]
        self._absorb_nearest(track, track.x, track.y, self._penned_gate,
                             detections, det_used)

    # Spawn tracks for whatever is still unmatched, at most
    # ``max_new_per_step`` of them.  With ``consensus_k > 1`` every spawn is
    # an invisible candidate and is never latched as penned at birth —
    # otherwise a gate-area phantom landing in the pen column would skip the
    # consensus filter and persist, which is exactly what the filter is
    # meant to prevent.
    as_candidate = self._consensus_k > 1
    spawned = 0
    for j, (det_x, det_y) in enumerate(detections):
        if j in det_used:
            continue
        if spawned >= self._max_new_per_step:
            break
        born_penned = False if as_candidate else self._is_penned(det_x, det_y)
        self._tracks[self._next_id] = Track(
            det_x, det_y, self.step,
            penned=born_penned, candidate=as_candidate)
        self._next_id += 1
        spawned += 1

    # Promote candidates with enough hits, then latch promoted tracks whose
    # current estimate lies in the pen.  A track that is still a candidate is
    # never latched: it has not earned visibility yet.
    for track in self._tracks.values():
        if track.candidate and track.hit_count >= self._consensus_k:
            track.candidate = False
        if track.penned or track.candidate:
            continue
        est_x, est_y = track.predicted_position(
            self.step, self._predict_steps, self._velocity_clamp)
        if self._is_penned(est_x, est_y):
            track.penned = True

    # Forget stale tracks.  Each kind has its own timeout: candidates get
    # ``consensus_max_age``, promoted active tracks ``forget_steps`` and
    # penned tracks 8× ``forget_steps``.
    penned_forget = self._forget_steps * 8
    stale: list[int] = []
    for tid, t in self._tracks.items():
        if t.candidate:
            limit = self._consensus_max_age
        elif t.penned:
            limit = penned_forget
        else:
            limit = self._forget_steps
        if self.step - t.last_seen > limit:
            stale.append(tid)
    for tid in stale:
        del self._tracks[tid]

    # Hard cap on the visible (promoted, not penned) set: drop the
    # least-recently-seen overflow.  Candidates do not compete for slots;
    # their growth is limited by ``max_new_per_step`` instead.
    visible_tids = [tid for tid, t in self._tracks.items()
                    if not t.penned and not t.candidate]
    overflow = len(visible_tids) - MAX_ACTIVE_TRACKS
    if overflow > 0:
        visible_tids.sort(key=by_last_seen)
        for tid in visible_tids[:overflow]:
            del self._tracks[tid]

    return self.get_positions()
|
||||||
|
|
||||||
|
def _is_penned(self, x: float, y: float) -> bool:
    """Decide whether ``(x, y)`` should be latched as penned.

    The point must pass ``in_pen`` or ``is_penned`` AND satisfy
    ``y <= GATE_Y - pen_latch_depth``, i.e. sit at least that many metres past
    the gate line.  A larger depth keeps gate-area LiDAR false positives
    (gate hardware reflections at y ≈ -15) from being latched as penned
    tracks.
    """
    from herding.world.geometry import GATE_Y

    inside = in_pen(x, y) or is_penned(x, y)
    if not inside:
        return inside
    # The depth margin applies whichever of the two geometry tests passed.
    return y <= GATE_Y - self._pen_latch_depth
|
||||||
|
|
||||||
|
def get_positions(self, min_freshness: int | None = None) -> dict[str, tuple[float, float]]:
    """Return promoted (non-candidate, non-penned) tracks as ``{name: (x, y)}``.

    Names have the form ``"t<track id>"``.  Each position comes from the
    track's ``predicted_position``, so a track that is occluded but still
    within ``predict_steps`` reports an extrapolated estimate and the
    teacher sees a smooth trajectory.

    Candidate tracks — those that have not yet accumulated
    ``consensus_k`` matches — are left out, so a one-shot phantom
    detection never reaches the policy/teacher.

    ``min_freshness`` (optional, deploy-only): skip tracks whose
    ``last_seen`` is more than ``min_freshness`` steps behind the current
    step.  Real sheep in FOV are detected nearly every step, whereas
    phantom tracks from sporadic Webots false positives stop being
    re-observed and decay.  The default ``None`` preserves training
    behaviour (extrapolated tracks stay visible).
    """
    visible: dict[str, tuple[float, float]] = {}
    for tid, track in self._tracks.items():
        hidden = track.penned or track.candidate
        if hidden:
            continue
        if min_freshness is not None:
            if self.step - track.last_seen > min_freshness:
                continue
        est_x, est_y = track.predicted_position(
            self.step, self._predict_steps, self._velocity_clamp)
        visible[f"t{tid}"] = (est_x, est_y)
    return visible
|
||||||
|
|
||||||
|
def get_penned_set(self) -> set[str]:
    """Return the names (``"t<track id>"``) of all tracks flagged as penned."""
    penned_names: set[str] = set()
    for tid, track in self._tracks.items():
        if track.penned:
            penned_names.add(f"t{tid}")
    return penned_names
|
||||||
|
|
||||||
|
def n_active(self) -> int:
    """Count the promoted tracks, i.e. those neither penned nor candidate."""
    count = 0
    for track in self._tracks.values():
        if track.penned or track.candidate:
            continue
        count += 1
    return count
|
||||||
|
|
||||||
|
def n_penned(self) -> int:
    """Count the tracks currently flagged as penned."""
    count = 0
    for track in self._tracks.values():
        if track.penned:
            count += 1
    return count
|
||||||
|
|
||||||
|
def n_candidate(self) -> int:
    """Count the unpromoted candidate tracks still awaiting consensus."""
    count = 0
    for track in self._tracks.values():
        if track.candidate:
            count += 1
    return count
|
||||||
|
|
||||||
|
def n_predicted(self) -> int:
    """Count promoted active tracks that are being extrapolated.

    A track counts when it is neither penned nor candidate and was last
    seen between 1 and ``predict_steps`` steps ago (inclusive), i.e. it
    was not observed on the current step.
    """
    total = 0
    for track in self._tracks.values():
        if track.penned or track.candidate:
            continue
        if 0 < (self.step - track.last_seen) <= self._predict_steps:
            total += 1
    return total
|
||||||
@@ -0,0 +1,234 @@
|
|||||||
|
"""Differential-drive and mecanum kinematics, shared by the env and Webots
|
||||||
|
controllers.
|
||||||
|
|
||||||
|
First-order rigid-body model — no slip, wheel-accel limits, or contact
|
||||||
|
forces by default. Pass ``slip_std`` and an ``rng`` to
|
||||||
|
:func:`kinematics_step` / :func:`mecanum_step` to add
|
||||||
|
per-wheel Gaussian speed noise for domain randomisation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def kinematics_step(x, y, h, w_left, w_right, wheel_radius, wheel_base, dt,
                    slip_std: float = 0.0,
                    rng: Optional[np.random.Generator] = None):
    """Advance a differential-drive robot by one time step.

    Parameters
    ----------
    x, y : robot position (m)
    h : robot heading (rad), 0 = +x axis
    w_left, w_right : wheel angular velocities (rad/s)
    wheel_radius, wheel_base : robot dimensions (m)
    dt : timestep (s)
    slip_std : optional Gaussian std (rad/s) added to each wheel speed
    rng : numpy Generator used to draw the slip noise; noise is applied
        only when ``slip_std > 0`` *and* ``rng`` is given — without an
        ``rng`` the step is noise-free.

    Returns ``(new_x, new_y, new_h)`` with ``new_h`` wrapped by ``atan2``.
    """
    apply_slip = slip_std > 0.0 and rng is not None
    if apply_slip:
        slip_left, slip_right = rng.normal(0.0, slip_std, size=2)
        w_left = w_left + slip_left
        w_right = w_right + slip_right

    # Body-frame forward speed and yaw rate from the two wheel speeds.
    linear = (w_right + w_left) * wheel_radius * 0.5
    yaw_rate = (w_right - w_left) * wheel_radius / wheel_base

    # Translation uses the heading from *before* this step's rotation.
    heading_next = h + yaw_rate * dt
    return (
        x + linear * math.cos(h) * dt,
        y + linear * math.sin(h) * dt,
        math.atan2(math.sin(heading_next), math.cos(heading_next)),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def velocity_to_wheels(vx, vy, h, max_linear, wheel_radius, max_wheel_omega,
                       k_turn=4.0):
    """Map a desired ``(vx, vy)`` intent in [-1, 1]² to ``(left, right)`` wheel speeds.

    The commanded speed is ``hypot(vx, vy) * max_linear``; below 1e-3 m/s
    both wheels are stopped.  Otherwise the forward component is scaled by
    ``cos`` of the heading error (error clamped to ±90° for that scaling),
    and a proportional term ``k_turn * err`` supplies the wheel-rate
    differential.  Each wheel is clamped to ``±max_wheel_omega``.
    """
    speed_ms = math.hypot(vx, vy) * max_linear
    if speed_ms < 1e-3:
        return 0.0, 0.0

    # Signed heading error, wrapped via atan2.
    delta = math.atan2(vy, vx) - h
    err = math.atan2(math.sin(delta), math.cos(delta))

    half_pi = math.pi / 2
    bounded = max(-half_pi, min(half_pi, err))
    forward = speed_ms * math.cos(bounded) / wheel_radius
    steer = k_turn * err

    def _saturate(value):
        return max(-max_wheel_omega, min(max_wheel_omega, value))

    return _saturate(forward - steer), _saturate(forward + steer)
|
||||||
|
|
||||||
|
|
||||||
|
def heading_speed_to_wheels(heading, speed_motor, h, max_wheel_omega,
                            k_turn=4.0):
    """Sheep variant: target given as a heading angle, speed in wheel rad/s.

    Forward drive is ``speed_motor`` scaled by ``cos`` of the heading
    error (never negative); ``k_turn * err`` is subtracted from the left
    wheel and added to the right.  Both outputs are clamped to
    ``±max_wheel_omega``.  Returns ``(left, right)``.
    """
    delta = heading - h
    err = math.atan2(math.sin(delta), math.cos(delta))
    forward = max(0.0, math.cos(err)) * speed_motor
    steer = k_turn * err

    def _saturate(value):
        return max(-max_wheel_omega, min(max_wheel_omega, value))

    return _saturate(forward - steer), _saturate(forward + steer)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Mecanum (4-wheel omnidirectional) kinematics
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def mecanum_step(x, y, h, w_fl, w_fr, w_rl, w_rr,
                 wheel_radius, lx, ly, dt,
                 slip_std: float = 0.0,
                 rng: Optional[np.random.Generator] = None,
                 strafe_efficiency: float = 1.0,
                 strafe_to_forward_bleed: float = 0.0):
    """Advance a mecanum (4-wheel omnidirectional) robot by one time step.

    Parameters
    ----------
    x, y : robot position (m)
    h : robot heading (rad), 0 = +x axis
    w_fl, w_fr, w_rl, w_rr : wheel angular velocities (rad/s)
    wheel_radius : wheel radius (m)
    lx : half the front-to-back axle distance (m)
    ly : half the left-to-right axle distance (m)
    dt : timestep (s)
    slip_std : optional Gaussian std (rad/s) added to each wheel speed
    rng : numpy Generator used to draw the slip noise; noise is applied
        only when ``slip_std > 0`` and ``rng`` is given
    strafe_efficiency : multiplier on the realised lateral (body-y)
        velocity.  ``1.0`` (default) is the textbook X-pattern mecanum;
        use the deployed platform's calibrated value to train a policy
        that compensates for weak strafing (the Webots roller-hinge
        proto currently calibrates to ~0.4).
    strafe_to_forward_bleed : fraction of ``|vy_body_ideal|`` added to
        the body-x velocity.  Negative values model backward bleed
        (Webots calibrates to ≈ -0.28), positive values forward bleed.
        Because the magnitude of the ideal strafe is used, +y and -y
        strafes yield the same body-x error.

    Returns ``(new_x, new_y, new_h)`` with ``new_h`` wrapped by ``atan2``.
    """
    if slip_std > 0.0 and rng is not None:
        d_fl, d_fr, d_rl, d_rr = rng.normal(0.0, slip_std, size=4)
        w_fl, w_fr = w_fl + d_fl, w_fr + d_fr
        w_rl, w_rr = w_rl + d_rl, w_rr + d_rr

    # Forward kinematics in the body frame.
    vx_body = (w_fl + w_fr + w_rl + w_rr) * wheel_radius / 4.0
    vy_body_ideal = (-w_fl + w_fr + w_rl - w_rr) * wheel_radius / 4.0
    vy_body = vy_body_ideal * strafe_efficiency
    if strafe_to_forward_bleed != 0.0:
        # abs() makes the bleed independent of strafe direction; only the
        # sign of ``strafe_to_forward_bleed`` decides whether it pushes
        # the body forward or backward.
        vx_body += strafe_to_forward_bleed * abs(vy_body_ideal)
    yaw_rate = (-w_fl + w_fr - w_rl + w_rr) * wheel_radius / (4.0 * (lx + ly))

    # Rotate the body-frame velocity into the world frame.
    c, s = math.cos(h), math.sin(h)
    vx_world = vx_body * c - vy_body * s
    vy_world = vx_body * s + vy_body * c

    heading_next = h + yaw_rate * dt
    return (
        x + vx_world * dt,
        y + vy_world * dt,
        math.atan2(math.sin(heading_next), math.cos(heading_next)),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def mecanum_inverse(vx_body, vy_body, omega, wheel_radius, lx, ly,
                    max_wheel_omega):
    """Mecanum inverse kinematics: body-frame twist to four wheel speeds.

    Parameters
    ----------
    vx_body, vy_body : desired body-frame linear velocities (m/s)
    omega : desired yaw rate (rad/s)
    wheel_radius : wheel radius (m)
    lx : half front-to-back axle distance (m)
    ly : half left-to-right axle distance (m)
    max_wheel_omega : wheel angular velocity limit (rad/s)

    When the largest wheel speed magnitude exceeds ``max_wheel_omega``
    all four speeds are multiplied by the same ratio, so the limit is
    respected without changing the ratio between wheels.

    Returns ``(w_fl, w_fr, w_rl, w_rr)``.
    """
    spin = (lx + ly) * omega
    wheels = [
        (vx_body - vy_body - spin) / wheel_radius,  # front-left
        (vx_body + vy_body + spin) / wheel_radius,  # front-right
        (vx_body + vy_body - spin) / wheel_radius,  # rear-left
        (vx_body - vy_body + spin) / wheel_radius,  # rear-right
    ]

    peak = max(*map(abs, wheels), 1e-9)
    if peak > max_wheel_omega:
        shrink = max_wheel_omega / peak
        wheels = [w * shrink for w in wheels]

    return tuple(wheels)
|
||||||
|
|
||||||
|
|
||||||
|
def velocity_to_mecanum_wheels(vx, vy, omega, h, max_linear, wheel_radius,
                               lx, ly, max_wheel_omega,
                               k_turn=4.0, wheel_base=0.28):
    """Turn a world-frame ``(vx, vy, omega)`` action in [-1, 1]^3 into 4 wheel speeds.

    Holonomic interpretation: ``(vx, vy)`` is the desired *world-frame*
    velocity (magnitude up to ``max_linear`` m/s) and ``omega`` the
    desired yaw rate, independent of the direction of travel — the dog
    can crab-walk and rotate at the same time.  This matches the
    universal teacher's signal (drive toward a standoff point while
    facing the sheep / pen separately); with the older non-holonomic
    version the teacher's ``omega`` fought the forward-only kinematics
    and lowered success rates.

    Parameters
    ----------
    vx, vy : desired world-frame velocity intent in [-1, 1]; the pair is
        rescaled onto the unit circle when its magnitude exceeds 1
    omega : desired yaw rate intent in [-1, 1]
    h : current heading (rad), 0 = +x
    max_linear : max linear speed (m/s)
    wheel_radius : wheel radius (m)
    lx, ly : half axle distances (m)
    max_wheel_omega : wheel angular velocity limit (rad/s)
    k_turn : unused (kept for signature compatibility)
    wheel_base : unused (kept for signature compatibility)

    Returns ``(w_fl, w_fr, w_rl, w_rr)``; all zeros when every commanded
    component is below 1e-3 in magnitude.
    """
    # Keep the planar intent inside the unit disk.
    magnitude = math.hypot(vx, vy)
    if magnitude > 1.0:
        vx /= magnitude
        vy /= magnitude

    # Scale to m/s, then rotate world -> body (rotation by -h).
    vx_world = vx * max_linear
    vy_world = vy * max_linear
    c, s = math.cos(h), math.sin(h)
    vx_body = c * vx_world + s * vy_world
    vy_body = -s * vx_world + c * vy_world

    # omega in [-1, 1] maps to ± max_linear / (lx + ly) — the same peak
    # yaw as the old "omega_extra" channel, but applied directly instead
    # of being added to a heading tracker.
    omega_rad = omega * (max_linear / max(lx + ly, 1e-6))

    negligible = (abs(vx_body) < 1e-3
                  and abs(vy_body) < 1e-3
                  and abs(omega_rad) < 1e-3)
    if negligible:
        return 0.0, 0.0, 0.0, 0.0

    return mecanum_inverse(
        vx_body, vy_body, omega_rad,
        wheel_radius, lx, ly, max_wheel_omega,
    )
|
||||||
@@ -0,0 +1,181 @@
|
|||||||
|
"""Sheep flocking dynamics — Strömbom 2014 / Reynolds 1987.
|
||||||
|
|
||||||
|
Per-sheep behavioural step used by both the Webots sheep controller
|
||||||
|
and the training environment. Each step a force stack is summed:
|
||||||
|
|
||||||
|
flee — quadratic ramp away from dog within FLEE_DIST
|
||||||
|
(Strömbom 2014, term ρa)
|
||||||
|
cohesion — drift toward local centre of mass of peers within
|
||||||
|
COHESION_DIST (Strömbom 2014, term c). Weight is
|
||||||
|
higher while fleeing — fear-induced cohesion.
|
||||||
|
separation — short-range inverse-distance repulsion from peers
|
||||||
|
(Strömbom 2014 term α; Reynolds 1987)
|
||||||
|
wander — small persistent drift (Strömbom 2014 noise term ε)
|
||||||
|
|
||||||
|
Walls, the south-wall gate column, and in-pen containment are
|
||||||
|
environment-specific additions for the fenced Webots field.
|
||||||
|
|
||||||
|
References
|
||||||
|
----------
|
||||||
|
- Strömbom et al. (2014). "Solving the shepherding problem: heuristics
|
||||||
|
for herding autonomous, interacting agents." J R Soc Interface 11.
|
||||||
|
- Reynolds (1987). "Flocks, herds and schools: A distributed
|
||||||
|
behavioural model." SIGGRAPH '87.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
import random
|
||||||
|
|
||||||
|
from herding.world.geometry import (
|
||||||
|
FIELD_SHAPE, FIELD_ROUND_R,
|
||||||
|
FIELD_X, FIELD_Y,
|
||||||
|
PEN_X, PEN_Y,
|
||||||
|
GATE_X, GATE_Y,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Speeds are in wheel rad/s (motor units); m/s = speed * SHEEP_WHEEL_RADIUS.
|
||||||
|
MAX_SPEED = 22.0
|
||||||
|
FLEE_SPEED = 20.0
|
||||||
|
WANDER_SPEED = 3.0
|
||||||
|
|
||||||
|
WALL_MARGIN = 5.0
|
||||||
|
WALL_HARD_MARGIN = 1.0
|
||||||
|
WALL_HARD_GAIN = 50.0
|
||||||
|
|
||||||
|
FLEE_DIST = 7.0
|
||||||
|
SEPARATION_DIST = 2.5
|
||||||
|
COHESION_DIST = 12.0
|
||||||
|
|
||||||
|
PEN_MARGIN = 0.8
|
||||||
|
|
||||||
|
|
||||||
|
def _peers_iter(peers):
|
||||||
|
"""Accept either a {name: (x, y)} dict or an iterable of (x, y) tuples."""
|
||||||
|
if isinstance(peers, dict):
|
||||||
|
return list(peers.values())
|
||||||
|
return list(peers)
|
||||||
|
|
||||||
|
|
||||||
|
def compute_heading_speed(x, y, penned, dog_xy, peers, wander_angle, rng=None):
    """Compute one behavioural step for a single sheep.

    Sums a stack of steering forces and returns
    ``(heading, speed, new_wander_angle)``:

    * ``heading`` — world-frame target heading (``atan2`` of the summed
      force).
    * ``speed`` — wheel rad/s, clamped to ``[WANDER_SPEED, FLEE_SPEED]``.
    * ``new_wander_angle`` — ``wander_angle``, jittered with 2 %
      probability per call on the branches that wander.

    ``peers`` is a ``{name: (x, y)}`` dict or an iterable of ``(x, y)``
    tuples.  ``dog_xy`` may be ``None``, which disables the flee term.
    ``rng`` is an optional ``random.Random`` for the wander jitter; the
    module-level ``random`` is used when it is ``None``.

    NOTE(review): statement nesting was reconstructed from a listing
    with stripped indentation — confirm against the original file.
    """
    force_x, force_y = 0.0, 0.0
    flock = _peers_iter(peers)
    source = random if rng is None else rng

    if penned:
        # Pen containment: linear push back from each of the four walls.
        inset = PEN_MARGIN
        west_edge = PEN_X[0] + inset
        east_edge = PEN_X[1] - inset
        south_edge = PEN_Y[0] + inset
        north_edge = PEN_Y[1] - inset
        if x < west_edge:
            force_x += ((west_edge - x) / inset) * 15.0
        if x > east_edge:
            force_x -= ((x - east_edge) / inset) * 15.0
        if y < south_edge:
            force_y += ((south_edge - y) / inset) * 15.0
        if y > north_edge:
            force_y -= ((y - north_edge) / inset) * 15.0

        # Mild peer separation so penned sheep don't crowd one corner.
        for peer_x, peer_y in flock:
            off_x, off_y = peer_x - x, peer_y - y
            gap = math.hypot(off_x, off_y)
            if not (0.05 < gap < SEPARATION_DIST):
                continue
            shove = (SEPARATION_DIST - gap) / gap
            force_x -= (off_x / gap) * shove * 2.5
            force_y -= (off_y / gap) * shove * 2.5

        # Wander: occasional jitter of the angle, constant small drift.
        if source.random() < 0.02:
            wander_angle += source.uniform(-0.6, 0.6)
        force_x += math.cos(wander_angle) * 0.5
        force_y += math.sin(wander_angle) * 0.5

    else:
        # Free-roaming sheep in the field.
        fleeing = False
        if dog_xy is not None:
            to_dog_x = dog_xy[0] - x
            to_dog_y = dog_xy[1] - y
            dog_gap = math.hypot(to_dog_x, to_dog_y)
            if 0.01 < dog_gap < FLEE_DIST:
                fleeing = True
                # Quadratic ramp: zero at FLEE_DIST, strongest at the dog.
                closeness = 1.0 - dog_gap / FLEE_DIST
                strength = closeness * closeness * 20.0
                force_x -= (to_dog_x / dog_gap) * strength
                force_y -= (to_dog_y / dog_gap) * strength

        # Cohesion: drift toward the local centre of mass of the peers
        # within COHESION_DIST, weighted more heavily while fleeing.
        sum_x, sum_y, count = 0.0, 0.0, 0
        for peer_x, peer_y in flock:
            if 0.3 < math.hypot(peer_x - x, peer_y - y) < COHESION_DIST:
                sum_x += peer_x
                sum_y += peer_y
                count += 1
        if count > 0:
            pull = 3.0 if fleeing else 1.0
            force_x += (sum_x / count - x) * pull
            force_y += (sum_y / count - y) * pull

        # Separation: inverse-distance push away from close peers.
        for peer_x, peer_y in flock:
            off_x, off_y = peer_x - x, peer_y - y
            gap = math.hypot(off_x, off_y)
            if not (0.05 < gap < SEPARATION_DIST):
                continue
            shove = (SEPARATION_DIST - gap) / gap
            force_x -= (off_x / gap) * shove * 2.5
            force_y -= (off_y / gap) * shove * 2.5

        if FIELD_SHAPE == "field_round":
            radius = math.hypot(x, y)
            clearance = FIELD_ROUND_R - radius
            in_gate_col = (GATE_X[0] <= x <= GATE_X[1]
                           and y < GATE_Y + WALL_MARGIN)
            # Soft radial repulsion, skipped inside the gate column.
            if clearance < WALL_MARGIN and radius > 1e-6 and not in_gate_col:
                soft = ((WALL_MARGIN - clearance) / WALL_MARGIN) * 6.0
                force_x -= (x / radius) * soft
                force_y -= (y / radius) * soft
            # Hard escape band close to the wall.
            if clearance < WALL_HARD_MARGIN and not in_gate_col:
                hard = WALL_HARD_GAIN * (1.0 - clearance / WALL_HARD_MARGIN)
                force_x -= (x / radius) * hard
                force_y -= (y / radius) * hard
        else:
            # Rectangular field: the south wall is absent in the gate column.
            in_gate_x = GATE_X[0] <= x <= GATE_X[1]
            west_soft = FIELD_X[0] + WALL_MARGIN
            east_soft = FIELD_X[1] - WALL_MARGIN
            north_soft = FIELD_Y[1] - WALL_MARGIN
            south_soft = FIELD_Y[0] + WALL_MARGIN
            if x < west_soft:
                force_x += ((west_soft - x) / WALL_MARGIN) * 6.0
            if x > east_soft:
                force_x -= ((x - east_soft) / WALL_MARGIN) * 6.0
            if y > north_soft:
                force_y -= ((y - north_soft) / WALL_MARGIN) * 6.0
            if y < south_soft and not in_gate_x:
                force_y += ((south_soft - y) / WALL_MARGIN) * 6.0

            # Hard escape band — overrides everything else near a wall.
            band, gain = WALL_HARD_MARGIN, WALL_HARD_GAIN
            if x - FIELD_X[0] < band:
                force_x = max(force_x, gain * (1.0 - (x - FIELD_X[0]) / band))
            if FIELD_X[1] - x < band:
                force_x = min(force_x, -gain * (1.0 - (FIELD_X[1] - x) / band))
            if FIELD_Y[1] - y < band:
                force_y = min(force_y, -gain * (1.0 - (FIELD_Y[1] - y) / band))
            if (y - FIELD_Y[0] < band) and not in_gate_x:
                force_y = max(force_y, gain * (1.0 - (y - FIELD_Y[0]) / band))

        # Wander only while calm.
        if not fleeing:
            if source.random() < 0.02:
                wander_angle += source.uniform(-0.6, 0.6)
            force_x += math.cos(wander_angle) * 0.5
            force_y += math.sin(wander_angle) * 0.5

    heading = math.atan2(force_y, force_x)
    magnitude = math.hypot(force_x, force_y)
    speed = max(WANDER_SPEED, min(FLEE_SPEED, magnitude * 3.0))
    return heading, speed, wander_angle
|
||||||
@@ -0,0 +1,215 @@
|
|||||||
|
"""World geometry and robot specs.
|
||||||
|
|
||||||
|
Coordinates are metres; (0, 0) is the field centre, +x east, +y north.
|
||||||
|
These constants mirror ``worlds/field.wbt`` and the proto files — if
|
||||||
|
the world changes, this file is the single point of update.
|
||||||
|
|
||||||
|
field (rectangular)
|
||||||
|
+-----------+
|
||||||
|
| |
|
||||||
|
| ...... |
|
||||||
|
+---||||----+ y = -15 (south wall, 3 m gate at x in [10, 13])
|
||||||
|
||||
|
||||||
|
|pen| y in [-22, -15]
|
||||||
|
+---+
|
||||||
|
|
||||||
|
field_round (circular, R = 15 m)
|
||||||
|
.---.
|
||||||
|
/ ... \\
|
||||||
|
| ..... | gate at south, x in [-1.5, 1.5]
|
||||||
|
\\ ... /
|
||||||
|
'-+-' pen y in [-22, -15]
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import math
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Field shape selection — controlled by HERDING_WORLD env var at runtime.
|
||||||
|
# Defaults to "field" (rectangular). The launcher writes it into the
|
||||||
|
# runtime cfg so the controller can pick it up too.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
FIELD_SHAPE = (os.environ.get("HERDING_WORLD", "field")).lower()
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== Rectangular field (field.wbt) ====================
|
||||||
|
FIELD_X = (-15.0, 15.0)
|
||||||
|
FIELD_Y = (-15.0, 15.0)
|
||||||
|
FIELD_INSIDE_MARGIN = 0.5
|
||||||
|
|
||||||
|
# Pen (external, south of the field)
|
||||||
|
PEN_X = (10.0, 13.0)
|
||||||
|
PEN_Y = (-22.0, -15.0)
|
||||||
|
PEN_CENTER = (0.5 * (PEN_X[0] + PEN_X[1]), 0.5 * (PEN_Y[0] + PEN_Y[1]))
|
||||||
|
PEN_ENTRY = (0.5 * (PEN_X[0] + PEN_X[1]), -15.0)
|
||||||
|
|
||||||
|
# Gate (hole in the south wall)
|
||||||
|
GATE_X = PEN_X
|
||||||
|
GATE_Y = -15.0
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== Round field (field_round.wbt) ====================
|
||||||
|
FIELD_ROUND_R = 15.0
|
||||||
|
FIELD_ROUND_PEN_X = (-1.5, 1.5)
|
||||||
|
FIELD_ROUND_PEN_Y = (-22.0, -15.0)
|
||||||
|
FIELD_ROUND_PEN_CENTER = (
|
||||||
|
0.5 * (FIELD_ROUND_PEN_X[0] + FIELD_ROUND_PEN_X[1]),
|
||||||
|
0.5 * (FIELD_ROUND_PEN_Y[0] + FIELD_ROUND_PEN_Y[1]),
|
||||||
|
)
|
||||||
|
FIELD_ROUND_PEN_ENTRY = (0.0, -15.0)
|
||||||
|
FIELD_ROUND_GATE_X = FIELD_ROUND_PEN_X
|
||||||
|
FIELD_ROUND_GATE_Y = -15.0
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== Active geometry (resolved at import) ===============
|
||||||
|
# Rectangular defaults are already assigned above. Override for round.
|
||||||
|
if FIELD_SHAPE == "field_round":
|
||||||
|
PEN_X = FIELD_ROUND_PEN_X
|
||||||
|
PEN_Y = FIELD_ROUND_PEN_Y
|
||||||
|
PEN_CENTER = FIELD_ROUND_PEN_CENTER
|
||||||
|
PEN_ENTRY = FIELD_ROUND_PEN_ENTRY
|
||||||
|
GATE_X = FIELD_ROUND_GATE_X
|
||||||
|
GATE_Y = FIELD_ROUND_GATE_Y
|
||||||
|
|
||||||
|
|
||||||
|
def configure_from_args(argv: list[str] | None = None) -> str:
    """Parse ``--world`` from *argv* (or ``sys.argv[1:]``), call :func:`configure`,
    and set ``HERDING_WORLD`` in the environment.

    Both ``--world NAME`` and ``--world=NAME`` are recognised; the first
    occurrence wins.  When the flag is absent (or ``--world`` is the last
    argument and has no value) the world defaults to ``"field"``.

    Returns the resolved, lower-cased world name (``"field"`` or
    ``"field_round"`` for the supported worlds).  The same normalised
    name is passed to :func:`configure` and written to ``HERDING_WORLD``,
    so the return value, the environment and ``FIELD_SHAPE`` always agree
    regardless of how the flag was capitalised on the command line.

    Call this at the very top of any script that accepts a ``--world`` flag,
    *before* importing anything from ``herding.*`` that depends on field
    geometry.  This centralises the pre-parse logic that was previously
    duplicated in ``bc/collect.py``, ``rl/train.py``, and ``eval.py``::

        from herding.world.geometry import configure_from_args
        configure_from_args()   # reads sys.argv
    """
    import sys as _sys

    args = argv if argv is not None else _sys.argv[1:]
    world = "field"
    for i, a in enumerate(args):
        if a == "--world" and i + 1 < len(args):
            world = args[i + 1]
            break
        if a.startswith("--world="):
            world = a.split("=", 1)[1]
            break

    # Normalise once so configure(), the env var and the return value
    # cannot disagree on case (configure() lower-cases internally).
    world = world.lower()
    configure(world)
    os.environ["HERDING_WORLD"] = world
    return world
|
||||||
|
|
||||||
|
|
||||||
|
def configure(shape: str) -> None:
    """Select the active field geometry at runtime.

    ``shape`` is lower-cased and stored in ``FIELD_SHAPE``.
    ``"field_round"`` installs the round-field pen/gate constants; any
    other value installs the rectangular ones.

    Call this **before** importing any other ``herding.*`` module that
    depends on these constants (flocking_sim, lidar_sim, obs, etc.).
    The import-time env-var path (``HERDING_WORLD``) still works; this
    function is for scripts that pick the world through a CLI flag.
    """
    global FIELD_SHAPE, PEN_X, PEN_Y, PEN_CENTER, PEN_ENTRY, GATE_X, GATE_Y
    FIELD_SHAPE = shape.lower()

    if FIELD_SHAPE != "field_round":
        # Rectangular field: pen south of the wall, gate spans the pen.
        PEN_X = (10.0, 13.0)
        PEN_Y = (-22.0, -15.0)
        mid_x = 0.5 * (PEN_X[0] + PEN_X[1])
        PEN_CENTER = (mid_x, 0.5 * (PEN_Y[0] + PEN_Y[1]))
        PEN_ENTRY = (mid_x, -15.0)
        GATE_X = PEN_X
        GATE_Y = -15.0
        return

    PEN_X = FIELD_ROUND_PEN_X
    PEN_Y = FIELD_ROUND_PEN_Y
    PEN_CENTER = FIELD_ROUND_PEN_CENTER
    PEN_ENTRY = FIELD_ROUND_PEN_ENTRY
    GATE_X = FIELD_ROUND_GATE_X
    GATE_Y = FIELD_ROUND_GATE_Y
|
||||||
|
|
||||||
|
# Dog spec — protos/ShepherdDog.proto
|
||||||
|
DOG_WHEEL_RADIUS = 0.038 # m
|
||||||
|
DOG_WHEEL_BASE = 0.28 # m, axle-to-axle
|
||||||
|
DOG_MAX_WHEEL_OMEGA = 70.0 # rad/s
|
||||||
|
DOG_MAX_LINEAR = DOG_WHEEL_RADIUS * DOG_MAX_WHEEL_OMEGA # ≈ 2.66 m/s
|
||||||
|
|
||||||
|
# Dog mecanum spec — 4-wheel omnidirectional layout
|
||||||
|
DOG_WHEEL_BASE_X = 0.28 # m, front-to-back axle distance
|
||||||
|
DOG_WHEEL_BASE_Y = 0.28 # m, left-to-right axle distance
|
||||||
|
|
||||||
|
# Sheep spec — protos/Sheep.proto
|
||||||
|
SHEEP_WHEEL_RADIUS = 0.031 # m
|
||||||
|
SHEEP_WHEEL_BASE = 0.20 # m
|
||||||
|
SHEEP_MAX_WHEEL_OMEGA = 25.0 # rad/s
|
||||||
|
SHEEP_MAX_LINEAR = SHEEP_WHEEL_RADIUS * SHEEP_MAX_WHEEL_OMEGA # ≈ 0.78 m/s
|
||||||
|
|
||||||
|
WEBOTS_DT = 0.016 # seconds (matches WorldInfo.basicTimeStep)
|
||||||
|
|
||||||
|
# Virtual south wall — env and controller both keep the dog north of this.
|
||||||
|
DOG_SOUTH_LIMIT = -14.5
|
||||||
|
|
||||||
|
MAX_SHEEP = 10
|
||||||
|
|
||||||
|
|
||||||
|
def in_pen(x: float, y: float) -> bool:
    """Return True when ``(x, y)`` is strictly inside the pen rectangle.

    Points exactly on a pen edge are not counted as inside.
    """
    if not (PEN_X[0] < x < PEN_X[1]):
        return False
    return PEN_Y[0] < y < PEN_Y[1]
|
||||||
|
|
||||||
|
|
||||||
|
def in_field(x: float, y: float, margin: float = 0.0) -> bool:
    """Return True when ``(x, y)`` is inside the field, shrunk by ``margin``.

    Round field: the point must lie within radius ``FIELD_ROUND_R - margin``
    of the origin.  Rectangular field: both coordinates must lie within
    the field bounds pulled in by ``margin``.  Boundaries are inclusive.
    """
    if FIELD_SHAPE != "field_round":
        x_ok = FIELD_X[0] + margin <= x <= FIELD_X[1] - margin
        return x_ok and FIELD_Y[0] + margin <= y <= FIELD_Y[1] - margin
    r = FIELD_ROUND_R - margin
    # Compare squared distances to avoid a square root.
    return x * x + y * y <= r * r
|
||||||
|
|
||||||
|
|
||||||
|
def in_gate_corridor(x: float, y: float, margin: float = 0.0) -> bool:
    """Return True when ``(x, y)`` is in the gate column between field and pen.

    The column spans the gate's x-range and runs from the pen's southern
    edge up to the gate line; ``margin`` widens it on every side.
    """
    within_x = GATE_X[0] - margin <= x <= GATE_X[1] + margin
    if not within_x:
        return False
    return PEN_Y[0] - margin <= y <= GATE_Y + margin
|
||||||
|
|
||||||
|
|
||||||
|
def is_penned(x: float, y: float, latch_margin: float = 0.2) -> bool:
    """Return True iff ``(x, y)`` is in the gate column and on/south of the gate line.

    ``latch_margin`` widens the accepted x-range on both sides of the gate.
    """
    west = GATE_X[0] - latch_margin
    east = GATE_X[1] + latch_margin
    if not (west <= x <= east):
        return False
    return y <= GATE_Y
|
||||||
|
|
||||||
|
|
||||||
|
def distance_to_pen_entry(x: float, y: float) -> float:
    """Euclidean distance from ``(x, y)`` to ``PEN_ENTRY``."""
    dx = x - PEN_ENTRY[0]
    dy = y - PEN_ENTRY[1]
    return math.hypot(dx, dy)
|
||||||
|
|
||||||
|
|
||||||
|
def distance_to_wall(x: float, y: float) -> float:
    """Shortest distance from ``(x, y)`` to the nearest field wall.

    Rectangular field: the smallest of the four perpendicular
    (axis-aligned) distances to the bounding walls.  Round field:
    ``R - sqrt(x² + y²)``.  The result is negative when the point lies
    outside the field.
    """
    if FIELD_SHAPE == "field_round":
        return FIELD_ROUND_R - math.hypot(x, y)
    gaps = (
        x - FIELD_X[0],  # west wall
        FIELD_X[1] - x,  # east wall
        y - FIELD_Y[0],  # south wall
        FIELD_Y[1] - y,  # north wall
    )
    return min(gaps)
|
||||||
|
|
||||||
|
|
||||||
|
def clip_to_field(x: float, y: float, margin: float = 0.2) -> tuple[float, float]:
    """Clip ``(x, y)`` to the field boundary pulled in by ``margin``.

    Rectangular field: each coordinate is clamped independently.  Round
    field: a point beyond radius ``FIELD_ROUND_R - margin`` is projected
    radially inward onto that radius; points inside are returned unchanged.
    """
    if FIELD_SHAPE != "field_round":
        clipped_x = max(FIELD_X[0] + margin, min(FIELD_X[1] - margin, x))
        clipped_y = max(FIELD_Y[0] + margin, min(FIELD_Y[1] - margin, y))
        return (clipped_x, clipped_y)

    radius = math.hypot(x, y)
    limit = FIELD_ROUND_R - margin
    outside = radius > limit and radius > 1e-6
    if not outside:
        return x, y
    shrink = limit / radius
    return x * shrink, y * shrink
|
||||||
@@ -138,7 +138,8 @@ PROTO ShepherdDog [
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
# Lidar — front-facing 140° FOV, mounted at snout tip
|
# Lidar — front-facing 140° FOV (canonical hardware spec).
|
||||||
|
# See ShepherdDog360.proto for the 360° robustness-ablation variant.
|
||||||
Lidar {
|
Lidar {
|
||||||
translation 0.05 0 0.01
|
translation 0.05 0 0.01
|
||||||
name "lidar"
|
name "lidar"
|
||||||
|
|||||||
@@ -0,0 +1,691 @@
|
|||||||
|
#VRML_SIM R2025a utf8
|
||||||
|
# Shepherd Dog Robot (360° lidar variant) - wheeled base with dog character on top, snout-mounted 360° lidar
|
||||||
|
|
||||||
|
EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/appearances/protos/TireRubber.proto"
|
||||||
|
|
||||||
|
PROTO ShepherdDog360 [
|
||||||
|
field SFVec3f translation 0 0 0
|
||||||
|
field SFRotation rotation 0 1 0 0
|
||||||
|
field SFString name "ShepherdDog"
|
||||||
|
field SFString controller "shepherd_dog"
|
||||||
|
field MFString controllerArgs []
|
||||||
|
field SFString customData ""
|
||||||
|
field SFBool supervisor FALSE
|
||||||
|
field SFBool synchronization TRUE
|
||||||
|
]
|
||||||
|
{
|
||||||
|
Robot {
|
||||||
|
translation IS translation
|
||||||
|
rotation IS rotation
|
||||||
|
name IS name
|
||||||
|
controller IS controller
|
||||||
|
controllerArgs IS controllerArgs
|
||||||
|
customData IS customData
|
||||||
|
supervisor IS supervisor
|
||||||
|
synchronization IS synchronization
|
||||||
|
children [
|
||||||
|
# ========== CHASSIS / BASE ==========
|
||||||
|
DEF CHASSIS Transform {
|
||||||
|
translation 0 0 0.05
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance DEF CHASSIS_APP PBRAppearance {
|
||||||
|
baseColor 0.2 0.2 0.2
|
||||||
|
roughness 0.6
|
||||||
|
metalness 0.3
|
||||||
|
}
|
||||||
|
geometry Box {
|
||||||
|
size 0.32 0.16 0.06
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
# Front slope
|
||||||
|
DEF CHASSIS_FRONT Transform {
|
||||||
|
translation 0.14 0 0.07
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance USE CHASSIS_APP
|
||||||
|
geometry Box {
|
||||||
|
size 0.06 0.14 0.04
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
# Rear slope
|
||||||
|
DEF CHASSIS_REAR Transform {
|
||||||
|
translation -0.14 0 0.07
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance USE CHASSIS_APP
|
||||||
|
geometry Box {
|
||||||
|
size 0.06 0.14 0.04
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== DOG BODY on top of chassis ==========
|
||||||
|
DEF BODY Transform {
|
||||||
|
translation 0 0 0.11
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance DEF FUR_BROWN PBRAppearance {
|
||||||
|
baseColor 0.55 0.35 0.17
|
||||||
|
roughness 0.85
|
||||||
|
metalness 0.0
|
||||||
|
}
|
||||||
|
geometry Box {
|
||||||
|
size 0.30 0.16 0.08
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== CHEST ==========
|
||||||
|
DEF CHEST Transform {
|
||||||
|
translation 0.12 0 0.11
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance DEF FUR_CREAM PBRAppearance {
|
||||||
|
baseColor 0.85 0.72 0.55
|
||||||
|
roughness 0.85
|
||||||
|
metalness 0.0
|
||||||
|
}
|
||||||
|
geometry Box {
|
||||||
|
size 0.08 0.18 0.08
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== HEAD ==========
|
||||||
|
DEF HEAD Transform {
|
||||||
|
translation 0.20 0 0.17
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance USE FUR_BROWN
|
||||||
|
geometry Box {
|
||||||
|
size 0.10 0.12 0.09
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== SNOUT + LIDAR ==========
|
||||||
|
DEF SNOUT Transform {
|
||||||
|
translation 0.28 0 0.155
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance USE FUR_CREAM
|
||||||
|
geometry Box {
|
||||||
|
size 0.08 0.07 0.05
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# Nose
|
||||||
|
Transform {
|
||||||
|
translation 0.04 0 0.01
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.1 0.1 0.1
|
||||||
|
roughness 0.4
|
||||||
|
}
|
||||||
|
geometry Sphere {
|
||||||
|
radius 0.013
|
||||||
|
subdivision 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
# Lidar — 360° FOV (was 140°/2.44 rad). Wider FOV closes the
|
||||||
|
# FOV-loss perception gap so policies trained on 360° gym sim
|
||||||
|
# transfer cleanly without retraining.
|
||||||
|
Lidar {
|
||||||
|
translation 0.05 0 0.01
|
||||||
|
name "lidar"
|
||||||
|
horizontalResolution 360
|
||||||
|
fieldOfView 6.28
|
||||||
|
numberOfLayers 1
|
||||||
|
minRange 0.10
|
||||||
|
maxRange 15.0
|
||||||
|
noise 0.005
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== LEFT EAR ==========
|
||||||
|
DEF LEFT_EAR HingeJoint {
|
||||||
|
jointParameters HingeJointParameters {
|
||||||
|
axis 0 0 1
|
||||||
|
anchor 0.19 0.055 0.21
|
||||||
|
}
|
||||||
|
device [
|
||||||
|
RotationalMotor {
|
||||||
|
name "left ear motor"
|
||||||
|
maxVelocity 10.0
|
||||||
|
minPosition -0.5
|
||||||
|
maxPosition 0.5
|
||||||
|
}
|
||||||
|
]
|
||||||
|
endPoint Solid {
|
||||||
|
translation 0.19 0.055 0.21
|
||||||
|
rotation 0 0 1 0.2
|
||||||
|
name "left ear"
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance DEF FUR_DARK PBRAppearance {
|
||||||
|
baseColor 0.35 0.20 0.10
|
||||||
|
roughness 0.85
|
||||||
|
metalness 0.0
|
||||||
|
}
|
||||||
|
geometry Box {
|
||||||
|
size 0.035 0.025 0.06
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
boundingObject Box {
|
||||||
|
size 0.035 0.025 0.06
|
||||||
|
}
|
||||||
|
physics Physics {
|
||||||
|
density -1
|
||||||
|
mass 0.005
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== RIGHT EAR ==========
|
||||||
|
DEF RIGHT_EAR HingeJoint {
|
||||||
|
jointParameters HingeJointParameters {
|
||||||
|
axis 0 0 1
|
||||||
|
anchor 0.19 -0.055 0.21
|
||||||
|
}
|
||||||
|
device [
|
||||||
|
RotationalMotor {
|
||||||
|
name "right ear motor"
|
||||||
|
maxVelocity 10.0
|
||||||
|
minPosition -0.5
|
||||||
|
maxPosition 0.5
|
||||||
|
}
|
||||||
|
]
|
||||||
|
endPoint Solid {
|
||||||
|
translation 0.19 -0.055 0.21
|
||||||
|
rotation 0 0 -1 0.2
|
||||||
|
name "right ear"
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance USE FUR_DARK
|
||||||
|
geometry Box {
|
||||||
|
size 0.035 0.025 0.06
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
boundingObject Box {
|
||||||
|
size 0.035 0.025 0.06
|
||||||
|
}
|
||||||
|
physics Physics {
|
||||||
|
density -1
|
||||||
|
mass 0.005
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== EYES ==========
|
||||||
|
DEF LEFT_EYE Transform {
|
||||||
|
translation 0.25 0.05 0.19
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.95 0.95 0.95
|
||||||
|
roughness 0.3
|
||||||
|
}
|
||||||
|
geometry Sphere {
|
||||||
|
radius 0.016
|
||||||
|
subdivision 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# Pupil
|
||||||
|
Transform {
|
||||||
|
translation 0.012 0 0.004
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.1 0.1 0.1
|
||||||
|
roughness 0.2
|
||||||
|
}
|
||||||
|
geometry Sphere {
|
||||||
|
radius 0.009
|
||||||
|
subdivision 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
DEF RIGHT_EYE Transform {
|
||||||
|
translation 0.25 -0.05 0.19
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.95 0.95 0.95
|
||||||
|
roughness 0.3
|
||||||
|
}
|
||||||
|
geometry Sphere {
|
||||||
|
radius 0.016
|
||||||
|
subdivision 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# Pupil
|
||||||
|
Transform {
|
||||||
|
translation 0.012 0 0.004
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.1 0.1 0.1
|
||||||
|
roughness 0.2
|
||||||
|
}
|
||||||
|
geometry Sphere {
|
||||||
|
radius 0.009
|
||||||
|
subdivision 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== COLLAR ==========
|
||||||
|
DEF COLLAR Transform {
|
||||||
|
translation 0.16 0 0.125
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.8 0.1 0.1
|
||||||
|
roughness 0.5
|
||||||
|
}
|
||||||
|
geometry Cylinder {
|
||||||
|
height 0.02
|
||||||
|
radius 0.095
|
||||||
|
subdivision 16
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# ID tag
|
||||||
|
Transform {
|
||||||
|
translation 0 0.10 0
|
||||||
|
rotation 1 0 0 1.5708
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.75 0.75 0.0
|
||||||
|
metalness 0.8
|
||||||
|
roughness 0.2
|
||||||
|
}
|
||||||
|
geometry Cylinder {
|
||||||
|
height 0.003
|
||||||
|
radius 0.018
|
||||||
|
subdivision 8
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== TAIL (capsule + tip ball only; the lidar is mounted in the SNOUT) ==========
|
||||||
|
DEF TAIL HingeJoint {
|
||||||
|
jointParameters HingeJointParameters {
|
||||||
|
axis 0 1 0
|
||||||
|
anchor -0.15 0 0.11
|
||||||
|
}
|
||||||
|
device [
|
||||||
|
RotationalMotor {
|
||||||
|
name "tail motor"
|
||||||
|
maxVelocity 5.0
|
||||||
|
minPosition -1.0
|
||||||
|
maxPosition 1.0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
endPoint Solid {
|
||||||
|
translation -0.17 0 0.13
|
||||||
|
name "tail solid"
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance USE FUR_BROWN
|
||||||
|
geometry Capsule {
|
||||||
|
height 0.12
|
||||||
|
radius 0.013
|
||||||
|
top FALSE
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# Tail tip ball
|
||||||
|
Transform {
|
||||||
|
translation 0 0 0.08
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.2 0.2 0.2
|
||||||
|
roughness 0.3
|
||||||
|
metalness 0.6
|
||||||
|
}
|
||||||
|
geometry Sphere {
|
||||||
|
radius 0.028
|
||||||
|
subdivision 4
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
boundingObject Group {
|
||||||
|
children [
|
||||||
|
Capsule {
|
||||||
|
height 0.12
|
||||||
|
radius 0.013
|
||||||
|
}
|
||||||
|
Transform {
|
||||||
|
translation 0 0 0.08
|
||||||
|
children [
|
||||||
|
Sphere {
|
||||||
|
radius 0.028
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
physics Physics {
|
||||||
|
density -1
|
||||||
|
mass 0.08
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== RIGHT AXLE ARM (horizontal bar from chassis to wheel) ==========
|
||||||
|
DEF RIGHT_AXLE Transform {
|
||||||
|
translation 0 -0.115 0.038
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.5 0.5 0.5
|
||||||
|
roughness 0.3
|
||||||
|
metalness 0.8
|
||||||
|
}
|
||||||
|
geometry Box {
|
||||||
|
size 0.02 0.08 0.02
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== LEFT AXLE ARM ==========
|
||||||
|
DEF LEFT_AXLE Transform {
|
||||||
|
translation 0 0.115 0.038
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.5 0.5 0.5
|
||||||
|
roughness 0.3
|
||||||
|
metalness 0.8
|
||||||
|
}
|
||||||
|
geometry Box {
|
||||||
|
size 0.02 0.08 0.02
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== RIGHT WHEEL ==========
|
||||||
|
DEF RIGHT_WHEEL_JOINT HingeJoint {
|
||||||
|
jointParameters HingeJointParameters {
|
||||||
|
axis 0 1 0
|
||||||
|
anchor 0 -0.14 0.038
|
||||||
|
}
|
||||||
|
device [
|
||||||
|
RotationalMotor {
|
||||||
|
name "right wheel motor"
|
||||||
|
maxVelocity 70.0
|
||||||
|
maxTorque 20.0
|
||||||
|
}
|
||||||
|
PositionSensor {
|
||||||
|
name "right wheel sensor"
|
||||||
|
resolution 0.00628
|
||||||
|
}
|
||||||
|
]
|
||||||
|
endPoint Solid {
|
||||||
|
translation 0 -0.14 0.038
|
||||||
|
rotation 0 -1 0 1.570796
|
||||||
|
children [
|
||||||
|
DEF WHEEL_VIS Pose {
|
||||||
|
rotation 1 0 0 -1.5708
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.15 0.15 0.15
|
||||||
|
roughness 0.4
|
||||||
|
metalness 0.5
|
||||||
|
}
|
||||||
|
geometry Cylinder {
|
||||||
|
height 0.016
|
||||||
|
radius 0.035
|
||||||
|
subdivision 24
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.6 0.6 0.6
|
||||||
|
roughness 0.3
|
||||||
|
metalness 0.7
|
||||||
|
}
|
||||||
|
geometry Cylinder {
|
||||||
|
height 0.018
|
||||||
|
radius 0.014
|
||||||
|
subdivision 12
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Shape {
|
||||||
|
appearance TireRubber {
|
||||||
|
textureTransform TextureTransform {
|
||||||
|
scale 1.5 0.6
|
||||||
|
}
|
||||||
|
type "bike"
|
||||||
|
}
|
||||||
|
geometry Cylinder {
|
||||||
|
height 0.022
|
||||||
|
radius 0.038
|
||||||
|
subdivision 24
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
name "right wheel"
|
||||||
|
boundingObject Pose {
|
||||||
|
rotation 1 0 0 -1.5708
|
||||||
|
children [
|
||||||
|
Cylinder {
|
||||||
|
height 0.022
|
||||||
|
radius 0.038
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
physics Physics {
|
||||||
|
density -1
|
||||||
|
mass 0.06
|
||||||
|
centerOfMass [
|
||||||
|
0 0 0
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== LEFT WHEEL (NOTE(review): mass 0.12 below vs 0.06 on the right wheel — confirm the asymmetry is intended) ==========
|
||||||
|
DEF LEFT_WHEEL_JOINT HingeJoint {
|
||||||
|
jointParameters HingeJointParameters {
|
||||||
|
axis 0 1 0
|
||||||
|
anchor 0 0.14 0.038
|
||||||
|
}
|
||||||
|
device [
|
||||||
|
RotationalMotor {
|
||||||
|
name "left wheel motor"
|
||||||
|
maxVelocity 70.0
|
||||||
|
maxTorque 20.0
|
||||||
|
}
|
||||||
|
PositionSensor {
|
||||||
|
name "left wheel sensor"
|
||||||
|
resolution 0.00628
|
||||||
|
}
|
||||||
|
]
|
||||||
|
endPoint Solid {
|
||||||
|
translation 0 0.14 0.038
|
||||||
|
rotation 0.707105 0 0.707109 -3.14159
|
||||||
|
children [
|
||||||
|
USE WHEEL_VIS
|
||||||
|
]
|
||||||
|
name "left wheel"
|
||||||
|
boundingObject Pose {
|
||||||
|
rotation 1 0 0 -1.5708
|
||||||
|
children [
|
||||||
|
Cylinder {
|
||||||
|
height 0.022
|
||||||
|
radius 0.038
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
physics Physics {
|
||||||
|
density -1
|
||||||
|
mass 0.12
|
||||||
|
centerOfMass [
|
||||||
|
0 0 0
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== FRONT CASTER ==========
|
||||||
|
DEF FRONT_CASTER BallJoint {
|
||||||
|
jointParameters BallJointParameters {
|
||||||
|
anchor 0.14 0 0.02
|
||||||
|
}
|
||||||
|
endPoint Solid {
|
||||||
|
translation 0.14 0 0.02
|
||||||
|
name "front caster"
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.2 0.2 0.2
|
||||||
|
roughness 0.3
|
||||||
|
metalness 0.5
|
||||||
|
}
|
||||||
|
geometry Sphere {
|
||||||
|
radius 0.02
|
||||||
|
subdivision 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
boundingObject Sphere {
|
||||||
|
radius 0.02
|
||||||
|
}
|
||||||
|
physics Physics {
|
||||||
|
density -1
|
||||||
|
mass 0.03
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== REAR CASTER ==========
|
||||||
|
DEF REAR_CASTER BallJoint {
|
||||||
|
jointParameters BallJointParameters {
|
||||||
|
anchor -0.14 0 0.02
|
||||||
|
}
|
||||||
|
endPoint Solid {
|
||||||
|
translation -0.14 0 0.02
|
||||||
|
name "rear caster"
|
||||||
|
children [
|
||||||
|
Shape {
|
||||||
|
appearance PBRAppearance {
|
||||||
|
baseColor 0.2 0.2 0.2
|
||||||
|
roughness 0.3
|
||||||
|
metalness 0.5
|
||||||
|
}
|
||||||
|
geometry Sphere {
|
||||||
|
radius 0.02
|
||||||
|
subdivision 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
boundingObject Sphere {
|
||||||
|
radius 0.02
|
||||||
|
}
|
||||||
|
physics Physics {
|
||||||
|
density -1
|
||||||
|
mass 0.03
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== IMU SENSORS ==========
|
||||||
|
Accelerometer {
|
||||||
|
translation 0 0 0.10
|
||||||
|
name "accelerometer"
|
||||||
|
}
|
||||||
|
Gyro {
|
||||||
|
translation 0 0 0.10
|
||||||
|
name "gyro"
|
||||||
|
}
|
||||||
|
Compass {
|
||||||
|
translation 0 0 0.10
|
||||||
|
name "compass"
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== GPS ==========
|
||||||
|
GPS {
|
||||||
|
translation 0 0 0.17
|
||||||
|
name "gps"
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== RECEIVER ==========
|
||||||
|
Receiver {
|
||||||
|
name "receiver"
|
||||||
|
channel 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== EMITTER ==========
|
||||||
|
Emitter {
|
||||||
|
name "emitter"
|
||||||
|
channel 1
|
||||||
|
range 50.0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
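# ========== BOUNDING OBJECT (NOTE(review): children are Transform nodes while the wheels use Pose — confirm Webots R2025a accepts Transform inside boundingObject) ==========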
|
||||||
|
boundingObject Group {
|
||||||
|
children [
|
||||||
|
# Chassis box
|
||||||
|
Transform {
|
||||||
|
translation 0 0 0.05
|
||||||
|
children [
|
||||||
|
Box {
|
||||||
|
size 0.32 0.16 0.06
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
# Body box
|
||||||
|
Transform {
|
||||||
|
translation 0 0 0.11
|
||||||
|
children [
|
||||||
|
Box {
|
||||||
|
size 0.30 0.16 0.08
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# ========== PHYSICS ==========
|
||||||
|
physics Physics {
|
||||||
|
density -1
|
||||||
|
mass 5.0
|
||||||
|
centerOfMass [
|
||||||
|
0 0 0.03
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,8 @@
|
|||||||
|
"""Pytest configuration — ensure the project root is on ``sys.path``."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Parent directory of the directory holding this file.  ``abspath`` (rather
# than ``normpath``) guarantees an absolute entry even when ``__file__`` is
# relative, so the path stays valid if the working directory changes later.
_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if _PROJECT_ROOT not in sys.path:
    # Prepend so project packages win over same-named installed ones.
    sys.path.insert(0, _PROJECT_ROOT)
|
||||||
@@ -0,0 +1,290 @@
|
|||||||
|
"""Tests for herding/config.py — dataclass construction, defaults, overrides."""
|
||||||
|
|
||||||
|
import math
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from herding.config import (
|
||||||
|
DetectionConfig,
|
||||||
|
DomainRandomConfig,
|
||||||
|
HerdingConfig,
|
||||||
|
HERDING_DEFAULT,
|
||||||
|
HERDING_WEBOTS,
|
||||||
|
LidarConfig,
|
||||||
|
LIDAR_FULL,
|
||||||
|
LIDAR_WEBOTS,
|
||||||
|
RobotConfig,
|
||||||
|
TrackerConfig,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# LidarConfig
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestLidarConfig:
    """``LidarConfig``: default/preset values, immutability and validation."""

    def test_defaults_match_full_circle_preset(self):
        assert LidarConfig() == LIDAR_FULL

    def test_webots_preset(self):
        assert LIDAR_WEBOTS.n_rays == 180
        assert abs(LIDAR_WEBOTS.fov_rad - math.radians(140.0)) < 1e-9

    def test_frozen(self):
        cfg = LidarConfig()
        with pytest.raises((AttributeError, TypeError)):
            cfg.n_rays = 42  # type: ignore[misc]

    def test_invalid_n_rays(self):
        with pytest.raises(ValueError):
            LidarConfig(n_rays=0)

    # Parametrized so each rejected value is reported on its own; in a
    # single test body the first failure would hide the second case.
    @pytest.mark.parametrize("fov_rad", [0.0, math.pi * 3])
    def test_invalid_fov(self, fov_rad):
        with pytest.raises(ValueError):
            LidarConfig(fov_rad=fov_rad)

    def test_invalid_max_range(self):
        with pytest.raises(ValueError):
            LidarConfig(max_range=-1.0)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# TrackerConfig
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestTrackerConfig:
    """``TrackerConfig``: defaults, the Webots preset and validation."""

    def test_defaults(self):
        cfg = TrackerConfig()
        assert cfg.forget_steps == 200
        assert cfg.max_new_tracks_per_step == 10

    def test_webots_preset_tighter(self):
        cfg = HERDING_WEBOTS.tracker
        # forget_steps was extended so confirmed sheep tracks survive
        # sparse 140° FOV re-sightings; consensus blocks phantoms from
        # reaching this lifetime.
        assert cfg.forget_steps >= 200
        assert cfg.max_new_tracks_per_step == 1
        assert cfg.pen_latch_depth == 2.0

    def test_default_consensus_enabled(self):
        # Consensus is on by default — it filters tracker phantoms that
        # confused the policy on the round field (52% → 88%) at no cost
        # on the rectangular field (100% → 100%).  Pass-through (k=1) is
        # still available by explicitly constructing TrackerConfig(consensus_k=1).
        cfg = TrackerConfig()
        assert cfg.consensus_k >= 2
        assert cfg.consensus_radius_m > 0.0
        assert cfg.consensus_max_age > cfg.consensus_k

    def test_webots_preset_enables_consensus(self):
        cfg = HERDING_WEBOTS.tracker
        assert cfg.consensus_k > 1
        assert cfg.consensus_radius_m > 0.0
        assert cfg.consensus_max_age >= cfg.consensus_k

    def test_invalid_forget_steps(self):
        with pytest.raises(ValueError):
            TrackerConfig(forget_steps=0)

    def test_invalid_max_new_tracks(self):
        with pytest.raises(ValueError):
            TrackerConfig(max_new_tracks_per_step=0)

    # One case per parameter so a missing check on one field cannot be
    # masked by an earlier failure in the same test body.
    @pytest.mark.parametrize(
        "kwargs",
        [
            {"consensus_k": 0},
            {"consensus_radius_m": 0.0},
            {"consensus_max_age": 0},
        ],
    )
    def test_invalid_consensus_params(self, kwargs):
        with pytest.raises(ValueError):
            TrackerConfig(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# DetectionConfig
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestDetectionConfig:
    """``DetectionConfig``: ``wall_reject`` default, Webots preset and validation."""

    def test_defaults(self):
        cfg = DetectionConfig()
        assert cfg.wall_reject == 0.5

    def test_webots_preset_wall_reject(self):
        # wall_reject stays at 0.5 m — 1.0 m was too aggressive near the south gate
        cfg = HERDING_WEBOTS.detection
        assert cfg.wall_reject == 0.5

    def test_invalid_wall_reject(self):
        with pytest.raises(ValueError):
            DetectionConfig(wall_reject=-0.1)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# RobotConfig
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestRobotConfig:
    """``RobotConfig``: derived speed, smoothing/strafe defaults and validation."""

    def test_max_linear_derived(self):
        cfg = RobotConfig()
        assert abs(cfg.max_linear - cfg.wheel_radius * cfg.max_wheel_omega) < 1e-9

    def test_default_action_smooth_zero(self):
        assert RobotConfig().action_smooth == 0.0

    def test_webots_action_smooth(self):
        assert HERDING_WEBOTS.robot.action_smooth == 0.55

    # Parametrized so every rejected value is checked and reported
    # independently of the others.
    @pytest.mark.parametrize("action_smooth", [1.0, -0.1])
    def test_invalid_action_smooth(self, action_smooth):
        with pytest.raises(ValueError):
            RobotConfig(action_smooth=action_smooth)

    def test_default_strafe_passthrough(self):
        cfg = RobotConfig()
        assert cfg.strafe_efficiency == 1.0
        assert cfg.strafe_to_forward_bleed == 0.0

    @pytest.mark.parametrize("strafe_efficiency", [0.0, 1.5, -0.1])
    def test_invalid_strafe_efficiency(self, strafe_efficiency):
        with pytest.raises(ValueError):
            RobotConfig(strafe_efficiency=strafe_efficiency)

    def test_mec_webots_preset(self):
        from herding.config import HERDING_MEC_WEBOTS
        # Mecanum runs deploy via Supervisor kinematic injection
        # (controllers/shepherd_dog/shepherd_dog.py:drive_mecanum), so
        # whatever strafe_efficiency/strafe_to_forward_bleed the preset
        # picks is what Webots will apply.  The preset is allowed to be
        # textbook (1.0, 0.0) or matched (<1.0, ≠0.0).
        cfg = HERDING_MEC_WEBOTS.robot
        assert 0.0 < cfg.strafe_efficiency <= 1.0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# DomainRandomConfig
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestDomainRandomConfig:
    """``DomainRandomConfig``: zero defaults and rejection of negative values."""

    def test_all_zeros_by_default(self):
        cfg = DomainRandomConfig()
        assert cfg.fp_rate == 0.0
        assert cfg.wheel_slip_std == 0.0
        assert cfg.compass_noise_std == 0.0

    def test_invalid_fp_rate(self):
        with pytest.raises(ValueError):
            DomainRandomConfig(fp_rate=-1.0)

    def test_invalid_slip_std(self):
        with pytest.raises(ValueError):
            DomainRandomConfig(wheel_slip_std=-0.01)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# HerdingConfig
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestHerdingConfig:
    """``HerdingConfig``: default preset, ``replace`` and parity with module constants."""

    def test_default_is_herding_default(self):
        assert HerdingConfig() == HERDING_DEFAULT

    def test_replace_sub_config(self):
        new_cfg = HERDING_WEBOTS.replace(
            domain_random=DomainRandomConfig(fp_rate=2.0)
        )
        assert new_cfg.domain_random.fp_rate == 2.0
        # Other sub-configs unchanged
        assert new_cfg.tracker == HERDING_WEBOTS.tracker
        assert new_cfg.lidar == HERDING_WEBOTS.lidar

    def test_herding_default_matches_original_module_constants(self):
        """Verify the default config reproduces the original hardcoded values."""
        from herding.perception.lidar_sim import (
            LIDAR_N_RAYS, LIDAR_FOV, LIDAR_MAX_RANGE, LIDAR_NOISE,
            SHEEP_RADIUS, POST_RADIUS,
        )
        from herding.perception.lidar_perception import (
            GAP_THRESHOLD, MAX_CLUSTER_SPAN, RANGE_HIT_EPS,
            SPLIT_RANGE_GAP, WALL_REJECT, STATIC_REJECT,
        )
        from herding.perception.sheep_tracker import (
            GATE_M, REACQUIRE_GATE_M, REACQUIRE_MIN_AGE, PENNED_GATE_M,
            FORGET_STEPS, PREDICT_STEPS, VELOCITY_CLAMP,
        )
        cfg = HERDING_DEFAULT

        # Lidar simulation constants.
        assert cfg.lidar.n_rays == LIDAR_N_RAYS
        assert cfg.lidar.fov_rad == LIDAR_FOV
        assert cfg.lidar.max_range == LIDAR_MAX_RANGE
        assert cfg.lidar.noise_std == LIDAR_NOISE
        assert cfg.lidar.sheep_radius == SHEEP_RADIUS
        assert cfg.lidar.post_radius == POST_RADIUS

        # Detection (clustering / rejection) constants.
        assert cfg.detection.gap_threshold == GAP_THRESHOLD
        assert cfg.detection.max_cluster_span == MAX_CLUSTER_SPAN
        assert cfg.detection.range_hit_eps == RANGE_HIT_EPS
        assert cfg.detection.split_range_gap == SPLIT_RANGE_GAP
        assert cfg.detection.wall_reject == WALL_REJECT
        assert cfg.detection.static_reject == STATIC_REJECT

        # Tracker constants.
        assert cfg.tracker.gate_m == GATE_M
        assert cfg.tracker.reacquire_gate_m == REACQUIRE_GATE_M
        assert cfg.tracker.reacquire_min_age == REACQUIRE_MIN_AGE
        assert cfg.tracker.penned_gate_m == PENNED_GATE_M
        assert cfg.tracker.forget_steps == FORGET_STEPS
        assert cfg.tracker.predict_steps == PREDICT_STEPS
        assert cfg.tracker.velocity_clamp == VELOCITY_CLAMP
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Integration: HerdingEnv honours the config
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestHerdingEnvConfig:
    """Smoke tests: ``HerdingEnv`` runs with and without an explicit config."""

    def test_default_env_unchanged(self):
        """HerdingEnv() still works with no config — zero behaviour change."""
        from training.herding_env import HerdingEnv
        env = HerdingEnv(n_sheep=1, max_steps=5, difficulty=1.0, seed=0)
        obs, _info = env.reset()
        assert obs.shape == (32,)
        obs2, *_ = env.step(env.action_space.sample())
        assert obs2.shape == (32,)

    def test_webots_config_propagates_action_smooth(self):
        from training.herding_env import HerdingEnv
        env = HerdingEnv(herding_cfg=HERDING_WEBOTS)
        assert env.ACTION_SMOOTH == 0.55

    def test_webots_config_runs(self):
        from training.herding_env import HerdingEnv
        env = HerdingEnv(
            n_sheep=2, max_steps=10, difficulty=1.0, seed=42,
            herding_cfg=HERDING_WEBOTS,
        )
        obs, _ = env.reset()
        for _ in range(5):
            obs, _, terminated, truncated, _ = env.step(env.action_space.sample())
            assert obs.shape == (32,)
            # Do not keep stepping an episode that has already ended.
            if terminated or truncated:
                break

    def test_domain_random_fp_runs(self):
        from training.herding_env import HerdingEnv
        cfg = HERDING_WEBOTS.replace(
            domain_random=DomainRandomConfig(fp_rate=3.0, fp_std_pos=0.2)
        )
        env = HerdingEnv(n_sheep=2, max_steps=10, difficulty=1.0, seed=7, herding_cfg=cfg)
        env.reset()
        for _ in range(5):
            env.step(env.action_space.sample())

    def test_domain_random_slip_runs(self):
        from training.herding_env import HerdingEnv
        cfg = HERDING_WEBOTS.replace(
            domain_random=DomainRandomConfig(wheel_slip_std=0.05, compass_noise_std=0.02)
        )
        env = HerdingEnv(n_sheep=1, max_steps=10, difficulty=1.0, seed=3,
                         drive_mode="mecanum", herding_cfg=cfg)
        env.reset()
        for _ in range(5):
            env.step(env.action_space.sample())
|
||||||
@@ -0,0 +1,211 @@
|
|||||||
|
"""Control primitives: speed modulation, Strömbom, Sequential, ActiveScan."""
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from herding.control.active_scan import (
|
||||||
|
EMPTY_DEBOUNCE_STEPS, INITIAL_SCAN_STEPS, ActiveScanTeacher,
|
||||||
|
)
|
||||||
|
from herding.control.modulation import (
|
||||||
|
MIN_SPEED, SLOW_NEAR_SHEEP, modulate_speed,
|
||||||
|
)
|
||||||
|
from herding.control.sequential import compute_action as sequential_action
|
||||||
|
from herding.control.strombom import (
|
||||||
|
DELTA_DRIVE, F_FACTOR, compute_action as strombom_action,
|
||||||
|
)
|
||||||
|
from herding.control.universal import compute_action as universal_action
|
||||||
|
from herding.world.geometry import PEN_ENTRY
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Modulation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_modulation_empty_input_passthrough():
    """With no sheep (empty list or dict) the velocity is returned unchanged."""
    assert modulate_speed(1.0, 0.0, (0.0, 0.0), []) == (1.0, 0.0)
    assert modulate_speed(1.0, 0.0, (0.0, 0.0), {}) == (1.0, 0.0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_modulation_far_sheep_passthrough():
    """A sheep 100 units away leaves the commanded velocity unchanged."""
    vx, vy = modulate_speed(1.0, 0.0, (0.0, 0.0), [(100.0, 0.0)])
    assert (vx, vy) == (1.0, 0.0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_modulation_close_sheep_min_speed():
    """A sheep at the dog's own position scales a unit command to ``MIN_SPEED``."""
    vx, vy = modulate_speed(1.0, 0.0, (0.0, 0.0), [(0.0, 0.0)])
    assert math.isclose(vx, MIN_SPEED)
    assert vy == 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_modulation_preserves_direction():
    """Modulation may rescale the command but must keep its heading."""
    vx, vy = modulate_speed(0.6, 0.8, (0.0, 0.0), [(1.0, 0.0)])
    speed = math.hypot(vx, vy)
    # Fail with a clear assertion instead of a ZeroDivisionError below.
    assert speed > 0.0
    # Direction preserved.
    assert math.isclose(vx / speed, 0.6, abs_tol=1e-6)
    assert math.isclose(vy / speed, 0.8, abs_tol=1e-6)
|
||||||
|
|
||||||
|
|
||||||
|
def test_modulation_linear_ramp_midpoint():
    """At half of SLOW_NEAR_SHEEP the speed sits midway between MIN_SPEED and 1."""
    halfway_sheep = (SLOW_NEAR_SHEEP / 2, 0.0)
    out_vx, _ = modulate_speed(1.0, 0.0, (0.0, 0.0), [halfway_sheep])
    midpoint_speed = MIN_SPEED + (1.0 - MIN_SPEED) * 0.5
    assert math.isclose(out_vx, midpoint_speed, abs_tol=1e-6)
|
||||||
|
|
||||||
|
|
||||||
|
def test_modulation_accepts_dict_input():
    """The same sheep given as a list entry or as a dict value yields the same vx."""
    sheep_pos = (1.0, 0.0)
    from_list = modulate_speed(1.0, 0.0, (0.0, 0.0), [sheep_pos])
    from_dict = modulate_speed(1.0, 0.0, (0.0, 0.0), {"t0": sheep_pos})
    vx_list, _ = from_list
    vx_dict, _ = from_dict
    assert math.isclose(vx_list, vx_dict)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Strömbom
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_strombom_empty_input_idle():
    """With no sheep the Strömbom controller returns zero velocity and mode "idle"."""
    out_vx, out_vy, label = strombom_action((0.0, 0.0), {}, PEN_ENTRY)
    assert (out_vx, out_vy, label) == (0.0, 0.0, "idle")
|
||||||
|
|
||||||
|
|
||||||
|
def test_strombom_tight_flock_drives():
    """A tight three-sheep cluster yields mode "drive" with a ~unit-length command."""
    # Cluster around (0, 8); per the original note its radius is below
    # F_FACTOR·√3.
    flock = {
        "s0": (0.0, 8.0),
        "s1": (0.5, 8.5),
        "s2": (-0.5, 8.0),
    }
    out_vx, out_vy, label = strombom_action((0.0, 0.0), flock, PEN_ENTRY)
    assert label == "drive"
    command_norm = math.hypot(out_vx, out_vy)
    assert math.isclose(command_norm, 1.0, abs_tol=1e-3)
|
||||||
|
|
||||||
|
|
||||||
|
def test_strombom_scattered_flock_collects():
    """A widely scattered flock yields mode "collect"."""
    # Sparse layout: per the original note the max radius exceeds F_FACTOR·√n.
    flock = {
        "s0": (10.0, 10.0),
        "s1": (-10.0, -10.0),
        "s2": (0.0, 0.0),
    }
    _, _, label = strombom_action((0.0, 0.0), flock, PEN_ENTRY)
    assert label == "collect"
|
||||||
|
|
||||||
|
|
||||||
|
def test_strombom_ignores_already_penned_sheep():
    """A sheep south of the gate plane must not count towards the active set."""
    flock = {"s_active": (5.0, 5.0), "s_penned": (11.5, -20.0)}
    result = strombom_action((0.0, 0.0), flock, PEN_ENTRY)
    _vx, _vy, label = result
    # Only one sheep remains active, so the flock radius is 0 (below the
    # threshold) and the controller is expected to drive.
    assert label == "drive"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Sequential
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_sequential_empty_input_idle():
    """With no sheep the sequential controller returns zero velocity and "idle"."""
    out_vx, out_vy, label = sequential_action((0.0, 0.0), {}, PEN_ENTRY)
    assert (out_vx, out_vy, label) == (0.0, 0.0, "idle")
|
||||||
|
|
||||||
|
|
||||||
|
def test_sequential_targets_closest_to_pen():
    """With two sheep the controller reports "targeted" and heads south-east.

    Per the original note, two sheep is within STRAGGLER_THRESHOLD, so the
    controller skips straight to the targeted phase and works on the sheep
    nearest to the pen.
    """
    flock = {
        "near": (10.0, -5.0),   # closer to pen entry (11.5, -15)
        "far": (-10.0, 10.0),
    }
    vx, vy, mode = sequential_action((0.0, 0.0), flock, PEN_ENTRY)
    assert mode == "targeted"
    # Heading towards the near sheep means +x / -y; the far sheep lies at -x / +y.
    assert vx > 0
    assert vy < 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_sequential_collects_when_scattered():
    """Five scattered sheep yield a flock-level mode ("collect" or "drive")."""
    # Per the original note: more than STRAGGLER_THRESHOLD sheep with
    # radius > F_FACTOR*sqrt(n) should use the Strömbom path, not "targeted".
    layout = [
        (12.0, 10.0), (-12.0, 10.0), (0.0, 12.0),
        (12.0, -12.0), (-10.0, -8.0),
    ]
    flock = {}
    for idx, position in enumerate(layout):
        flock[f"s{idx}"] = position
    _, _, label = sequential_action((0.0, 0.0), flock, PEN_ENTRY)
    assert label in ("collect", "drive")
|
||||||
|
|
||||||
|
|
||||||
|
def test_sequential_drives_when_compact():
    """Five sheep packed along a short diagonal yield mode "drive"."""
    flock = {}
    for idx in range(5):
        coord = float(idx) * 0.3
        flock[f"s{idx}"] = (coord, coord)
    _, _, label = sequential_action((0.0, 5.0), flock, PEN_ENTRY)
    assert label == "drive"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# ActiveScan wrapper
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_active_scan_initial_phase_rotates():
    """The very first call reports "scan_initial" with omega 0 and a unit velocity."""
    teacher = ActiveScanTeacher(strombom_action)
    # A sheep is visible, yet the opener is still expected on the first call.
    first = teacher((0.0, 0.0), 0.0, {"s0": (5.0, 0.0)}, PEN_ENTRY)
    vx, vy, omega, mode = first
    assert mode == "scan_initial"
    assert omega == 0.0
    assert math.isclose(math.hypot(vx, vy), 1.0, abs_tol=1e-6)
|
||||||
|
|
||||||
|
|
||||||
|
def test_active_scan_hands_off_to_base_after_opener():
    """After the opener steps are spent, the reported mode is no longer a scan mode."""
    opener_steps = 2
    teacher = ActiveScanTeacher(strombom_action, initial_scan_steps=opener_steps)
    # Consume the opener.
    for _ in range(opener_steps):
        teacher((0.0, 0.0), 0.0, {"s0": (0.0, 8.0)}, PEN_ENTRY)
    _vx, _vy, _omega, label = teacher(
        (0.0, 0.0), 0.0, {"s0": (0.0, 8.0)}, PEN_ENTRY)
    # Any base-teacher label (drive, collect, ...) is fine; scan labels are not.
    assert "scan" not in label
|
||||||
|
|
||||||
|
|
||||||
|
def test_active_scan_holds_last_action_on_brief_empty():
    """One empty frame after a visible sheep reports "hold" and repeats last_action."""
    origin = (0.0, 0.0)
    teacher = ActiveScanTeacher(strombom_action, initial_scan_steps=1)
    # Opener step, then a step with a visible sheep so last_action is populated.
    teacher(origin, 0.0, {}, PEN_ENTRY)
    teacher(origin, 0.0, {"s0": (0.0, 8.0)}, PEN_ENTRY)
    remembered = teacher.last_action
    # A single empty frame should simply replay the remembered action.
    held_vx, held_vy, _omega, label = teacher(origin, 0.0, {}, PEN_ENTRY)
    assert label == "hold"
    assert (held_vx, held_vy) == remembered
|
||||||
|
|
||||||
|
|
||||||
|
def test_active_scan_explores_after_sustained_empty():
    """EMPTY_DEBOUNCE_STEPS consecutive empty frames end in an exploration mode.

    After the one-step opener, the teacher is fed empty observations and the
    mode of the last call must be "explore" or "scan_at_centre".
    """
    teacher = ActiveScanTeacher(strombom_action, initial_scan_steps=1)
    teacher((0.0, 0.0), 0.0, {}, PEN_ENTRY)  # opener
    # Pre-bind so a zero-length loop fails the assertion below instead of
    # raising NameError.
    mode = None
    for _ in range(EMPTY_DEBOUNCE_STEPS):
        _vx, _vy, _omega, mode = teacher((5.0, 5.0), 0.0, {}, PEN_ENTRY)
    assert mode in ("explore", "scan_at_centre")
|
||||||
|
|
||||||
|
|
||||||
|
def test_active_scan_preserves_mecanum_omega():
    """Regression guard: omega from a mecanum base teacher must reach the caller.

    If ActiveScanTeacher dropped it, BC mecanum demos would carry omega=0
    everywhere and the policy would never learn to rotate.
    """
    origin = (0.0, 0.0)
    teacher = ActiveScanTeacher(universal_action, initial_scan_steps=1)
    # Spend the single opener step so the next call leaves phase 1.
    teacher(origin, 0.0, {"s0": (8.0, 8.0)}, PEN_ENTRY, drive_mode="mecanum")
    # Dog at the origin with heading 0 (facing +x) and the sheep at (0, 8):
    # the desired heading is +π/2, so a rotation command is expected.
    outcome = teacher(
        origin, 0.0, {"s0": (0.0, 8.0)}, PEN_ENTRY, drive_mode="mecanum")
    _vx, _vy, omega, mode = outcome
    assert mode in ("collect", "drive", "recovery")
    assert abs(omega) > 0.05, f"omega should be non-zero on mecanum, got {omega}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_active_scan_reset_clears_state():
    """reset() returns the step counter and the empty streak to zero."""
    calls_before_reset = 3
    teacher = ActiveScanTeacher(strombom_action, initial_scan_steps=5)
    for _ in range(calls_before_reset):
        teacher((0.0, 0.0), 0.0, {}, PEN_ENTRY)
    assert teacher.step == 3
    teacher.reset()
    assert teacher.step == 0
    assert teacher.empty_streak == 0
|
||||||
@@ -0,0 +1,231 @@
|
|||||||
|
"""Differential-drive and mecanum kinematics tests."""
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from herding.world.diffdrive import (
|
||||||
|
heading_speed_to_wheels, kinematics_step,
|
||||||
|
mecanum_inverse, mecanum_step,
|
||||||
|
velocity_to_mecanum_wheels, velocity_to_wheels,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
WHEEL_R = 0.038
|
||||||
|
WHEEL_B = 0.28
|
||||||
|
MAX_OMEGA = 70.0
|
||||||
|
MAX_LINEAR = WHEEL_R * MAX_OMEGA
|
||||||
|
DT = 0.016
|
||||||
|
|
||||||
|
|
||||||
|
def test_kinematics_zero_input_is_identity():
    """Zero wheel speeds leave the pose (x, y, heading) exactly unchanged."""
    new_x, new_y, new_h = kinematics_step(
        1.0, 2.0, 0.5, 0.0, 0.0, WHEEL_R, WHEEL_B, DT)
    assert (new_x, new_y, new_h) == (1.0, 2.0, 0.5)
|
||||||
|
|
||||||
|
|
||||||
|
def test_kinematics_forward_motion():
    """Equal wheel speeds translate along the heading without turning."""
    new_x, new_y, new_h = kinematics_step(
        0.0, 0.0, 0.0, 10.0, 10.0, WHEEL_R, WHEEL_B, DT)
    assert new_h == 0.0
    travelled = 10.0 * WHEEL_R * DT
    assert math.isclose(new_x, travelled)
    assert new_y == 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_kinematics_pure_rotation():
    """Opposite wheel speeds (-5, +5) turn in place with a positive heading change."""
    new_x, new_y, new_h = kinematics_step(
        0.0, 0.0, 0.0, -5.0, 5.0, WHEEL_R, WHEEL_B, DT)
    # Position must not move at all.
    assert (new_x, new_y) == (0.0, 0.0)
    assert new_h > 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_kinematics_heading_wrapped_to_pi():
    """Starting just below π with a fast turn, the heading stays within [-π, π]."""
    start_heading = math.pi - 0.01
    _, _, new_h = kinematics_step(
        0.0, 0.0, start_heading, 100.0, -100.0, WHEEL_R, WHEEL_B, DT)
    assert abs(new_h) <= math.pi
|
||||||
|
|
||||||
|
|
||||||
|
def test_velocity_to_wheels_zero_velocity():
    """A zero velocity request maps to zero speed on both wheels."""
    wheel_l, wheel_r = velocity_to_wheels(
        0.0, 0.0, 0.0, MAX_LINEAR, WHEEL_R, MAX_OMEGA)
    assert (wheel_l, wheel_r) == (0.0, 0.0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_velocity_to_wheels_aligned_forward():
    """A target straight ahead gives equal, positive wheel speeds."""
    wheel_l, wheel_r = velocity_to_wheels(
        1.0, 0.0, 0.0, MAX_LINEAR, WHEEL_R, MAX_OMEGA, k_turn=4.0)
    assert math.isclose(wheel_l, wheel_r, abs_tol=1e-6)
    assert wheel_l > 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_velocity_to_wheels_perpendicular_target_spins():
    """A target 90° off the heading gives equal-and-opposite wheel speeds."""
    wheel_l, wheel_r = velocity_to_wheels(
        0.0, 1.0, 0.0, MAX_LINEAR, WHEEL_R, MAX_OMEGA, k_turn=4.0)
    # Forward speed ≈ 0, so the two wheel commands cancel.
    assert wheel_l + wheel_r == pytest.approx(0.0, abs=1e-6)
    # Turning CCW: with heading 0 the target at +y is to the left.
    assert wheel_r > 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_velocity_to_wheels_clamped_to_max_omega():
    """A target directly behind with a large k_turn keeps both wheels within ±MAX_OMEGA."""
    wheels = velocity_to_wheels(
        -1.0, 0.0, 0.0, MAX_LINEAR, WHEEL_R, MAX_OMEGA, k_turn=20.0)
    wheel_l, wheel_r = wheels
    assert abs(wheel_l) <= MAX_OMEGA
    assert abs(wheel_r) <= MAX_OMEGA
|
||||||
|
|
||||||
|
|
||||||
|
def test_heading_speed_to_wheels_aligned():
    """With target heading equal to current heading both wheels match and are positive."""
    wheel_l, wheel_r = heading_speed_to_wheels(0.0, 10.0, 0.0, MAX_OMEGA)
    assert math.isclose(wheel_l, wheel_r, abs_tol=1e-6)
    assert wheel_l > 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_heading_speed_to_wheels_reverse_target_forwards_zero():
    """A target heading of π from heading 0 gives wheel speeds that sum to ~0."""
    wheel_l, wheel_r = heading_speed_to_wheels(math.pi, 10.0, 0.0, MAX_OMEGA)
    # Per the original note cos(π) is clamped at 0, leaving pure rotation.
    wheel_sum = wheel_l + wheel_r
    assert wheel_sum == pytest.approx(0.0, abs=1e-6)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Mecanum kinematics tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
LX = 0.14 # half wheel_base_x
|
||||||
|
LY = 0.14 # half wheel_base_y
|
||||||
|
|
||||||
|
|
||||||
|
def test_mecanum_kinematics_zero_is_identity():
    """All four wheels at zero leave the pose exactly unchanged."""
    new_x, new_y, new_h = mecanum_step(
        1.0, 2.0, 0.5,
        0.0, 0.0, 0.0, 0.0,
        WHEEL_R, LX, LY, DT,
    )
    assert (new_x, new_y, new_h) == (1.0, 2.0, 0.5)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mecanum_kinematics_pure_forward():
    """Four equal wheel speeds move the body straight along +x from heading 0."""
    spin = 10.0
    new_x, new_y, new_h = mecanum_step(
        0.0, 0.0, 0.0,
        spin, spin, spin, spin,
        WHEEL_R, LX, LY, DT,
    )
    # No rotation and no sideways drift.
    assert new_h == pytest.approx(0.0, abs=1e-9)
    assert new_y == pytest.approx(0.0, abs=1e-9)
    assert math.isclose(new_x, spin * WHEEL_R * DT, rel_tol=1e-6)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mecanum_kinematics_pure_strafe():
    """The wheel pattern (-10, 10, 10, -10) moves the body purely along y.

    The test's reference formulas are
    vx_body = (w_fl + w_fr + w_rl + w_rr) * r / 4 (zero for this pattern) and
    vy_body = (-w_fl + w_fr + w_rl - w_rr) * r / 4 (positive for this pattern).
    """
    wheels = (-10.0, 10.0, 10.0, -10.0)
    w_fl, w_fr, w_rl, w_rr = wheels
    new_x, new_y, new_h = mecanum_step(
        0.0, 0.0, 0.0, w_fl, w_fr, w_rl, w_rr, WHEEL_R, LX, LY, DT,
    )
    assert new_h == pytest.approx(0.0, abs=1e-9)
    assert new_x == pytest.approx(0.0, abs=1e-9)
    lateral_speed = (-w_fl + w_fr + w_rl - w_rr) * WHEEL_R / 4.0
    assert math.isclose(new_y, lateral_speed * DT, rel_tol=1e-6)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mecanum_kinematics_strafe_efficiency_scales_y():
    """strafe_efficiency=0.4 scales the sideways displacement to 40% of ideal."""
    w_fl, w_fr, w_rl, w_rr = (-10.0, 10.0, 10.0, -10.0)
    new_x, new_y, _ = mecanum_step(
        0.0, 0.0, 0.0, w_fl, w_fr, w_rl, w_rr, WHEEL_R, LX, LY, DT,
        strafe_efficiency=0.4,
    )
    lossless_vy = (-w_fl + w_fr + w_rl - w_rr) * WHEEL_R / 4.0
    assert math.isclose(new_y, 0.4 * lossless_vy * DT, rel_tol=1e-6)
    assert new_x == pytest.approx(0.0, abs=1e-9)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mecanum_kinematics_strafe_bleed_pushes_backward():
    """A negative strafe_to_forward_bleed adds backward x motion to a strafe."""
    w_fl, w_fr, w_rl, w_rr = (-10.0, 10.0, 10.0, -10.0)
    new_x, new_y, _ = mecanum_step(
        0.0, 0.0, 0.0, w_fl, w_fr, w_rl, w_rr, WHEEL_R, LX, LY, DT,
        strafe_efficiency=1.0,
        strafe_to_forward_bleed=-0.28,
    )
    lossless_vy = (-w_fl + w_fr + w_rl - w_rr) * WHEEL_R / 4.0
    # Full efficiency: the sideways displacement is the ideal one.
    assert math.isclose(new_y, lossless_vy * DT, rel_tol=1e-6)
    # The bleed is proportional to the strafe magnitude.
    bleed_x = -0.28 * abs(lossless_vy) * DT
    assert math.isclose(new_x, bleed_x, rel_tol=1e-6)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mecanum_kinematics_forward_unaffected_by_strafe_params():
    """A pure forward command ignores strafe_efficiency and strafe_to_forward_bleed."""
    w_fl, w_fr, w_rl, w_rr = 10.0, 10.0, 10.0, 10.0
    new_x, new_y, _ = mecanum_step(
        0.0, 0.0, 0.0, w_fl, w_fr, w_rl, w_rr, WHEEL_R, LX, LY, DT,
        strafe_efficiency=0.4,
        strafe_to_forward_bleed=-0.28,
    )
    forward_speed = (w_fl + w_fr + w_rl + w_rr) * WHEEL_R / 4.0
    assert math.isclose(new_x, forward_speed * DT, rel_tol=1e-6)
    assert new_y == pytest.approx(0.0, abs=1e-9)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mecanum_kinematics_pure_rotation():
    """The wheel pattern (-10, 10, -10, 10) rotates in place with positive heading."""
    # Left wheels backward, right wheels forward: the translation sums cancel
    # and only the rotation term remains.
    w_fl, w_fr, w_rl, w_rr = (-10.0, 10.0, -10.0, 10.0)
    new_x, new_y, new_h = mecanum_step(
        0.0, 0.0, 0.0, w_fl, w_fr, w_rl, w_rr, WHEEL_R, LX, LY, DT,
    )
    assert new_x == pytest.approx(0.0, abs=1e-9)
    assert new_y == pytest.approx(0.0, abs=1e-9)
    assert new_h > 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_mecanum_inverse_roundtrip():
    """Wheel speeds from mecanum_inverse reproduce the requested body velocities.

    The forward-kinematics formulas are applied by hand to the returned wheel
    speeds and compared against the request.
    """
    vx_b, vy_b, omega = 0.5, 0.3, 0.2
    w_fl, w_fr, w_rl, w_rr = mecanum_inverse(
        vx_b, vy_b, omega, WHEEL_R, LX, LY, MAX_OMEGA,
    )
    quarter_r = WHEEL_R / 4.0
    vx_check = (w_fl + w_fr + w_rl + w_rr) * WHEEL_R / 4.0
    vy_check = (-w_fl + w_fr + w_rl - w_rr) * WHEEL_R / 4.0
    omega_check = (-w_fl + w_fr - w_rl + w_rr) * WHEEL_R / (4.0 * (LX + LY))
    assert quarter_r == WHEEL_R / 4.0
    assert math.isclose(vx_b, vx_check, rel_tol=1e-6)
    assert math.isclose(vy_b, vy_check, rel_tol=1e-6)
    assert math.isclose(omega, omega_check, rel_tol=1e-6)
|
||||||
|
|
||||||
|
|
||||||
|
def test_mecanum_inverse_clamped():
    """An extreme velocity request never yields a wheel speed above MAX_OMEGA."""
    wheel_speeds = mecanum_inverse(
        100.0, 100.0, 50.0, WHEEL_R, LX, LY, MAX_OMEGA,
    )
    w_fl, w_fr, w_rl, w_rr = wheel_speeds
    largest = max(abs(w_fl), abs(w_fr), abs(w_rl), abs(w_rr))
    assert largest <= MAX_OMEGA
|
||||||
|
|
||||||
|
|
||||||
|
def test_velocity_to_mecanum_wheels_zero():
    """An all-zero request maps to four zero wheel speeds."""
    wheel_speeds = velocity_to_mecanum_wheels(
        0.0, 0.0, 0.0, 0.0,
        MAX_LINEAR, WHEEL_R, LX, LY, MAX_OMEGA,
        wheel_base=WHEEL_B,
    )
    assert wheel_speeds == (0.0, 0.0, 0.0, 0.0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_velocity_to_mecanum_wheels_forward():
    """A forward request gives four positive wheel speeds, equal across diagonals."""
    w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
        1.0, 0.0, 0.0, 0.0,
        MAX_LINEAR, WHEEL_R, LX, LY, MAX_OMEGA,
        wheel_base=WHEEL_B,
    )
    for wheel in (w_fl, w_fr, w_rl, w_rr):
        assert wheel > 0.0
    # Diagonal pairs (front-left/rear-right, front-right/rear-left) must agree.
    assert math.isclose(w_fl, w_rr, rel_tol=1e-6)
    assert math.isclose(w_fr, w_rl, rel_tol=1e-6)
|
||||||
|
|
||||||
|
|
||||||
|
def test_velocity_to_mecanum_wheels_clamped():
    """With every velocity input at 1.0, all wheel speeds stay within ±MAX_OMEGA."""
    wheel_speeds = velocity_to_mecanum_wheels(
        1.0, 1.0, 1.0, 0.0,
        MAX_LINEAR, WHEEL_R, LX, LY, MAX_OMEGA,
        wheel_base=WHEEL_B,
    )
    for wheel in wheel_speeds:
        assert abs(wheel) <= MAX_OMEGA
|
||||||
@@ -0,0 +1,116 @@
|
|||||||
|
"""Gymnasium env: contract, determinism, reward components."""
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from herding.world.geometry import MAX_SHEEP, PEN_ENTRY
|
||||||
|
from herding.perception.obs import OBS_DIM
|
||||||
|
from herding.control.strombom import compute_action as strombom_action
|
||||||
|
from training.herding_env import HerdingEnv
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_obs_action_shapes_single_frame():
    """reset() and step() return float32 observations of shape (OBS_DIM,) and typed flags."""
    env = HerdingEnv(n_sheep=3, seed=0, use_lidar=False)
    first_obs, _info = env.reset()
    assert first_obs.shape == (OBS_DIM,)
    assert first_obs.dtype == np.float32

    action = np.array([0.5, 0.0], dtype=np.float32)
    next_obs, reward, term, trunc, _info = env.step(action)
    assert next_obs.shape == (OBS_DIM,)
    assert isinstance(reward, float)
    assert isinstance(term, bool)
    assert isinstance(trunc, bool)
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_observation_space_matches_frame_stack():
    """frame_stack=4 multiplies both the observation and its declared space by 4."""
    stacked_shape = (OBS_DIM * 4,)
    env = HerdingEnv(n_sheep=2, seed=0, use_lidar=False, frame_stack=4)
    first_obs, _ = env.reset()
    assert first_obs.shape == stacked_shape
    assert env.observation_space.shape == stacked_shape
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_reset_determinism_same_seed():
    """Two envs built and reset with seed 42 produce matching first observations."""
    env_one = HerdingEnv(n_sheep=3, seed=42, use_lidar=False)
    env_two = HerdingEnv(n_sheep=3, seed=42, use_lidar=False)
    obs_one, _ = env_one.reset(seed=42)
    obs_two, _ = env_two.reset(seed=42)
    assert np.allclose(obs_one, obs_two)
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_constructor_seed_applies_to_first_reset():
    """The constructor seed alone makes the first reset() reproducible."""
    env_one = HerdingEnv(n_sheep=3, seed=42, use_lidar=False)
    env_two = HerdingEnv(n_sheep=3, seed=42, use_lidar=False)
    # No seed is passed to reset() here.
    obs_one, _ = env_one.reset()
    obs_two, _ = env_two.reset()
    assert np.allclose(obs_one, obs_two)
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_curriculum_samples_full_range():
    """Over 40 resets with no fixed n_sheep, size 1 appears and none exceeds MAX_SHEEP."""
    env = HerdingEnv(seed=0, use_lidar=False)
    seen_sizes = set()
    for _ in range(40):
        _obs, info = env.reset()
        seen_sizes.add(info["n_sheep"])
    assert 1 in seen_sizes
    assert all(size <= MAX_SHEEP for size in seen_sizes)
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_step_returns_finite_values():
    """Up to 200 steps of a constant action never produce a non-finite obs or reward."""
    step_budget = 200
    env = HerdingEnv(n_sheep=2, max_steps=200, seed=1, use_lidar=False)
    obs, _ = env.reset()
    for _ in range(step_budget):
        constant_action = np.array([0.5, 0.5], dtype=np.float32)
        obs, reward, term, trunc, _ = env.step(constant_action)
        assert np.isfinite(obs).all()
        assert math.isfinite(reward)
        episode_over = term or trunc
        if episode_over:
            break
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_options_n_sheep_overrides_curriculum():
    """reset(options={"n_sheep": 7}) reports exactly 7 sheep in the info dict."""
    env = HerdingEnv(seed=0, use_lidar=False)
    _obs, info = env.reset(options={"n_sheep": 7})
    assert info["n_sheep"] == 7
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_perceived_positions_lidar_vs_privileged():
    """Privileged mode reports all 3 sheep; LiDAR mode reports at most 3."""
    privileged = HerdingEnv(n_sheep=3, seed=0, use_lidar=False)
    privileged.reset(seed=0)
    assert len(privileged.perceived_positions()) == 3

    sensed = HerdingEnv(n_sheep=3, seed=0, use_lidar=True)
    sensed.reset(seed=0)
    tracked = sensed.perceived_positions()
    # The tracker may miss sheep that are out of FOV / range, but it must
    # never report more sheep than exist.
    assert len(tracked) <= 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_set_time_weight_affects_reward():
    """After set_time_weight(-1.0) the same first step earns a lower reward."""
    env = HerdingEnv(n_sheep=1, seed=0, use_lidar=False)

    env.reset(seed=0)
    baseline = env.step(np.array([0.0, 0.0], dtype=np.float32))
    r_default = baseline[1]

    env.set_time_weight(-1.0)
    env.reset(seed=0)
    penalised = env.step(np.array([0.0, 0.0], dtype=np.float32))
    r_penalised = penalised[1]

    assert r_penalised < r_default
|
||||||
|
|
||||||
|
|
||||||
|
def test_env_strombom_rollout_moves_dog():
    """Driving the env with Strömbom actions displaces the dog by more than 0.05."""
    env = HerdingEnv(n_sheep=2, max_steps=400, seed=1, use_lidar=False)
    env.reset()
    start_x, start_y = env.dog_x, env.dog_y
    for _ in range(400):
        flock = env.perceived_positions()
        if not flock:
            break
        dog_now = (env.dog_x, env.dog_y)
        vx, vy, _mode = strombom_action(dog_now, flock, PEN_ENTRY)
        action = np.array([vx, vy], dtype=np.float32)
        _obs, _reward, term, trunc, _info = env.step(action)
        if term or trunc:
            break
    moved = math.hypot(env.dog_x - start_x, env.dog_y - start_y)
    assert moved > 0.05
|
||||||
@@ -0,0 +1,75 @@
|
|||||||
|
"""Geometric predicates and constants."""
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
from herding.world.geometry import (
|
||||||
|
FIELD_X, FIELD_Y, GATE_X, GATE_Y, MAX_SHEEP, PEN_ENTRY, PEN_X, PEN_Y,
|
||||||
|
distance_to_pen_entry, in_field, in_gate_corridor, in_pen,
|
||||||
|
is_penned,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_field_dimensions():
    """Both field axes span (-15.0, 15.0)."""
    expected_span = (-15.0, 15.0)
    assert FIELD_X == expected_span
    assert FIELD_Y == expected_span
|
||||||
|
|
||||||
|
|
||||||
|
def test_pen_geometry():
    """Pins the pen rectangle, its entry point and the gate constants."""
    # Pen rectangle.
    assert PEN_X == (10.0, 13.0)
    assert PEN_Y == (-22.0, -15.0)
    # Entry point.
    assert PEN_ENTRY == (11.5, -15.0)
    # The gate spans the pen's x-range at y = -15.
    assert GATE_X == PEN_X
    assert GATE_Y == -15.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_in_pen_strict_interior():
    """in_pen accepts an interior point and rejects boundary, gate-plane and field points."""
    assert in_pen(11.5, -18.0)
    rejected_points = [
        (10.0, -18.0),   # boundary excluded
        (11.5, -15.0),   # gate plane excluded
        (0.0, 0.0),
    ]
    for px, py in rejected_points:
        assert not in_pen(px, py)
|
||||||
|
|
||||||
|
|
||||||
|
def test_in_field_with_margin():
    """in_field accepts interior points and honours the optional margin argument."""
    # Default margin.
    for px, py in [(0.0, 0.0), (14.0, 14.0)]:
        assert in_field(px, py)
    assert not in_field(15.5, 0.0)
    # With margin=0.5 the accept/reject boundary lies between x=14.4 and x=14.6.
    assert in_field(14.4, 0.0, margin=0.5)
    assert not in_field(14.6, 0.0, margin=0.5)
|
||||||
|
|
||||||
|
|
||||||
|
def test_in_gate_corridor():
    """in_gate_corridor accepts two points in the gate column and rejects two outside."""
    inside = [(11.5, -18.0), (10.0, -15.0)]
    outside = [(11.5, -10.0), (5.0, -18.0)]
    for px, py in inside:
        assert in_gate_corridor(px, py)
    for px, py in outside:
        assert not in_gate_corridor(px, py)
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_penned_latches_below_gate():
    """is_penned needs both the gate column and a position at/south of the gate plane."""
    # Inside the gate column, on or below the gate plane: penned.
    for px, py in [(11.5, -15.0), (10.5, -18.0), (12.5, -22.0)]:
        assert is_penned(px, py)
    # Just north of the gate plane: not yet penned.
    assert not is_penned(11.5, -14.9)
    # South of the plane but outside the gate column: not penned.
    for px, py in [(0.0, -16.0), (14.0, -16.0)]:
        assert not is_penned(px, py)
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_penned_latch_margin():
    """The gate column has a small tolerance: x=9.9 and 13.1 latch, x=9.7 does not."""
    south_y = -15.5
    for px in (9.9, 13.1):
        assert is_penned(px, south_y)
    assert not is_penned(9.7, south_y)
|
||||||
|
|
||||||
|
|
||||||
|
def test_distance_to_pen_entry():
    """distance_to_pen_entry is 0 at the entry and Euclidean elsewhere."""
    entry_x, entry_y = PEN_ENTRY
    assert distance_to_pen_entry(entry_x, entry_y) == 0.0
    # Five units straight north of the entry.
    assert math.isclose(distance_to_pen_entry(11.5, -10.0), 5.0)
    # From the field origin.
    from_origin = math.hypot(11.5, 15.0)
    assert math.isclose(distance_to_pen_entry(0.0, 0.0), from_origin)
|
||||||
|
|
||||||
|
|
||||||
|
def test_max_sheep_positive_int():
    """MAX_SHEEP is an int and at least 1."""
    is_integer = isinstance(MAX_SHEEP, int)
    assert is_integer
    assert MAX_SHEEP >= 1
|
||||||
@@ -0,0 +1,71 @@
|
|||||||
|
"""Observation builder — shape, normalisation, order invariance."""
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from herding.perception.obs import OBS_DIM, build_obs
|
||||||
|
|
||||||
|
|
||||||
|
def test_obs_shape_and_dtype():
    """build_obs returns a float32 vector of length OBS_DIM."""
    vector = build_obs((0.0, 0.0), 0.0, [(5.0, 5.0)], [False])
    assert vector.shape == (OBS_DIM,)
    assert vector.dtype == np.float32
|
||||||
|
|
||||||
|
|
||||||
|
def test_obs_no_active_sheep_terminal():
    """With every sheep penned, the count field and channels 4..11 are zero."""
    positions = [(1.0, 1.0), (2.0, 2.0)]
    penned = [True, True]
    vector = build_obs((0.0, 0.0), 0.0, positions, penned)
    # Channel 19 is the (normalised) active-sheep count.
    assert vector[19] == 0.0
    # Channels 4..11 hold the aggregate fields (CoM, radius, std, vectors).
    aggregates = vector[4:12]
    assert np.allclose(aggregates, 0.0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_obs_dog_pose_normalised():
    """Channels 0..3 hold the dog position scaled to ±1 and cos/sin of the heading."""
    heading = math.pi / 2
    vector = build_obs((15.0, -15.0), heading, [(0.0, 0.0)], [False])
    # Position (15, -15) maps to (1, -1).
    assert math.isclose(vector[0], 1.0)
    assert math.isclose(vector[1], -1.0)
    # Heading is encoded as (cos, sin).
    assert math.isclose(vector[2], math.cos(heading), abs_tol=1e-6)
    assert math.isclose(vector[3], math.sin(heading), abs_tol=1e-6)
|
||||||
|
|
||||||
|
|
||||||
|
def test_obs_order_invariance():
    """Reversing the sheep list (and its penned flags) must not change the observation."""
    positions = [(3.0, 2.0), (-5.0, 1.0), (0.0, 8.0)]
    penned = [False] * 3
    forward = build_obs((0.0, 0.0), 0.0, positions, penned)
    backward = build_obs((0.0, 0.0), 0.0, positions[::-1], penned[::-1])
    assert np.allclose(forward, backward)
|
||||||
|
|
||||||
|
|
||||||
|
def test_obs_count_field_normalised_by_n_max():
    """Five active sheep with n_max=10 put 0.5 in the count channel (index 19)."""
    flock_size = 5
    positions = [(1.0, 1.0)] * flock_size
    penned = [False] * flock_size
    vector = build_obs((0.0, 0.0), 0.0, positions, penned, n_max=10)
    assert math.isclose(vector[19], 0.5)
|
||||||
|
|
||||||
|
|
||||||
|
def test_obs_polar_histogram_sums_to_one():
    """Channels 20..27 sum to 1 for four sheep placed on the axes around the dog."""
    positions = [(1.0, 0.0), (-1.0, 0.0), (0.0, 1.0), (0.0, -1.0)]
    vector = build_obs((0.0, 0.0), 0.0, positions, [False] * 4)
    histogram_total = float(vector[20:28].sum())
    assert math.isclose(histogram_total, 1.0, abs_tol=1e-6)
|
||||||
|
|
||||||
|
|
||||||
|
def test_obs_named_channels_closest_rearmost():
    """Channels 28..31 carry the closest-to-pen and rearmost sheep offsets / 15.

    Channels 28..29 = (closest_to_pen - dog) / 15 and
    channels 30..31 = (rearmost - dog) / 15; the dog sits at the origin here.
    """
    pen_x, pen_y = 11.5, -15.0
    near = (pen_x + 1.0, pen_y + 1.0)
    far = (-10.0, 10.0)
    vector = build_obs((0.0, 0.0), 0.0, [near, far], [False, False])
    expected_by_channel = {
        28: near[0] / 15.0,
        29: near[1] / 15.0,
        30: far[0] / 15.0,
        31: far[1] / 15.0,
    }
    for channel, expected in expected_by_channel.items():
        assert math.isclose(vector[channel], expected, abs_tol=1e-5)
|
||||||
|
|
||||||
|
|
||||||
|
def test_obs_pen_vector_zero_at_pen_entry():
    """With the dog at (11.5, -15.0), the pen-distance channel (index 14) is ~0."""
    obs = build_obs((11.5, -15.0), 0.0, [(0.0, 0.0)], [False])
    # math.isclose against 0.0 needs an explicit abs_tol: with the default
    # abs_tol=0.0 it only passes for an exact zero, so any float32 rounding
    # residue would fail the test.
    assert math.isclose(obs[14], 0.0, abs_tol=1e-6)  # distance to pen
|
||||||
@@ -0,0 +1,251 @@
|
|||||||
|
"""LiDAR simulation + perception pipeline + multi-target tracker."""
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from herding.perception.lidar_perception import (
|
||||||
|
STATIC_REJECT, detections_from_scan,
|
||||||
|
)
|
||||||
|
from herding.perception.lidar_sim import (
|
||||||
|
LIDAR_MAX_RANGE, LIDAR_N_RAYS, SHEEP_RADIUS, ray_angles, simulate_scan,
|
||||||
|
)
|
||||||
|
from herding.perception.sheep_tracker import (
|
||||||
|
FORGET_STEPS, GATE_M, MAX_ACTIVE_TRACKS, REACQUIRE_GATE_M,
|
||||||
|
REACQUIRE_MIN_AGE, SheepTracker,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Sim
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_simulate_scan_shape_and_dtype():
    """simulate_scan returns a float32 array with one entry per LiDAR ray."""
    scan = simulate_scan(0.0, 0.0, 0.0, [(5.0, 0.0)], noise=0.0)
    assert scan.shape == (LIDAR_N_RAYS,)
    assert scan.dtype == np.float32
|
||||||
|
|
||||||
|
|
||||||
|
def test_simulate_scan_no_sheep_far_from_walls():
    """From the origin with no sheep, every ray reads exactly LIDAR_MAX_RANGE."""
    # Per the original note the walls (±15) lie beyond LIDAR_MAX_RANGE (12),
    # so nothing is hit.
    scan = simulate_scan(0.0, 0.0, 0.0, [], noise=0.0)
    assert np.all(scan == LIDAR_MAX_RANGE)
|
||||||
|
|
||||||
|
|
||||||
|
def test_simulate_scan_sheep_in_front_returns_centre_hit():
    """A sheep 5 m dead ahead is seen by the middle ray at about 5 - SHEEP_RADIUS."""
    scan = simulate_scan(0.0, 0.0, 0.0, [(5.0, 0.0)], noise=0.0)
    middle_index = LIDAR_N_RAYS // 2
    middle_range = float(scan[middle_index])
    assert math.isclose(middle_range, 5.0 - SHEEP_RADIUS, abs_tol=0.01)
|
||||||
|
|
||||||
|
|
||||||
|
def test_simulate_scan_sheep_behind_dog_not_hit():
    """A sheep 5 m directly behind the dog produces a return near its true range.

    NOTE(review): the ``_not_hit`` suffix is kept only so the test id stays
    stable. Per the original comment the scanner now has a 360° FOV, so the
    sheep IS hit.
    """
    ranges = simulate_scan(0.0, 0.0, 0.0, [(-5.0, 0.0)], noise=0.0)
    assert (ranges < LIDAR_MAX_RANGE).any()
    nearest = float(ranges.min())
    # "Near 5 m" needs a bound on both sides. The upper bound is the original
    # one. The lower bound assumes the sheep is a disc of SHEEP_RADIUS (as the
    # in-front test does), so no ray can return closer than 5 - SHEEP_RADIUS;
    # 0.01 absorbs float32 rounding.
    assert 5.0 - SHEEP_RADIUS - 0.01 <= nearest < 5.3
|
||||||
|
|
||||||
|
|
||||||
|
def test_simulate_scan_wall_hit():
    """Facing north from y=14, the middle ray reads about 1 m (the north wall)."""
    facing_north = math.pi / 2
    scan = simulate_scan(0.0, 14.0, facing_north, [], noise=0.0)
    middle_range = float(scan[LIDAR_N_RAYS // 2])
    assert math.isclose(middle_range, 1.0, abs_tol=0.01)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Perception
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_detections_recover_sheep_position():
    """Two simulated sheep yield two detections, each within 0.1 of a true position."""
    truths = [(5.0, 0.0), (3.0, 1.0)]
    scan = simulate_scan(0.0, 0.0, 0.0, truths, noise=0.0)
    found = detections_from_scan(scan, 0.0, 0.0, 0.0)
    assert len(found) == 2
    # Every true sheep needs at least one detection closer than 0.1.
    for true_x, true_y in truths:
        errors = (math.hypot(d[0] - true_x, d[1] - true_y) for d in found)
        assert any(err < 0.1 for err in errors)
|
||||||
|
|
||||||
|
|
||||||
|
def test_detections_filter_gate_post():
    """No detection may lie within STATIC_REJECT of either gate post.

    The scene has no sheep and the dog faces south at (11.5, -10), so any
    return near the posts at (10, -15) and (13, -15) comes from the posts.
    """
    facing_south = -math.pi / 2
    scan = simulate_scan(11.5, -10.0, facing_south, [], noise=0.0)
    found = detections_from_scan(scan, 11.5, -10.0, facing_south)
    for cx, cy in found:
        for post_x in (10.0, 13.0):
            assert math.hypot(cx - post_x, cy + 15.0) > STATIC_REJECT
|
||||||
|
|
||||||
|
|
||||||
|
def test_detections_empty_scan_returns_nothing():
    """A zero-length scan yields an empty detection list."""
    empty_scan = np.array([], dtype=np.float32)
    assert detections_from_scan(empty_scan, 0.0, 0.0, 0.0) == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Tracker
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_tracker_creates_track_for_new_detection():
    """One detection fed to a fresh tracker gives one active track."""
    tracker = SheepTracker()
    first_frame = [(5.0, 0.0)]
    tracker.update(first_frame)
    assert tracker.n_active() == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_tracker_associates_close_detections():
    """A 0.5 m move between frames stays on the same track (no duplicate)."""
    tracker = SheepTracker()
    for frame in ([(5.0, 0.0)], [(5.5, 0.0)]):
        tracker.update(frame)
    assert tracker.n_active() == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_tracker_spawns_new_track_far_detection():
    """A detection 10 m from the existing track starts a second track."""
    tracker = SheepTracker()
    tracker.update([(5.0, 0.0)])
    far_away = (-5.0, 0.0)  # well outside the association gate
    tracker.update([far_away])
    assert tracker.n_active() == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_tracker_reacquisition_for_stale_track():
    """A stale track is re-bound by a detection inside the wider re-acquisition
    gate instead of a duplicate track being spawned."""
    tracker = SheepTracker()
    tracker.update([(0.0, 0.0)])
    # Starve the track of detections until it counts as stale.
    stale_frames = 0
    while stale_frames < REACQUIRE_MIN_AGE:
        tracker.update([])
        stale_frames += 1
    # Midway between the two gates: outside GATE_M, inside REACQUIRE_GATE_M.
    reappear_x = (GATE_M + REACQUIRE_GATE_M) / 2.0
    tracker.update([(reappear_x, 0.0)])
    assert tracker.n_active() == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_tracker_forgets_stale_tracks():
    """A track unseen for FORGET_STEPS + 1 updates is no longer active."""
    tracker = SheepTracker()
    tracker.update([(0.0, 0.0)])
    empty_updates = FORGET_STEPS + 1
    for _ in range(empty_updates):
        tracker.update([])
    assert tracker.n_active() == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_tracker_penned_position_promotes_track():
    """A detection spawned inside the pen column counts as penned, not active."""
    tracker = SheepTracker()
    inside_pen = (11.5, -16.0)  # is_penned is True for this point
    tracker.update([inside_pen])
    assert tracker.n_penned() == 1
    assert tracker.n_active() == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_tracker_penned_tracks_persist():
    """A penned track survives twice FORGET_STEPS updates without detections."""
    tracker = SheepTracker()
    tracker.update([(11.5, -16.0)])
    remaining = FORGET_STEPS * 2
    while remaining > 0:
        tracker.update([])
        remaining -= 1
    # Penned tracks are exempt from forgetting.
    assert tracker.n_penned() == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_tracker_caps_active_set():
    """Spawning more tracks than the cap never exceeds MAX_ACTIVE_TRACKS."""
    tracker = SheepTracker()
    # One detection per update, each placed more than a gate width from the last.
    for idx in range(MAX_ACTIVE_TRACKS + 5):
        x_pos = idx * (GATE_M + 1.0)
        tracker.update([(x_pos, 0.0)])
    assert tracker.n_active() <= MAX_ACTIVE_TRACKS
|
||||||
|
|
||||||
|
|
||||||
|
def test_tracker_reset_clears_state():
    """reset() drops every active track and rewinds the step counter to 0."""
    tracker = SheepTracker()
    tracker.update([(0.0, 0.0)])
    tracker.reset()
    active_after_reset = tracker.n_active()
    assert active_after_reset == 0
    assert tracker.step == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Consensus promotion
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _tracker_with_consensus(k: int = 3, radius: float = 0.5, max_age: int = 8):
    """Build a SheepTracker whose TrackerConfig carries the given consensus settings.

    ``k``, ``radius`` and ``max_age`` are forwarded as ``consensus_k``,
    ``consensus_radius_m`` and ``consensus_max_age`` respectively.
    """
    from herding.config import TrackerConfig

    cfg = TrackerConfig(
        consensus_k=k,
        consensus_radius_m=radius,
        consensus_max_age=max_age,
    )
    return SheepTracker(tracker_cfg=cfg)
|
||||||
|
|
||||||
|
|
||||||
|
def test_consensus_default_disabled():
    """A default-configured tracker exposes the very first detection at once."""
    tracker = SheepTracker()
    tracker.update([(5.0, 0.0)])
    assert tracker.n_active() == 1
    visible = tracker.get_positions()
    assert len(visible) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_consensus_hides_one_shot_detection():
    """With k=3, a single sighting stays a hidden candidate."""
    tracker = _tracker_with_consensus(k=3)
    tracker.update([(5.0, 0.0)])
    # Held as a candidate: not active, and absent from the reported positions.
    assert tracker.n_active() == 0
    assert tracker.n_candidate() == 1
    assert tracker.get_positions() == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_consensus_promotes_after_k_matches():
    """Three consecutive sightings with k=3 promote the candidate to active."""
    tracker = _tracker_with_consensus(k=3)
    sightings = 0
    while sightings < 3:
        tracker.update([(5.0, 0.0)])
        sightings += 1
    assert tracker.n_active() == 1
    assert tracker.n_candidate() == 0
    assert len(tracker.get_positions()) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_consensus_candidate_expires_quickly():
    """A candidate not re-confirmed within consensus_max_age is dropped."""
    tracker = _tracker_with_consensus(k=3, max_age=5)
    tracker.update([(5.0, 0.0)])
    assert tracker.n_candidate() == 1
    # Six empty frames: one more than max_age.
    empty_frames = 6
    for _ in range(empty_frames):
        tracker.update([])
    assert tracker.n_candidate() == 0
    assert tracker.n_active() == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_consensus_tracker_does_not_promote_phantom_pen():
    """A lone detection inside the pen column is never latched as penned
    while it is only a candidate, and it vanishes once it expires."""
    tracker = _tracker_with_consensus(k=3)
    gate_false_positive = (11.5, -16.0)  # inside the pen column
    tracker.update([gate_false_positive])
    # Still a plain candidate: neither promoted nor penned.
    assert tracker.n_penned() == 0
    assert tracker.n_candidate() == 1
    # Ten empty frames later it is gone.
    empty_frames = 10
    while empty_frames:
        tracker.update([])
        empty_frames -= 1
    assert tracker.n_penned() == 0
    assert tracker.n_candidate() == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_consensus_distinguishes_real_sheep_from_phantom():
    """A steadily detected sheep is promoted; a phantom whose position jumps
    by more than consensus_radius each frame never reaches get_positions,
    even while its candidates are still inside the max-age window."""
    tracker = _tracker_with_consensus(k=3, radius=0.4, max_age=4)
    # The real sheep sits at (5, 0) in every frame; the phantom hops around.
    phantom_positions = [(10.0, 5.0), (10.5, 5.6), (11.1, 5.0), (10.0, 5.7)]
    for phantom in phantom_positions:
        tracker.update([(5.0, 0.0), phantom])
    positions = tracker.get_positions()
    assert len(positions) == 1
    real_x, real_y = next(iter(positions.values()))[:2]
    assert math.hypot(real_x - 5.0, real_y) < 0.5
    # After the candidate window has passed, no phantom candidate survives.
    for _ in range(8):
        tracker.update([(5.0, 0.0)])
    assert tracker.n_candidate() == 0
    assert len(tracker.get_positions()) == 1
|
||||||
@@ -0,0 +1,84 @@
|
|||||||
|
"""Benchmark LiDAR perception improvements.
|
||||||
|
|
||||||
|
Measures success rate, mean steps, and tracker quality metrics for
|
||||||
|
demo collection across multiple seeds. Compares configurations.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
python -m tools.benchmark_lidar --n-sheep 5 --seeds 15
|
||||||
|
HERDING_WORLD=field_round python -m tools.benchmark_lidar --n-sheep 5
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
from training.bc.collect import collect_one
|
||||||
|
from herding.control.universal import compute_action
|
||||||
|
|
||||||
|
|
||||||
|
def run_benchmark(n_sheep: int, n_seeds: int, max_steps: int = 100000,
                  drive_mode: str = "differential"):
    """Roll out one episode per seed in ``range(n_seeds)`` and summarise.

    Each episode goes through ``collect_one`` with ``compute_action`` as the
    controller, ``frame_stack=1`` and ``privileged=False``. A per-seed line
    and a final summary are printed to stdout.

    Args:
        n_sheep: number of sheep, forwarded to ``collect_one``.
        n_seeds: how many seeds (0, 1, ...) to run. Zero or a negative value
            runs nothing and yields an all-zero summary instead of raising
            ``ZeroDivisionError``.
        max_steps: step budget forwarded to ``collect_one``.
        drive_mode: drive mode forwarded to ``collect_one``.

    Returns:
        dict with keys ``n_sheep``, ``n_seeds``, ``success_rate`` (percent),
        ``n_success``, ``mean_steps_success``, ``mean_steps_all`` and
        ``elapsed_s``.
    """
    results = []
    # perf_counter is monotonic, so the interval is immune to clock changes.
    t0 = time.perf_counter()
    for seed in range(n_seeds):
        # The positional 5 is passed through unchanged (presumably the
        # subsample stride — confirm against collect_one).
        obs, _actions, success, steps = collect_one(
            n_sheep, seed, max_steps, 5, compute_action,
            frame_stack=1, privileged=False, drive_mode=drive_mode,
        )
        results.append({
            "seed": seed,
            "success": success,
            "steps": steps,
            "logged": len(obs),
        })
        tag = "+" if success else "x"
        print(f" [{tag}] seed={seed:>2d} steps={steps:>6d}")
    elapsed = time.perf_counter() - t0

    n_total = len(results)
    successes = [r for r in results if r["success"]]
    n_ok = len(successes)
    # Guard every division: an empty run must report zeros, not crash.
    rate = 100.0 * n_ok / n_total if n_total else 0.0

    mean_steps_ok = (sum(r["steps"] for r in successes) / n_ok) if n_ok else 0
    mean_steps_all = (sum(r["steps"] for r in results) / n_total) if n_total else 0

    print(f"\n Results: {n_ok}/{n_total} success ({rate:.0f}%)")
    print(f" Mean steps (success): {mean_steps_ok:>8.0f}")
    print(f" Mean steps (all): {mean_steps_all:>8.0f}")
    print(f" Elapsed: {elapsed:.0f}s")
    return {
        "n_sheep": n_sheep,
        "n_seeds": n_seeds,
        "success_rate": rate,
        "n_success": n_ok,
        "mean_steps_success": mean_steps_ok,
        "mean_steps_all": mean_steps_all,
        "elapsed_s": elapsed,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: parse options, print a banner, run the benchmark and
    print its summary dict."""
    cli = argparse.ArgumentParser()
    for flag, default in (("--n-sheep", 5), ("--seeds", 15), ("--max-steps", 100000)):
        cli.add_argument(flag, type=int, default=default)
    cli.add_argument("--drive-mode", default="differential",
                     choices=["differential", "mecanum"])
    opts = cli.parse_args()

    # Imported here, after argument parsing, exactly as the original does.
    from herding.world.geometry import FIELD_SHAPE
    banner = (f"[bench] world={FIELD_SHAPE} n_sheep={opts.n_sheep} "
              f"seeds={opts.seeds} drive={opts.drive_mode}")
    print(banner)
    print()
    summary = run_benchmark(opts.n_sheep, opts.seeds, opts.max_steps,
                            opts.drive_mode)
    print()
    print("[bench] summary:", summary)


if __name__ == "__main__":
    main()
|
||||||
Executable
+57
@@ -0,0 +1,57 @@
|
|||||||
|
#!/usr/bin/env bash
# Measure the actual velocity response of the Webots mecanum robot and
# compare against the gym's first-order kinematics prediction.
#
# Uses HERDING_MODE=calibrate in the shepherd_dog controller, which applies
# a known fixed action for N steps, records GPS displacement, and computes
# the relative error vs gym prediction.
#
# Usage:
#   bash tools/calibrate_mecanum.sh [N_STEPS]
#     N_STEPS : steps to hold each action (default 150, ≈ 2.4 s real-time)
#
# Output:
#   calibrate_mecanum.log — per-axis results printed and written here
#
# Target: < 10% relative error on each axis.
# If errors are high, tune coulombFriction / forceDependentSlip in
# tools/run_webots.sh (mecanum contactProperties block).

set -euo pipefail

N_STEPS="${1:-150}"
# Reject anything that is not a positive integer before launching Webots.
if ! [[ "$N_STEPS" =~ ^[1-9][0-9]*$ ]]; then
  echo "N_STEPS must be a positive integer, got '$N_STEPS'" >&2
  exit 1
fi

TOOLS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
ROOT="$( cd "$TOOLS_DIR/.." && pwd )"
LOG="$ROOT/calibrate_mecanum.log"
source "$TOOLS_DIR/setup_env.sh"

echo "Running mecanum calibration (N_STEPS=$N_STEPS)..."
echo "Results will be written to: $LOG"
# Start from an empty log. This script never writes results itself; it
# expects the calibrate run to append them (writer not visible here).
truncate -s 0 "$LOG"

# run_calib VX VY OM
#   Run one headless Webots calibration with the given fixed command,
#   echo only the interesting controller lines, then clean up.
run_calib() {
  local vx="$1" vy="$2" om="$3"
  echo " Testing vx=$vx vy=$vy om=$om ..."
  rm -f "$ROOT/training/.run_done"
  # `|| true`: a timeout or a grep with no matches must not abort the sweep.
  timeout --kill-after=15s 60 \
    xvfb-run -a \
    env WEBOTS_HEADLESS=1 WEBOTS_EXTRA_ARGS="--stdout --stderr" \
        HERDING_MODE=calibrate HERDING_DRIVE=mecanum HERDING_WORLD=field \
        CALIB_VX="$vx" CALIB_VY="$vy" CALIB_OM="$om" \
        CALIB_N_STEPS="$N_STEPS" \
    bash "$ROOT/tools/run_webots.sh" 0 calibrate mecanum field \
    2>&1 | grep -E "cmd=|gym|webots|error" || true
  # Best-effort cleanup of leftover simulator / virtual display processes.
  # NOTE: this matches every webots-bin and Xvfb process on the machine.
  pkill -9 -f "webots-bin|Xvfb" 2>/dev/null || true
  sleep 1
}

# Three test vectors: pure-x, pure-y, diagonal
run_calib 0.5 0.0 0.0
run_calib 0.0 0.5 0.0
run_calib 0.35 0.35 0.0

echo ""
echo "=== Calibration results ==="
# The log always exists (it was truncated above), so test for content
# rather than relying on `cat` failing.
if [[ -s "$LOG" ]]; then
  cat "$LOG"
else
  echo "(no results written — check controller output above)"
fi
echo ""
echo "Target: <10% error on each axis."
echo "If errors are high, tune coulombFriction / forceDependentSlip in"
echo "tools/run_webots.sh (mecanum contactProperties block)."
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
"""
|
|
||||||
Viewpoint inspector — prints position, orientation and FOV to the console
|
|
||||||
every 60 simulation steps (about once per second at a 16 ms basic time step). Attach as the controller of a dummy supervisor robot to
|
|
||||||
copy-paste exact camera values into field.wbt.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from controller import Supervisor

robot = Supervisor()
timestep = int(robot.getBasicTimeStep())
# getFromDef returns None when the world has no node with this DEF name;
# fail with a clear message instead of an AttributeError inside the loop.
vp = robot.getFromDef("VIEWPOINT")
if vp is None:
    raise SystemExit('viewpoint inspector: no node with DEF name "VIEWPOINT" in this world')

step = 0
while robot.step(timestep) != -1:
    # Print on every 60th simulation step.
    if step % 60 == 0:
        pos = vp.getField("position").getSFVec3f()
        ori = vp.getField("orientation").getSFRotation()
        fov = vp.getField("fieldOfView").getSFFloat()
        print(f"position: {pos[0]:.3f} {pos[1]:.3f} {pos[2]:.3f}")
        print(f"orientation: {ori[0]:.3f} {ori[1]:.3f} {ori[2]:.3f} {ori[3]:.3f}")
        print(f"fieldOfView: {fov:.3f}\n")
    step += 1
|
|
||||||
Executable
+67
@@ -0,0 +1,67 @@
|
|||||||
|
#!/usr/bin/env bash
# Run one DAgger round on a (drive, world) combo.
#
# Usage: tools/dagger_round.sh <drive> <world> [seeds_per_n] [round_idx]
#
# Collects DAgger demos using the current BC policy as the actor and the
# universal teacher as the labeller, in the HERDING_WEBOTS preset env
# (140° FOV, tight tracker — matches deployment). Concatenates with the
# original BC demos, re-trains BC, and saves to
# training/runs/bc_dagger<round>_<drive>_<world>/.

set -euo pipefail
ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
cd "$ROOT"

DRIVE="${1:-differential}"
WORLD="${2:-field}"
SEEDS="${3:-15}"
ROUND="${4:-1}"

TAG="${DRIVE}_${WORLD}"
ORIG_DEMOS="training/bc/demos_${TAG}.npz"
DAGGER_DEMOS="training/bc/dagger${ROUND}_${TAG}.npz"
COMBINED_DEMOS="training/bc/combined${ROUND}_${TAG}.npz"
BC_DIR="training/runs/bc_${TAG}"
OUT_DIR="training/runs/bc_dagger${ROUND}_${TAG}"

# The round world gets more training epochs than every other world.
case "$WORLD" in
  field_round)
    EPOCHS=150
    ;;
  *)
    EPOCHS=60
    ;;
esac

# Fail fast: both inputs are needed later, so check them before the long
# collection step instead of discovering a missing file afterwards.
if [[ ! -f "$ORIG_DEMOS" ]]; then
  echo "Original demos not found: $ORIG_DEMOS" >&2
  exit 1
fi
if [[ ! -d "$BC_DIR" ]]; then
  echo "Actor policy directory not found: $BC_DIR" >&2
  exit 1
fi

echo "=== DAgger round ${ROUND}: ${DRIVE}/${WORLD} ==="
echo " Actor policy: ${BC_DIR}/policy.zip"
echo " Output: ${OUT_DIR}/policy.zip"

# 1. Collect DAgger demos: BC drives, teacher labels (privileged + HERDING_WEBOTS).
python -m training.bc.collect \
  --teacher universal --out "$DAGGER_DEMOS" \
  --seeds-per-n "$SEEDS" --subsample 3 \
  --frame-stack 4 --drive-mode "$DRIVE" --world "$WORLD" \
  --max-steps 30000 \
  --privileged --use-webots-preset \
  --fp-rate 0.0 --action-smooth 0.55 --wheel-slip-std 0.05 \
  --dagger-policy "$BC_DIR"

# 2. Concatenate original demos + dagger demos.
# Paths travel as argv and the heredoc is quoted, so no shell text is ever
# spliced into the Python source.
python - "$ORIG_DEMOS" "$DAGGER_DEMOS" "$COMBINED_DEMOS" <<'PY'
import sys

import numpy as np

orig_path, dag_path, out_path = sys.argv[1:4]
# Context managers close the .npz archives once the arrays are read.
with np.load(orig_path) as orig, np.load(dag_path) as dag:
    n_orig = orig["obs"].shape[0]
    n_dag = dag["obs"].shape[0]
    obs = np.concatenate([orig["obs"], dag["obs"]], axis=0)
    act = np.concatenate([orig["actions"], dag["actions"]], axis=0)
    meta = np.concatenate([orig["meta"], dag["meta"]], axis=0)
np.savez(out_path, obs=obs, actions=act, meta=meta)
print(f"[combine] orig={n_orig} + dagger={n_dag} = {obs.shape[0]}")
PY

# 3. Re-train BC on combined demos.
python -m training.bc.pretrain \
  --demos "$COMBINED_DEMOS" --out "$OUT_DIR" \
  --epochs "$EPOCHS" --net-arch 512,512

echo "=== DAgger round ${ROUND} done: ${OUT_DIR}/policy.zip ==="
|
||||||
Executable
+86
@@ -0,0 +1,86 @@
|
|||||||
|
#!/usr/bin/env bash
# Full retrain + eval + Webots-validate pipeline.
#
# Usage: bash tools/full_pipeline.sh
#
# Output logs are written to the repo root:
#   full_pipeline.log — main pipeline log
#   stage_train.log   — make train_all output
#   stage_eval.log    — make eval_all output
#   stage_webots.log  — Webots validation sweep
#
# Total runtime estimate: 8–12 hours.

# pipefail matters here: every stage is `make ... | tee`, and without it the
# pipeline's status is tee's, so a failed make would be silently ignored
# and later stages would run against stale or missing artefacts.
set -eo pipefail
ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
cd "$ROOT"
source "$ROOT/tools/setup_env.sh"

PIPELINE_LOG="$ROOT/full_pipeline.log"
TRAIN_LOG="$ROOT/stage_train.log"
EVAL_LOG="$ROOT/stage_eval.log"
WEBOTS_LOG="$ROOT/stage_webots.log"
truncate -s 0 "$PIPELINE_LOG" "$TRAIN_LOG" "$EVAL_LOG" "$WEBOTS_LOG"

# log MESSAGE... — timestamped line to stdout and the main pipeline log.
log() { echo "[pipeline $(date +%H:%M:%S)] $*" | tee -a "$PIPELINE_LOG"; }

log "=== START full pipeline $(date) ==="
log ""
log "Phase 1/4: clean_all"
make clean_all 2>&1 | tee -a "$PIPELINE_LOG"
log ""

log "Phase 2/4: train_all (4 combos, ~8h)"
make train_all 2>&1 | tee -a "$TRAIN_LOG"
log " train_all finished"
log ""

log "Phase 3/4: eval_all (gym eval, ~30min)"
make eval_all 2>&1 | tee -a "$EVAL_LOG"
log " eval_all finished"
log ""

log "Phase 4/4: Webots validation sweep (~90min)"

# run_cell MODE DRIVE WORLD N
#   Run one headless Webots episode (320 s budget), keep its raw output in
#   pipeline_<cell>.stdout and append the headline numbers to $WEBOTS_LOG.
#   A failing or timed-out cell is recorded, never fatal.
run_cell() {
  local MODE="$1" DRIVE="$2" WORLD="$3" N="$4"
  echo "" | tee -a "$WEBOTS_LOG"
  echo "=== $MODE $DRIVE $WORLD n=$N ===" | tee -a "$WEBOTS_LOG"
  rm -f "$ROOT/training/.run_done"
  local STDOUT="$ROOT/pipeline_${MODE}_${DRIVE}_${WORLD}_n${N}.stdout"
  timeout --kill-after=15s 320 \
    xvfb-run -a \
    env WEBOTS_HEADLESS=1 WEBOTS_EXTRA_ARGS="--stdout --stderr" \
        HERDING_SEED=42 \
    bash tools/run_webots.sh "$N" "$MODE" "$DRIVE" "$WORLD" > "$STDOUT" 2>&1 || true
  # Largest numerator among all "GT_penned=a/b" tokens seen during the run.
  # `|| true`: with pipefail, a grep that matches nothing must not abort.
  BEST=$(grep "GT_penned=" "$STDOUT" 2>/dev/null \
           | awk -F'GT_penned=' '{print $2}' \
           | awk '{split($1,a,"/"); print a[1]"/"a[2]}' \
           | sort -t/ -k1,1n | tail -1 || true)
  grep -E "\[results\]" "$STDOUT" 2>/dev/null | head -1 | tee -a "$WEBOTS_LOG" || true
  echo " best GT_penned: $BEST" | tee -a "$WEBOTS_LOG"
  # Best-effort cleanup; matches every webots-bin and Xvfb process.
  pkill -9 -f "webots-bin|Xvfb" 2>/dev/null || true
  sleep 1
}

# Differential drive: 4 controllers × 2 worlds × 2 n
for M in bc rl strombom sequential; do
  for W in field field_round; do
    for N in 5 10; do
      run_cell "$M" differential "$W" "$N"
    done
  done
done

# Mecanum drive: 2 controllers × 2 worlds × 2 n
for M in bc rl; do
  for W in field field_round; do
    for N in 5 10; do
      run_cell "$M" mecanum "$W" "$N"
    done
  done
done

log ""
log "=== FULL PIPELINE DONE $(date) ==="
log ""
log "Summary:"
grep -E "=== |best GT_penned" "$WEBOTS_LOG" | tee -a "$PIPELINE_LOG" || true
|
||||||
@@ -0,0 +1,210 @@
|
|||||||
|
"""Generate ShepherdDogMecanum.proto wheel blocks with physical rollers.
|
||||||
|
|
||||||
|
Each wheel becomes:
|
||||||
|
HingeJoint (motor, axis 0 1 0 = body lateral)
|
||||||
|
-> Solid (wheel hub, rotation 0 -1 0 π/2)
|
||||||
|
children:
|
||||||
|
- WHEEL_VIS (visual, kept as-is for appearance)
|
||||||
|
- 8x HingeJoint (passive roller, axis tilted ±45° from wheel rotation
|
||||||
|
axis, tangent to the wheel circumference at the mount
|
||||||
|
point)
|
||||||
|
-> Solid (capsule)
|
||||||
|
boundingObject: a small Cylinder for the hub (smaller radius than the
|
||||||
|
roller circle so the hub doesn't touch the ground)
|
||||||
|
|
||||||
|
X-pattern roller tilt assignment:
|
||||||
|
FR, RL -> +45° (wheel-axis-relative)
|
||||||
|
FL, RR -> -45°
|
||||||
|
|
||||||
|
All math is done in the WHEEL SOLID's local frame. The wheel solid's
|
||||||
|
rotation `0 -1 0 π/2` takes wheel-local x -> body +z (up),
|
||||||
|
wheel-local y -> body +y (lateral, = wheel rotation axis),
|
||||||
|
wheel-local z -> body -x (rearward). Conversely, a body-frame offset
|
||||||
|
(dx, dy, dz) becomes (dz, dy, -dx) in wheel-local coords.
|
||||||
|
|
||||||
|
For a wheel rotating about body y at angle θ (θ=0 = body +x = forward,
|
||||||
|
θ=π/2 = body +z = top), the roller mount in body frame is
|
||||||
|
(R*cos(θ), 0, R*sin(θ)) relative to wheel centre. Tangent (radial-perp,
|
||||||
|
in the wheel-spin plane) is (-sin(θ), 0, cos(θ)); the wheel rotation
|
||||||
|
axis is (0, 1, 0). Roller axis tilted +45° from tangent toward wheel
|
||||||
|
axis:
|
||||||
|
axis_body(+45°) = (1/√2) * (-sin(θ), +1, cos(θ))
|
||||||
|
axis_body(-45°) = (1/√2) * (-sin(θ), -1, cos(θ))
|
||||||
|
|
||||||
|
Transformed to wheel-local: (dz, dy, -dx) on each component gives
|
||||||
|
mount_local = (R*sin(θ), 0, -R*cos(θ))
|
||||||
|
axis_local(+45) = (cos(θ)/√2, +1/√2, sin(θ)/√2)
|
||||||
|
axis_local(-45) = (cos(θ)/√2, -1/√2, sin(θ)/√2)
|
||||||
|
|
||||||
|
The Solid's `rotation` field needs to align the Capsule's default
|
||||||
|
axis (+y) with that local axis. The minimal axis-angle that does this:
|
||||||
|
rotation_axis = (sin(θ), 0, -cos(θ)) (unit)
|
||||||
|
rotation_angle = π/4 for +45° tilt, 3π/4 for -45° tilt
|
||||||
|
"""
|
||||||
|
import math
|
||||||
|
|
||||||
|
WHEEL_NAMES = {
    # key: (display name, x offset, y offset, tilt sign).
    # Tilt sign refers to roller-axis tilt direction relative to the wheel
    # rotation axis (body +y). X-pattern requires rollers on each wheel to
    # tilt INWARD toward the body centre. For a wheel at +y body coord, that
    # means tilting toward -y; for a wheel at -y, tilting toward +y.
    "fr": ("front right", +0.14, -0.14, +1),  # +1 = +45° tilt (toward +y, inward)
    "fl": ("front left", +0.14, +0.14, -1),   # -1 = -45° tilt (toward -y, inward)
    "rr": ("rear right", -0.14, -0.14, -1),   # -1 (toward -y, "outward"...
    "rl": ("rear left", -0.14, +0.14, +1),    # +1 (toward +y, "outward"...
    # ...for the rear pair the X-pattern flips so diagonal pairs FL+RR have
    # SAME tilt direction in body frame, FR+RL the other. The signs above
    # encode that: FR/RL both +1, FL/RR both -1.
}

R_ROLLER_OFFSET = 0.031  # roller-centre distance from wheel hub centre
R_ROLLER_RADIUS = 0.007
R_ROLLER_HEIGHT = 0.020
ROLLER_MASS = 0.003
HUB_RADIUS = 0.020  # < R_ROLLER_OFFSET - R_ROLLER_RADIUS so hub doesn't touch
HUB_HEIGHT = 0.022
HUB_MASS = 0.045
N_ROLLERS = 8


def wheel_block(key):
    """Return the proto text for one mecanum wheel and its passive rollers.

    Args:
        key: one of the WHEEL_NAMES keys ("fr", "fl", "rr", "rl"); any other
            value raises KeyError.

    Returns:
        A string holding a ``DEF <NAME>_WHEEL_JOINT HingeJoint`` node whose
        endPoint Solid contains the visual hub plus N_ROLLERS roller
        HingeJoints. Wheels with a positive tilt sign use contact material
        "MecanumWheelA", the others "MecanumWheelB".
    """
    name, ax, ay, tilt_sign = WHEEL_NAMES[key]
    contact_mat = "MecanumWheelA" if tilt_sign > 0 else "MecanumWheelB"
    safe = name.replace(" ", "_").upper()
    root2 = math.sqrt(2.0)
    # The visual hub is 2 mm smaller than the collision hub. Format it
    # explicitly: the raw float difference prints as 0.018000000000000002.
    hub_vis_radius = f"{HUB_RADIUS - 0.002:.3f}"

    rollers = []
    for k in range(N_ROLLERS):
        theta = 2.0 * math.pi * k / N_ROLLERS
        s, c = math.sin(theta), math.cos(theta)
        # Mount position in wheel-local frame.
        mx = R_ROLLER_OFFSET * s
        my = 0.0
        mz = -R_ROLLER_OFFSET * c
        # Hinge axis in wheel-local frame.
        ax_l = c / root2
        ay_l = tilt_sign / root2
        az_l = s / root2
        # Rotation that maps Capsule default axis (0,1,0) to (ax_l, ay_l, az_l):
        # turning about (s, 0, -c) by φ gives (c·sinφ, cosφ, s·sinφ).
        rot_axis = (s, 0.0, -c)
        rot_angle = math.pi / 4.0 if tilt_sign > 0 else 3.0 * math.pi / 4.0
        rollers.append(f"""\
      # Mecanum roller {k+1} (θ={math.degrees(theta):.0f}°)
      HingeJoint {{
        jointParameters HingeJointParameters {{
          axis {ax_l:.6f} {ay_l:.6f} {az_l:.6f}
          anchor {mx:.6f} {my:.6f} {mz:.6f}
        }}
        endPoint Solid {{
          translation {mx:.6f} {my:.6f} {mz:.6f}
          rotation {rot_axis[0]:.6f} {rot_axis[1]:.6f} {rot_axis[2]:.6f} {rot_angle:.6f}
          children [
            Shape {{
              appearance PBRAppearance {{
                baseColor 0.12 0.12 0.12
                roughness 0.7
                metalness 0.1
              }}
              geometry Capsule {{
                height {R_ROLLER_HEIGHT}
                radius {R_ROLLER_RADIUS}
                subdivision 8
              }}
            }}
          ]
          name "{name} roller {k+1}"
          contactMaterial "{contact_mat}"
          boundingObject Capsule {{
            height {R_ROLLER_HEIGHT}
            radius {R_ROLLER_RADIUS}
            subdivision 8
          }}
          physics Physics {{
            density -1
            mass {ROLLER_MASS}
            centerOfMass [
              0 0 0
            ]
          }}
        }}
      }}""")
    rollers_str = "\n".join(rollers)

    return f"""\
# ========== {name.upper()} WHEEL ==========
DEF {safe}_WHEEL_JOINT HingeJoint {{
  jointParameters HingeJointParameters {{
    axis 0 1 0
    anchor {ax} {ay} 0.038
  }}
  device [
    RotationalMotor {{
      name "{name} wheel motor"
      maxVelocity 70.0
      maxTorque 20.0
    }}
    PositionSensor {{
      name "{name} wheel sensor"
      resolution 0.00628
    }}
  ]
  endPoint Solid {{
    translation {ax} {ay} 0.038
    rotation 0 -1 0 1.570796
    children [
      # Visual hub only — the rollers below provide ground contact.
      Pose {{
        rotation 1 0 0 -1.5708
        children [
          Shape {{
            appearance PBRAppearance {{
              baseColor 0.5 0.5 0.5
              roughness 0.3
              metalness 0.7
            }}
            geometry Cylinder {{
              height 0.018
              radius {hub_vis_radius}
              subdivision 16
            }}
          }}
          Shape {{
            appearance PBRAppearance {{
              baseColor 0.6 0.6 0.6
              roughness 0.2
              metalness 0.8
            }}
            geometry Cylinder {{
              height 0.022
              radius 0.008
              subdivision 8
            }}
          }}
        ]
      }}
{rollers_str}
    ]
    name "{name} wheel"
    boundingObject Pose {{
      rotation 1 0 0 -1.5708
      children [
        Cylinder {{
          height {HUB_HEIGHT}
          radius {HUB_RADIUS}
        }}
      ]
    }}
    physics Physics {{
      density -1
      mass {HUB_MASS}
      centerOfMass [
        0 0 0
      ]
    }}
  }}
}}"""
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
for k in ("fr", "fl", "rr", "rl"):
|
||||||
|
print(wheel_block(k))
|
||||||
|
print()
|
||||||
Executable
+288
@@ -0,0 +1,288 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Launch Webots with N sheep enabled and the chosen controller mode.
|
||||||
|
# Generates a temporary world file in worlds/<WORLD>_test.wbt with sheep
|
||||||
|
# beyond N commented out, sets the env vars the dog controller reads,
|
||||||
|
# then execs Webots on it.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# tools/run_webots.sh [N] [MODE] [DRIVE] [WORLD]
|
||||||
|
# N : number of active sheep (0..10), default 10
|
||||||
|
# MODE : "bc" | "rl" | "strombom" | "sequential" | "universal" | "calibrate", default "bc"
|
||||||
|
# DRIVE : "differential" | "mecanum", default "differential"
|
||||||
|
# WORLD : base world name (without .wbt), default "field"
|
||||||
|
# Supported: "field" (rectangular), "field_round" (circular)
|
||||||
|
#
|
||||||
|
# Examples:
|
||||||
|
# tools/run_webots.sh 10 bc # behaviour-cloned MLP, diff drive
|
||||||
|
# tools/run_webots.sh 10 rl mecanum # KL-PPO fine-tune, mecanum wheels
|
||||||
|
# tools/run_webots.sh 5 sequential differential field_round # analytic baseline, round field (DRIVE must precede WORLD)
|
||||||
|
# tools/run_webots.sh 3 strombom mecanum field_round # Strömbom, mecanum, round
|
||||||
|
#
|
||||||
|
# Notes:
|
||||||
|
# * The policy dir is the first existing one of training/runs/{bc,rl}_<drive>_<world>, {bc,rl}_<drive>, {bc,rl} ("rl" for MODE=rl, "bc" for every other mode).
|
||||||
|
# Override via HERDING_POLICY_DIR=/path/to/run env var.
|
||||||
|
# * Conda env "tir" must be active (provides stable-baselines3 + torch).
|
||||||
|
#
|
||||||
|
# Headless-ish (no 3D view, fast sim, no modal dialogs):
|
||||||
|
# WEBOTS_HEADLESS=1 make webots N=10 MODE=rl DRIVE=mecanum
|
||||||
|
# WEBOTS_HEADLESS=1 tools/run_webots.sh 10 rl mecanum
|
||||||
|
# This passes --no-rendering --minimize --mode=fast --batch to webots.
|
||||||
|
# Webots still needs a display (Qt); on a machine without one use e.g.:
|
||||||
|
# xvfb-run -a env WEBOTS_HEADLESS=1 tools/run_webots.sh 10 rl mecanum
|
||||||
|
# Optional extra CLI tokens (space-separated):
|
||||||
|
# WEBOTS_EXTRA_ARGS="--stdout --stderr" WEBOTS_HEADLESS=1 tools/run_webots.sh 10 rl
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Make sure HERDING_PYTHON is resolved and on PATH so Webots inherits
|
||||||
|
# the right interpreter (controllers/{shepherd_dog,sheep}/runtime.ini
|
||||||
|
# both read $HERDING_PYTHON via env-var expansion).
|
||||||
|
source "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/setup_env.sh"
|
||||||
|
|
||||||
|
N=${1:-10}
|
||||||
|
MODE=${2:-bc}
|
||||||
|
DRIVE=${3:-differential}
|
||||||
|
WORLD=${4:-field}
|
||||||
|
|
||||||
|
if (( N < 0 || N > 10 )); then
|
||||||
|
echo "N must be 0..10, got $N" >&2; exit 1
|
||||||
|
fi
|
||||||
|
case "$MODE" in
|
||||||
|
bc|rl|strombom|sequential|universal|calibrate) ;;
|
||||||
|
*) echo "MODE must be bc|rl|strombom|sequential|universal|calibrate, got '$MODE'" >&2; exit 1 ;;
|
||||||
|
esac
|
||||||
|
case "$DRIVE" in
|
||||||
|
differential|mecanum) ;;
|
||||||
|
*) echo "DRIVE must be differential|mecanum, got '$DRIVE'" >&2; exit 1 ;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
|
||||||
|
SRC="$ROOT/worlds/${WORLD}.wbt"
|
||||||
|
if [[ ! -f "$SRC" ]]; then
|
||||||
|
echo "World file not found: $SRC" >&2; exit 1
|
||||||
|
fi
|
||||||
|
DST="$ROOT/worlds/${WORLD}_test.wbt"
|
||||||
|
|
||||||
|
if [[ -n "${HERDING_POLICY_DIR:-}" ]]; then
|
||||||
|
RESOLVED_POLICY_DIR="$HERDING_POLICY_DIR"
|
||||||
|
else
|
||||||
|
# The training pipeline writes policies to:
|
||||||
|
# training/runs/{bc,rl}_<drive>_<world>
|
||||||
|
# Try that first; fall back to the drive-only and finally the
|
||||||
|
# bare-mode legacy paths so older policy checkouts still load.
|
||||||
|
if [[ "$MODE" == "rl" ]]; then
|
||||||
|
BASE="rl"
|
||||||
|
else
|
||||||
|
BASE="bc"
|
||||||
|
fi
|
||||||
|
for CAND in \
|
||||||
|
"$ROOT/training/runs/${BASE}_${DRIVE}_${WORLD}" \
|
||||||
|
"$ROOT/training/runs/${BASE}_${DRIVE}" \
|
||||||
|
"$ROOT/training/runs/${BASE}"
|
||||||
|
do
|
||||||
|
if [[ -d "$CAND" ]]; then
|
||||||
|
RESOLVED_POLICY_DIR="$CAND"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
: "${RESOLVED_POLICY_DIR:=$ROOT/training/runs/${BASE}_${DRIVE}_${WORLD}}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
cp "$SRC" "$DST"
|
||||||
|
|
||||||
|
# Resolve the LiDAR field of view for this run.
#   - HERDING_LIDAR, when set and non-empty, wins (used for ablations).
#   - Otherwise the drive picks the default: mecanum -> 360 (the trained
#     mecanum target), anything else -> 140.
# Only 140 and 360 are accepted; the resolved value is re-exported as
# HERDING_LIDAR for child processes.
LIDAR_VARIANT="${HERDING_LIDAR:-}"
if [[ -z "$LIDAR_VARIANT" ]]; then
    case "$DRIVE" in
        mecanum) LIDAR_VARIANT="360" ;;
        *)       LIDAR_VARIANT="140" ;;
    esac
fi
case "$LIDAR_VARIANT" in
    140|360)
        ;;
    *)
        echo "HERDING_LIDAR must be 140 or 360, got '$LIDAR_VARIANT'" >&2; exit 1
        ;;
esac
export HERDING_LIDAR="$LIDAR_VARIANT"
|
||||||
|
|
||||||
|
# Point the copied world at the robot proto matching drive + LiDAR.
# Base worlds reference ShepherdDog (diff-drive, 140°); the proto name is
# "ShepherdDog" plus an optional suffix:
#   diff    + 140° -> ""            ShepherdDog.proto (default, no edit)
#   diff    + 360° -> "360"         ShepherdDog360.proto (FOV ablation)
#   mecanum + 140° -> "Mecanum"     ShepherdDogMecanum.proto
#   mecanum + 360° -> "Mecanum360"  ShepherdDogMecanum360.proto (trained target)
PROTO_VARIANT=""
if [[ "$DRIVE" == "mecanum" ]]; then
    PROTO_VARIANT="Mecanum"
fi
if [[ "$LIDAR_VARIANT" == "360" ]]; then
    PROTO_VARIANT+="360"
fi
if [[ -n "$PROTO_VARIANT" ]]; then
    # First rewrite the EXTERNPROTO path, then the node instantiation line.
    sed -i "s|\"../protos/ShepherdDog.proto\"|\"../protos/ShepherdDog${PROTO_VARIANT}.proto\"|" "$DST"
    sed -i "s|^ShepherdDog {|ShepherdDog${PROTO_VARIANT} {|" "$DST"
fi
|
||||||
|
if [[ "$DRIVE" == "mecanum" ]]; then
|
||||||
|
# Wheel-ground friction. The chassis is driven kinematically by
|
||||||
|
# the Supervisor (see drive_mecanum in controllers/shepherd_dog),
|
||||||
|
# so these properties only affect wheel-spin visuals, not the
|
||||||
|
# robot's motion. coulombFriction 2.0 plus a small
|
||||||
|
# forceDependentSlip keeps the rollers from locking up against
|
||||||
|
# the ground.
|
||||||
|
python3 -c "
|
||||||
|
with open('$DST', 'r') as f:
|
||||||
|
txt = f.read()
|
||||||
|
mec = ''' ContactProperties {
|
||||||
|
material1 \"MecanumWheelA\"
|
||||||
|
coulombFriction [
|
||||||
|
2.0
|
||||||
|
]
|
||||||
|
bounce 0
|
||||||
|
forceDependentSlip [
|
||||||
|
0.005
|
||||||
|
]
|
||||||
|
softCFM 0.0001
|
||||||
|
}
|
||||||
|
ContactProperties {
|
||||||
|
material1 \"MecanumWheelB\"
|
||||||
|
coulombFriction [
|
||||||
|
2.0
|
||||||
|
]
|
||||||
|
bounce 0
|
||||||
|
forceDependentSlip [
|
||||||
|
0.005
|
||||||
|
]
|
||||||
|
softCFM 0.0001
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
# The contactProperties array closes with ' ]\n}' (2-space indent ] then WorldInfo }).
|
||||||
|
# Insert the new block just before that closing ].
|
||||||
|
txt = txt.replace('\n ]\n}', '\n' + mec + ' ]\n}', 1)
|
||||||
|
with open('$DST', 'w') as f:
|
||||||
|
f.write(txt)
|
||||||
|
"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Comment out sheep N+1..10 by prefixing the matching Sheep { ... } line.
|
||||||
|
for i in $(seq $((N+1)) 10); do
|
||||||
|
sed -i "s|^Sheep .* \"sheep${i}\".*|# &|" "$DST"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Dual-dog axis split. When HERDING_NDOGS=2 the launcher replaces the
|
||||||
|
# single dog node in the world with two named dogs whose customData
|
||||||
|
# carries the axis assignment (x or y); the controller masks the
|
||||||
|
# off-axis component of every action.
|
||||||
|
NDOGS="${HERDING_NDOGS:-1}"
|
||||||
|
if [[ "$NDOGS" != "1" && "$NDOGS" != "2" ]]; then
|
||||||
|
echo "HERDING_NDOGS must be 1 or 2, got '$NDOGS'" >&2; exit 1
|
||||||
|
fi
|
||||||
|
if [[ "$NDOGS" == "2" ]]; then
|
||||||
|
# Name of the dog node to split. It must equal the node type left in
# "$DST" by the proto swap earlier in this script, which yields one of:
#   ShepherdDog, ShepherdDog360, ShepherdDogMecanum, ShepherdDogMecanum360
# The mecanum + 360° case (the mecanum default) was previously mapped to
# "ShepherdDogMecanum", so the split could not find the renamed
# "ShepherdDogMecanum360 {" node and aborted.
DOG_NODE_NAME="ShepherdDog"
if [[ "$DRIVE" == "mecanum" ]]; then
    DOG_NODE_NAME+="Mecanum"
fi
if [[ "$LIDAR_VARIANT" == "360" ]]; then
    DOG_NODE_NAME+="360"
fi
|
||||||
|
python3 - "$DST" "$DOG_NODE_NAME" <<'PY'
|
||||||
|
import re, sys
|
||||||
|
path, node = sys.argv[1], sys.argv[2]
|
||||||
|
with open(path) as f:
|
||||||
|
txt = f.read()
|
||||||
|
# Match the single existing dog block from "ShepherdDog{,360,Mecanum} {"
|
||||||
|
# up to its closing "}" on a line by itself.
|
||||||
|
pattern = re.compile(rf"^{re.escape(node)} \{{\n(.*?\n)^\}}\n", re.MULTILINE | re.DOTALL)
|
||||||
|
m = pattern.search(txt)
|
||||||
|
if m is None:
|
||||||
|
sys.exit(f"[run_webots] could not locate single-dog block ({node}) for split")
|
||||||
|
two_dogs = (
|
||||||
|
f"{node} {{\n"
|
||||||
|
f" translation -4 -10 0.5\n"
|
||||||
|
f" rotation 0 0 1 1.5708\n"
|
||||||
|
f' name "ShepherdDogX"\n'
|
||||||
|
f' customData "axis=x"\n'
|
||||||
|
f' controller "shepherd_dog"\n'
|
||||||
|
f"}}\n"
|
||||||
|
f"{node} {{\n"
|
||||||
|
f" translation 4 -10 0.5\n"
|
||||||
|
f" rotation 0 0 1 1.5708\n"
|
||||||
|
f' name "ShepherdDogY"\n'
|
||||||
|
f' customData "axis=y"\n'
|
||||||
|
f' controller "shepherd_dog"\n'
|
||||||
|
f"}}\n"
|
||||||
|
)
|
||||||
|
with open(path, 'w') as f:
|
||||||
|
f.write(txt[:m.start()] + two_dogs + txt[m.end():])
|
||||||
|
PY
|
||||||
|
fi
|
||||||
|
export HERDING_NDOGS="$NDOGS"
|
||||||
|
|
||||||
|
# Counts for the summary banner. `grep -c` exits 1 when it counts zero
# matches, hence the `|| true`.
active=$(grep -c '^Sheep' "$DST" || true)
# Match all four dog node types the proto swap can produce, including
# ShepherdDogMecanum360 (previously missing, so the default mecanum
# configuration was reported as 0 dogs).
ndog=$(grep -cE '^ShepherdDog(360|Mecanum|Mecanum360)? \{' "$DST" || true)
|
||||||
|
echo "------------------------------------------------------------"
|
||||||
|
echo "World : $DST"
|
||||||
|
echo "Mode : $MODE"
|
||||||
|
echo "Drive : $DRIVE"
|
||||||
|
echo "LiDAR : ${LIDAR_VARIANT}°"
|
||||||
|
echo "Dogs : $ndog (axis-split=${NDOGS})"
|
||||||
|
echo "Sheep : $active active"
|
||||||
|
echo "Policy dir : $RESOLVED_POLICY_DIR"
|
||||||
|
echo "------------------------------------------------------------"
|
||||||
|
|
||||||
|
# Webots strips HERDING_* env vars from controller subprocesses in some
|
||||||
|
# setups, so we also write a runtime config file the controller reads.
|
||||||
|
cat > "$ROOT/herding_runtime.cfg" <<EOF
|
||||||
|
HERDING_MODE=$MODE
|
||||||
|
HERDING_POLICY_DIR=$RESOLVED_POLICY_DIR
|
||||||
|
HERDING_DRIVE=$DRIVE
|
||||||
|
HERDING_WORLD=$WORLD
|
||||||
|
HERDING_LIDAR=$LIDAR_VARIANT
|
||||||
|
HERDING_NDOGS=$NDOGS
|
||||||
|
HERDING_AXIS_LEAK=${HERDING_AXIS_LEAK:-0.3}
|
||||||
|
HERDING_USE_GT=${HERDING_USE_GT:-0}
|
||||||
|
HERDING_SEED=${HERDING_SEED:-}
|
||||||
|
EOF
|
||||||
|
|
||||||
|
export HERDING_MODE="$MODE"
|
||||||
|
export HERDING_POLICY_DIR="$RESOLVED_POLICY_DIR"
|
||||||
|
export HERDING_DRIVE="$DRIVE"
|
||||||
|
export HERDING_WORLD="$WORLD"
|
||||||
|
export HERDING_LIDAR="$LIDAR_VARIANT"
|
||||||
|
|
||||||
|
# The controller writes this sentinel when all GT sheep are penned. We
|
||||||
|
# poll for it and kill Webots so the run finishes cleanly instead of
|
||||||
|
# idling for minutes after the task is done.
|
||||||
|
DONE_FILE="$ROOT/training/.run_done"
|
||||||
|
mkdir -p "$(dirname "$DONE_FILE")"
|
||||||
|
rm -f "$DONE_FILE"
|
||||||
|
|
||||||
|
if [[ "${WEBOTS_HEADLESS:-}" == "1" ]]; then
|
||||||
|
echo "[run_webots] headless flags: --no-rendering --minimize --mode=fast --batch"
|
||||||
|
# shellcheck disable=SC2086
|
||||||
|
webots --no-rendering --minimize --mode=fast --batch ${WEBOTS_EXTRA_ARGS:-} "$DST" &
|
||||||
|
else
|
||||||
|
# shellcheck disable=SC2086
|
||||||
|
webots ${WEBOTS_EXTRA_ARGS:-} "$DST" &
|
||||||
|
fi
|
||||||
|
WEBOTS_PID=$!
|
||||||
|
|
||||||
|
cleanup() {
    # INT/TERM handler: ask Webots to stop, reap it, then exit with
    # status 0. Failures of kill/wait (e.g. the process is already gone)
    # are deliberately ignored.
    if ! kill "$WEBOTS_PID" 2>/dev/null; then
        :
    fi
    if ! wait "$WEBOTS_PID" 2>/dev/null; then
        :
    fi
    exit 0
}
|
||||||
|
trap cleanup INT TERM
|
||||||
|
|
||||||
|
# Poll for the sentinel; bail when Webots exits on its own or when the
|
||||||
|
# user closes the window.
|
||||||
|
while kill -0 "$WEBOTS_PID" 2>/dev/null; do
|
||||||
|
if [[ -f "$DONE_FILE" ]]; then
|
||||||
|
echo "[run_webots] all sheep penned — closing Webots"
|
||||||
|
sleep 1 # let the controller print its line
|
||||||
|
kill "$WEBOTS_PID" 2>/dev/null || true
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
wait "$WEBOTS_PID" 2>/dev/null || true
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
# Source this from your shell before running the launchers:
|
||||||
|
#
|
||||||
|
# source tools/setup_env.sh
|
||||||
|
#
|
||||||
|
# The launchers (`tools/run_webots.sh`, `tools/webots_sweep*.sh`,
|
||||||
|
# `tools/calibrate_mecanum.sh`) and the Webots controllers (via
|
||||||
|
# `controllers/*/runtime.ini`) all read $HERDING_PYTHON to decide
|
||||||
|
# which Python interpreter to use. The default below points at the
|
||||||
|
# project author's conda env — edit this file or override the var in
|
||||||
|
# your shell to match your own setup.
|
||||||
|
|
||||||
|
: "${HERDING_PYTHON:=/home/jalf/miniconda3/envs/tir/bin/python3}"
|
||||||
|
export HERDING_PYTHON

# Prepend the interpreter's bin/ directory to PATH so subprocesses pick
# up the same interpreter (used by Webots when it doesn't read
# runtime.ini, and by any Python tooling launched by the bash scripts).
#
# Only an absolute directory is ever added:
#   - absolute HERDING_PYTHON  -> its directory, as before;
#   - relative path with a "/" -> anchored at the current directory;
#   - bare command name        -> nothing (dirname would yield ".", and a
#     leading "." on PATH lets files in the cwd shadow system tools).
# The prepend is skipped when the directory already leads PATH, so
# sourcing this file repeatedly does not keep growing PATH.
_herding_py_dir="$(dirname "$HERDING_PYTHON")"
case "$HERDING_PYTHON" in
    /*)  ;;
    */*) _herding_py_dir="$PWD/$_herding_py_dir" ;;
    *)   _herding_py_dir="" ;;
esac
if [[ -n "$_herding_py_dir" \
      && "$PATH" != "$_herding_py_dir" \
      && "$PATH" != "$_herding_py_dir:"* ]]; then
    export PATH="$_herding_py_dir:$PATH"
fi
# This file is sourced, so don't leave the scratch variable behind.
unset _herding_py_dir

# Warn (never fail) when the interpreter cannot be run. A bare command
# name that resolves via PATH counts as usable.
if [[ ! -x "$HERDING_PYTHON" ]] && ! command -v "$HERDING_PYTHON" >/dev/null 2>&1; then
    echo "[setup_env] WARNING: HERDING_PYTHON=$HERDING_PYTHON is not executable." >&2
    echo "[setup_env] Edit tools/setup_env.sh or 'export HERDING_PYTHON=...' yourself." >&2
fi
|
||||||
Executable
+197
@@ -0,0 +1,197 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Interactive Webots launcher. Prompts for the experiment knobs
|
||||||
|
# (mode, drive, world, LiDAR FOV, number of dogs, flock size, GT
|
||||||
|
# bypass) and then dispatches to tools/run_webots.sh with the
|
||||||
|
# selected configuration.
|
||||||
|
#
|
||||||
|
# Usage: bash tools/webots_menu.sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||||
|
ROOT="$( cd "$SCRIPT_DIR/.." && pwd )"
|
||||||
|
|
||||||
|
# Resolve HERDING_PYTHON the same way every other launcher does.
|
||||||
|
source "$SCRIPT_DIR/setup_env.sh"
|
||||||
|
|
||||||
|
# ----- Cosmetics ----------------------------------------------------
|
||||||
|
if [[ -t 1 ]]; then
|
||||||
|
B=$'\e[1m'; D=$'\e[2m'; R=$'\e[0m'
|
||||||
|
G=$'\e[32m'; Y=$'\e[33m'; C=$'\e[36m'
|
||||||
|
else
|
||||||
|
B=""; D=""; R=""; G=""; Y=""; C=""
|
||||||
|
fi
|
||||||
|
|
||||||
|
banner () {
|
||||||
|
cat <<EOF
|
||||||
|
${B}${C}┌──────────────────────────────────────────────────────────────────┐
|
||||||
|
│ Shepherd-dog Webots launcher (interactive) │
|
||||||
|
└──────────────────────────────────────────────────────────────────┘${R}
|
||||||
|
|
||||||
|
${Y}⚠ Python interpreter${R}
|
||||||
|
This script and the Webots controllers read ${B}\$HERDING_PYTHON${R} to
|
||||||
|
decide which interpreter to start. Current value:
|
||||||
|
${G}$HERDING_PYTHON${R}
|
||||||
|
${D}If that path is wrong on your machine, edit ${R}${B}tools/setup_env.sh${R}${D}
|
||||||
|
or export HERDING_PYTHON=/path/to/python3 in your shell.${R}
|
||||||
|
|
||||||
|
EOF
|
||||||
|
}
|
||||||
|
|
||||||
|
ask_choice () {
    # ask_choice "Prompt" "default" "label1:val1" "label2:val2" ...
    #
    # Print a numbered menu and read one line from stdin until the answer
    # is acceptable. The selected option's value (the text after the
    # first ":" of the option) is stored in the global CHOICE.
    #   - empty input / EOF -> CHOICE is set to "default"
    #   - a number 1..N     -> CHOICE is set to that option's value
    #   - anything else     -> "invalid" notice and the menu is shown again
    # The option whose value equals "default" is marked with "*".
    # Reads the colour variables B, G, Y, R set by the calling script.
    local prompt="$1" default="$2"; shift 2
    # All scratch variables are local so nothing leaks into the caller
    # (the loop variable used to be global).
    local opt idx marker raw pick
    local labels=() values=()
    for opt in "$@"; do
        labels+=("${opt%%:*}")
        values+=("${opt#*:}")
    done
    while true; do
        echo "${B}$prompt${R}"
        for idx in "${!labels[@]}"; do
            marker=" "
            if [[ "${values[$idx]}" == "$default" ]]; then
                marker="${G}*${R}"
            fi
            printf " %s %d) ${B}%s${R}\n" "$marker" "$((idx+1))" "${labels[$idx]}"
        done
        printf " Choice [${G}1-${#labels[@]}${R}, default ${G}%s${R}]: " "$default"
        raw=""
        read -r raw || true
        if [[ -z "$raw" ]]; then
            CHOICE="$default"; return
        fi
        # Limit the digit count and force base 10: a leading zero would
        # otherwise be parsed as octal ("08" is an arithmetic error) and a
        # very long digit string would overflow.
        if [[ "$raw" =~ ^[0-9]{1,9}$ ]]; then
            pick=$((10#$raw))
            if (( pick >= 1 && pick <= ${#labels[@]} )); then
                CHOICE="${values[$((pick-1))]}"; return
            fi
        fi
        echo " ${Y}invalid — try again${R}"
    done
}
|
||||||
|
|
||||||
|
ask_int () {
    # ask_int "Prompt" default min max
    #
    # Prompt until the user enters a non-negative decimal integer within
    # [min, max]; the value is stored in the global CHOICE in canonical
    # form (no leading zeros). Empty input or EOF selects "default".
    # Reads the colour variables B, G, Y, R set by the calling script.
    local prompt="$1" default="$2" lo="$3" hi="$4"
    local raw
    while true; do
        printf "${B}%s${R} [${G}%s${R}-${G}%s${R}, default ${G}%s${R}]: " "$prompt" "$lo" "$hi" "$default"
        raw=""
        read -r raw || true
        raw="${raw:-$default}"
        # At most 18 digits keeps the value inside a signed 64-bit integer.
        if [[ "$raw" =~ ^[0-9]{1,18}$ ]]; then
            # Force base 10. Without it bash parses a leading zero as
            # octal: "08" is an arithmetic error and "010" would be
            # range-checked as 8 yet handed back as the string "010".
            raw=$((10#$raw))
            if (( raw >= lo && raw <= hi )); then
                CHOICE="$raw"; return
            fi
        fi
        echo " ${Y}must be an integer in [$lo, $hi]${R}"
    done
}
|
||||||
|
|
||||||
|
# ----- Prompts ------------------------------------------------------
|
||||||
|
banner
|
||||||
|
|
||||||
|
ask_choice "Mode" "bc" \
|
||||||
|
"BC (behaviour-cloned MLP):bc" \
|
||||||
|
"RL (KL-PPO fine-tune):rl" \
|
||||||
|
"Strömbom (analytic):strombom" \
|
||||||
|
"Sequential (analytic):sequential" \
|
||||||
|
"Universal teacher (BC source):universal"
|
||||||
|
MODE="$CHOICE"
|
||||||
|
echo
|
||||||
|
|
||||||
|
ask_choice "Drive" "differential" \
|
||||||
|
"Differential (2-wheel):differential" \
|
||||||
|
"Mecanum (4-wheel, omnidirectional):mecanum"
|
||||||
|
DRIVE="$CHOICE"
|
||||||
|
echo
|
||||||
|
|
||||||
|
ask_choice "World" "field" \
|
||||||
|
"Rectangular (field):field" \
|
||||||
|
"Round (field_round):field_round"
|
||||||
|
WORLD="$CHOICE"
|
||||||
|
echo
|
||||||
|
|
||||||
|
# LiDAR FOV. tools/run_webots.sh supports both FOVs for both drives
# (ShepherdDog{,360}.proto and ShepherdDogMecanum{,360}.proto) and
# defaults mecanum to 360°, which it describes as the trained mecanum
# target. Offer the same per-drive default here rather than pinning
# mecanum to 140°, which would silently launch the ablation variant.
if [[ "$DRIVE" == "mecanum" ]]; then
    ask_choice "LiDAR FOV" "360" \
        "140° (FOV ablation, ShepherdDogMecanum.proto):140" \
        "360° (trained target, ShepherdDogMecanum360.proto):360"
else
    ask_choice "LiDAR FOV" "140" \
        "140° (canonical, ShepherdDog.proto):140" \
        "360° (FOV ablation, ShepherdDog360.proto):360"
fi
LIDAR="$CHOICE"
|
||||||
|
echo
|
||||||
|
|
||||||
|
ask_choice "Number of shepherd dogs" "1" \
|
||||||
|
"1 — solo:1" \
|
||||||
|
"2 — axis-split (X-dog + Y-dog):2"
|
||||||
|
NDOGS="$CHOICE"
|
||||||
|
echo
|
||||||
|
|
||||||
|
if [[ "$NDOGS" == "2" ]]; then
|
||||||
|
ask_choice "Axis-split leak (soft mask gain on the off-axis)" "0.3" \
|
||||||
|
"0.0 — strict (each dog only moves on its axis; tends to deadlock):0.0" \
|
||||||
|
"0.3 — default (off-axis at 30% gain; verified to pen):0.3" \
|
||||||
|
"0.5 — softer:0.5" \
|
||||||
|
"1.0 — no mask (both dogs run full policy):1.0"
|
||||||
|
AXIS_LEAK="$CHOICE"
|
||||||
|
echo
|
||||||
|
fi
|
||||||
|
|
||||||
|
ask_int "Flock size (number of sheep)" 5 1 10
|
||||||
|
N_SHEEP="$CHOICE"
|
||||||
|
echo
|
||||||
|
|
||||||
|
ask_choice "Perception" "lidar" \
|
||||||
|
"LiDAR (canonical):lidar" \
|
||||||
|
"Ground-truth bypass (HERDING_USE_GT=1):gt"
|
||||||
|
if [[ "$CHOICE" == "gt" ]]; then USE_GT=1; else USE_GT=0; fi
|
||||||
|
echo
|
||||||
|
|
||||||
|
ask_choice "Seed" "random" \
|
||||||
|
"Random (different sheep wander each run):random" \
|
||||||
|
"Fixed seed (reproducible run — pick an integer):fixed"
|
||||||
|
if [[ "$CHOICE" == "fixed" ]]; then
|
||||||
|
ask_int " → Seed value" 0 0 1000000
|
||||||
|
SEED="$CHOICE"
|
||||||
|
else
|
||||||
|
SEED=""
|
||||||
|
fi
|
||||||
|
echo
|
||||||
|
|
||||||
|
ask_choice "Headless?" "no" \
|
||||||
|
"No — show the Webots window:no" \
|
||||||
|
"Yes — headless, fast simulation (xvfb-run):yes"
|
||||||
|
HEADLESS="$CHOICE"
|
||||||
|
echo
|
||||||
|
|
||||||
|
# ----- Summary ------------------------------------------------------
|
||||||
|
cat <<EOF
|
||||||
|
${B}${C}── Launch configuration ──────────────────────────────────────────${R}
|
||||||
|
Mode : ${B}$MODE${R}
|
||||||
|
Drive : ${B}$DRIVE${R}
|
||||||
|
World : ${B}$WORLD${R}
|
||||||
|
LiDAR FOV : ${B}${LIDAR}°${R}
|
||||||
|
Dogs : ${B}$NDOGS${R}$( [[ "$NDOGS" == "2" ]] && echo " (axis_leak=${B}$AXIS_LEAK${R})" )
|
||||||
|
Sheep : ${B}$N_SHEEP${R}
|
||||||
|
Perception : ${B}$( [[ "$USE_GT" == "1" ]] && echo "GT bypass" || echo "LiDAR" )${R}
|
||||||
|
Seed : ${B}$( [[ -n "$SEED" ]] && echo "$SEED" || echo "random" )${R}
|
||||||
|
Headless : ${B}$HEADLESS${R}
|
||||||
|
${C}──────────────────────────────────────────────────────────────────${R}
|
||||||
|
|
||||||
|
EOF
|
||||||
|
printf "${B}Launch? [Y/n] ${R}"
|
||||||
|
read -r confirm || true
|
||||||
|
if [[ "$confirm" =~ ^[Nn] ]]; then
|
||||||
|
echo "Aborted."; exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ----- Dispatch -----------------------------------------------------
|
||||||
|
export HERDING_LIDAR="$LIDAR"
|
||||||
|
export HERDING_NDOGS="$NDOGS"
|
||||||
|
export HERDING_USE_GT="$USE_GT"
|
||||||
|
[[ -n "${AXIS_LEAK:-}" ]] && export HERDING_AXIS_LEAK="$AXIS_LEAK"
|
||||||
|
[[ -n "$SEED" ]] && export HERDING_SEED="$SEED"
|
||||||
|
if [[ "$HEADLESS" == "yes" ]]; then
|
||||||
|
export WEBOTS_HEADLESS=1
|
||||||
|
export WEBOTS_EXTRA_ARGS="--stdout --stderr"
|
||||||
|
exec xvfb-run -a bash "$SCRIPT_DIR/run_webots.sh" \
|
||||||
|
"$N_SHEEP" "$MODE" "$DRIVE" "$WORLD"
|
||||||
|
else
|
||||||
|
exec bash "$SCRIPT_DIR/run_webots.sh" \
|
||||||
|
"$N_SHEEP" "$MODE" "$DRIVE" "$WORLD"
|
||||||
|
fi
|
||||||
Executable
+101
@@ -0,0 +1,101 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Headless Webots sweep across modes, drives, worlds, and flock sizes.
|
||||||
|
# Runs sequentially; each trial gets a hard wall-clock timeout of
# TIMEOUT_S seconds (120 s, plus a 15 s kill-after grace period).
# Results are written to webots_sweep.log (space-padded columns) and printed.
|
||||||
|
#
|
||||||
|
# Usage: bash tools/webots_sweep.sh [output_log]
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
|
||||||
|
OUT="${1:-$ROOT/webots_sweep.log}"
|
||||||
|
TIMEOUT_S=120 # ~80k steps in fast headless mode — covers slow-converging physics
|
||||||
|
|
||||||
|
# Source the project python path. Edit tools/setup_env.sh or override
|
||||||
|
# HERDING_PYTHON in your shell to point at a Python with SB3+PyTorch.
|
||||||
|
source "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/setup_env.sh"
|
||||||
|
|
||||||
|
# Columns: mode drive world n_sheep success steps
|
||||||
|
printf "%-12s %-14s %-12s %7s %7s %s\n" \
|
||||||
|
"mode" "drive" "world" "n_sheep" "success" "steps" | tee "$OUT"
|
||||||
|
printf '%s\n' "$(printf '%-12s %-14s %-12s %7s %7s %s' \
|
||||||
|
'----' '-----' '-----' '-------' '-------' '-----')" | tee -a "$OUT"
|
||||||
|
|
||||||
|
run_trial() {
    # run_trial MODE DRIVE WORLD N_SHEEP [POLICY_DIR]
    #
    # Run one headless Webots trial through tools/run_webots.sh under a
    # TIMEOUT_S wall-clock limit, then print one result row and append it
    # to "$OUT". POLICY_DIR, when given, is relative to "$ROOT" and is
    # passed to the launcher as HERDING_POLICY_DIR.
    #
    # The row reads success=OK with the reported step count when the
    # captured output contains the controller's
    # "[dog] all ... sheep penned at step N" line; otherwise FAIL/timeout.
    local mode="$1" drive="$2" world="$3" n="$4" policy_dir="${5:-}"

    # Remove a stale completion sentinel left by a previous trial.
    local done_file="$ROOT/training/.run_done"
    rm -f "$done_file"

    local extra_env=(
        WEBOTS_HEADLESS=1
        WEBOTS_EXTRA_ARGS="--stdout --stderr"
        HERDING_USE_GT=0
    )
    if [[ -n "$policy_dir" ]]; then
        extra_env+=(HERDING_POLICY_DIR="$ROOT/$policy_dir")
    fi

    # Capture stdout+stderr of the whole trial. `|| true` keeps a
    # timeout or crash from aborting the sweep under `set -e`.
    local raw
    raw=$(
        timeout --kill-after=15s "$TIMEOUT_S" \
            xvfb-run -a \
            env "${extra_env[@]}" \
            bash "$ROOT/tools/run_webots.sh" "$n" "$mode" "$drive" "$world" \
            2>&1
    ) || true
    # Webots-bin and Xvfb can survive the timeout; kill any orphans now.
    # NOTE: this matches every such process the user can signal, not just
    # the ones started by this trial.
    pkill -9 -f "webots-bin|Xvfb" 2>/dev/null || true
    sleep 1

    local success="FAIL"
    local steps="timeout"

    # Feed grep from a here-string rather than `echo "$raw" | grep -q`:
    # under `set -o pipefail`, grep quitting at the first match can kill
    # echo with SIGPIPE on large output and turn a real success into FAIL.
    # -m1 keeps only the first line, so `steps` is always a single value.
    local penned_line
    penned_line=$(grep -m1 '\[dog\] all .* sheep penned at step' <<<"$raw" || true)
    if [[ -n "$penned_line" ]]; then
        success="OK"
        if [[ "$penned_line" =~ step\ ([0-9]+) ]]; then
            steps="${BASH_REMATCH[1]}"
        else
            # Success line without a parsable number: record it instead
            # of aborting the whole sweep.
            steps="?"
        fi
    fi

    printf "%-12s %-14s %-12s %7s %7s %s\n" \
        "$mode" "$drive" "$world" "$n" "$success" "$steps" | tee -a "$OUT"
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Analytic baselines (differential only — that's the story context)
|
||||||
|
# strombom / sequential: canonical baselines
|
||||||
|
# universal: the actual teacher used to collect BC demos
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
for mode in strombom sequential universal; do
|
||||||
|
for world in field field_round; do
|
||||||
|
for n in 5 10; do
|
||||||
|
run_trial "$mode" differential "$world" "$n"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# BC — world-specific policies
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
for drive in differential mecanum; do
|
||||||
|
for world in field field_round; do
|
||||||
|
for n in 5 10; do
|
||||||
|
run_trial bc "$drive" "$world" "$n" \
|
||||||
|
"training/runs/bc_${drive}_${world}"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# RL_FAST — MODE=rl with explicit HERDING_POLICY_DIR pointing to rl_fast dirs
|
||||||
|
# (run_webots.sh rejects "rl_fast" as a mode; "rl" + policy override is correct)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
for drive in differential mecanum; do
|
||||||
|
for world in field field_round; do
|
||||||
|
for n in 5 10; do
|
||||||
|
run_trial rl "$drive" "$world" "$n" \
|
||||||
|
"training/runs/rl_fast_${drive}_${world}"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "Sweep complete. Results saved to: $OUT"
|
||||||
Executable
+101
@@ -0,0 +1,101 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Headless Webots sweep across modes, drives, worlds, and flock sizes.
|
||||||
|
# Runs sequentially; each trial gets a hard wall-clock timeout of
# TIMEOUT_S seconds (120 s, plus a 15 s kill-after grace period).
# Results are written to webots_sweep.log (space-padded columns) and printed.
|
||||||
|
#
|
||||||
|
# Usage: bash tools/webots_sweep.sh [output_log]
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"
|
||||||
|
OUT="${1:-$ROOT/webots_sweep.log}"
|
||||||
|
TIMEOUT_S=120 # ~80k steps in fast headless mode — covers slow-converging physics
|
||||||
|
|
||||||
|
# Source the project python path. Edit tools/setup_env.sh or override
|
||||||
|
# HERDING_PYTHON in your shell to point at a Python with SB3+PyTorch.
|
||||||
|
source "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/setup_env.sh"
|
||||||
|
|
||||||
|
# Columns: mode drive world n_sheep success steps
|
||||||
|
printf "%-12s %-14s %-12s %7s %7s %s\n" \
|
||||||
|
"mode" "drive" "world" "n_sheep" "success" "steps" | tee "$OUT"
|
||||||
|
printf '%s\n' "$(printf '%-12s %-14s %-12s %7s %7s %s' \
|
||||||
|
'----' '-----' '-----' '-------' '-------' '-----')" | tee -a "$OUT"
|
||||||
|
|
||||||
|
run_trial() {
    # run_trial MODE DRIVE WORLD N_SHEEP [POLICY_DIR]
    #
    # Run one headless Webots trial through tools/run_webots.sh with the
    # ground-truth perception bypass (HERDING_USE_GT=1) under a TIMEOUT_S
    # wall-clock limit, then print one result row and append it to
    # "$OUT". POLICY_DIR, when given, is relative to "$ROOT" and is passed
    # to the launcher as HERDING_POLICY_DIR.
    #
    # The row reads success=OK with the reported step count when the
    # captured output contains the controller's
    # "[dog] all ... sheep penned at step N" line; otherwise FAIL/timeout.
    local mode="$1" drive="$2" world="$3" n="$4" policy_dir="${5:-}"

    # Remove a stale completion sentinel left by a previous trial.
    local done_file="$ROOT/training/.run_done"
    rm -f "$done_file"

    local extra_env=(
        WEBOTS_HEADLESS=1
        WEBOTS_EXTRA_ARGS="--stdout --stderr"
        HERDING_USE_GT=1
    )
    if [[ -n "$policy_dir" ]]; then
        extra_env+=(HERDING_POLICY_DIR="$ROOT/$policy_dir")
    fi

    # Capture stdout+stderr of the whole trial. `|| true` keeps a
    # timeout or crash from aborting the sweep under `set -e`.
    local raw
    raw=$(
        timeout --kill-after=15s "$TIMEOUT_S" \
            xvfb-run -a \
            env "${extra_env[@]}" \
            bash "$ROOT/tools/run_webots.sh" "$n" "$mode" "$drive" "$world" \
            2>&1
    ) || true
    # Webots-bin and Xvfb can survive the timeout; kill any orphans now.
    # NOTE: this matches every such process the user can signal, not just
    # the ones started by this trial.
    pkill -9 -f "webots-bin|Xvfb" 2>/dev/null || true
    sleep 1

    local success="FAIL"
    local steps="timeout"

    # Feed grep from a here-string rather than `echo "$raw" | grep -q`:
    # under `set -o pipefail`, grep quitting at the first match can kill
    # echo with SIGPIPE on large output and turn a real success into FAIL.
    # -m1 keeps only the first line, so `steps` is always a single value.
    local penned_line
    penned_line=$(grep -m1 '\[dog\] all .* sheep penned at step' <<<"$raw" || true)
    if [[ -n "$penned_line" ]]; then
        success="OK"
        if [[ "$penned_line" =~ step\ ([0-9]+) ]]; then
            steps="${BASH_REMATCH[1]}"
        else
            # Success line without a parsable number: record it instead
            # of aborting the whole sweep.
            steps="?"
        fi
    fi

    printf "%-12s %-14s %-12s %7s %7s %s\n" \
        "$mode" "$drive" "$world" "$n" "$success" "$steps" | tee -a "$OUT"
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Analytic baselines (differential only — that's the story context)
|
||||||
|
# strombom / sequential: canonical baselines
|
||||||
|
# universal: the actual teacher used to collect BC demos
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
for mode in strombom sequential universal; do
|
||||||
|
for world in field field_round; do
|
||||||
|
for n in 5 10; do
|
||||||
|
run_trial "$mode" differential "$world" "$n"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# BC — world-specific policies
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
for drive in differential mecanum; do
|
||||||
|
for world in field field_round; do
|
||||||
|
for n in 5 10; do
|
||||||
|
run_trial bc "$drive" "$world" "$n" \
|
||||||
|
"training/runs/bc_${drive}_${world}"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# RL_FAST — MODE=rl with explicit HERDING_POLICY_DIR pointing to rl_fast dirs
|
||||||
|
# (run_webots.sh rejects "rl_fast" as a mode; "rl" + policy override is correct)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
for drive in differential mecanum; do
|
||||||
|
for world in field field_round; do
|
||||||
|
for n in 5 10; do
|
||||||
|
run_trial rl "$drive" "$world" "$n" \
|
||||||
|
"training/runs/rl_fast_${drive}_${world}"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "Sweep complete. Results saved to: $OUT"
|
||||||
@@ -0,0 +1,118 @@
|
|||||||
|
# Training and evaluation details
|
||||||
|
|
||||||
|
Command-level companion to the root README. Covers demo collection,
|
||||||
|
behaviour cloning, PPO fine-tuning, and evaluation flags; use the root
|
||||||
|
README for the high-level architecture and Webots quick start.
|
||||||
|
|
||||||
|
The pipeline is two strictly-sequential stages per `(drive, world)`
|
||||||
|
combo:
|
||||||
|
|
||||||
|
```
|
||||||
|
sim demos (universal teacher on tracker output, K=4 frame stack)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
bc/pretrain.py ──► runs/bc_<drive>_<world> (MLP)
|
||||||
|
│
|
||||||
|
▼ KL-regularised PPO fine-tune
|
||||||
|
│
|
||||||
|
runs/rl_<drive>_<world> (deployed `rl` mode)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
```
|
||||||
|
herding_env.py — Gymnasium env (LiDAR raycast + tracker by default)
|
||||||
|
bc/collect.py — universal-teacher sim demos
|
||||||
|
bc/pretrain.py — MSE + cosine BC of (obs, action) demos into MlpPolicy
|
||||||
|
rl/train.py — KL-regularised PPO fine-tune of a BC checkpoint
|
||||||
|
rl/train_lstm.py — RecurrentPPO variant (ablation)
|
||||||
|
eval.py — multi-seed analytic / learned policy comparison
|
||||||
|
runs/ — checkpoints (gitignored except for policy.zip)
|
||||||
|
```
|
||||||
|
|
||||||
|
Unit + integration tests live in the top-level `tests/`. Run with
|
||||||
|
`make test` or `python -m pytest tests/`.
|
||||||
|
|
||||||
|
## End-to-end pipeline
|
||||||
|
|
||||||
|
The simplest way to train one combo is the project-root Makefile:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
make DRIVE=differential WORLD=field # demos → bc → rl → eval
|
||||||
|
make DRIVE=differential WORLD=field_round
|
||||||
|
make train_all # all four combos sequentially
|
||||||
|
```
|
||||||
|
|
||||||
|
The individual stages below are kept explicit for cases where you
|
||||||
|
want to tune a single step.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Sim demos with the active-scan + universal teacher under LiDAR
|
||||||
|
# perception. K=4 frame stack so the MLP has temporal context.
|
||||||
|
python -m training.bc.collect \
|
||||||
|
--teacher universal --drive-mode differential --world field \
|
||||||
|
--out training/bc/demos_differential_field.npz \
|
||||||
|
--seeds-per-n 15 --subsample 3 --frame-stack 4
|
||||||
|
|
||||||
|
# 2. Behaviour-clone the demos.
|
||||||
|
python -m training.bc.pretrain \
|
||||||
|
--demos training/bc/demos_differential_field.npz \
|
||||||
|
--out training/runs/bc_differential_field \
|
||||||
|
--epochs 60 --net-arch 512,512
|
||||||
|
|
||||||
|
# 3. KL-regularised PPO fine-tune of bc.
|
||||||
|
python -m training.rl.train \
|
||||||
|
--bc training/runs/bc_differential_field \
|
||||||
|
--out training/runs/rl_differential_field \
|
||||||
|
--drive-mode differential --world field \
|
||||||
|
--total-timesteps 1000000
|
||||||
|
|
||||||
|
# 4. Multi-seed eval (env-side, fast).
|
||||||
|
python -m training.eval --policy training/runs/rl_differential_field \
|
||||||
|
--drive-mode differential --world field \
|
||||||
|
--max-flock 10 --max-steps 15000 --n-seeds 10
|
||||||
|
```
|
||||||
|
|
||||||
|
`bc/pretrain.py` saves the **best-val_cos** snapshot, not the final
|
||||||
|
epoch — multi-modal teachers make training noisy and the last epoch
|
||||||
|
is often worse than an earlier one.
|
||||||
|
|
||||||
|
`rl/train.py` loads BC weights into both a trainable policy and a
|
||||||
|
frozen reference, fixes `log_std` small, and adds `β · KL(π‖π_ref)` to
|
||||||
|
the loss so the policy can only move within a trust region around BC.
|
||||||
|
See the file header for hyperparameter rationale.
|
||||||
|
|
||||||
|
## Mecanum retraining
|
||||||
|
|
||||||
|
For mecanum runs, pass `--use-webots-preset`. Both `collect.py` and
|
||||||
|
`train.py` detect `--drive-mode mecanum` and switch to the
|
||||||
|
`HERDING_MEC_WEBOTS` preset, which matches the physical-roller
|
||||||
|
Webots proto's strafe efficiency (~0.4) and forward bleed (~−0.28).
|
||||||
|
Training without this preset produces a policy that herds in textbook
|
||||||
|
gym mecanum but not in Webots.
|
||||||
|
|
||||||
|
## Analytic teachers
|
||||||
|
|
||||||
|
| Name | What it does | Notes |
|
||||||
|
|---|---|---|
|
||||||
|
| `strombom` | Strömbom 2014 — collect when flock is scattered, drive CoM otherwise | Round-world aware (radially-inward fallback when natural target lies outside the curved boundary) |
|
||||||
|
| `sequential` | Three-phase: collect, drive, then single-target push for the last 1–2 stragglers | Alternative to strombom |
|
||||||
|
| `universal` | Strömbom core + mecanum omega + last-straggler recovery | Used as the BC demo teacher |
|
||||||
|
|
||||||
|
All three are wrapped at demo-collection time in
`herding/control/active_scan.py:ActiveScanTeacher`, which adds an
opening in-place rotation, a walk-to-centre behaviour when the LiDAR
sees nothing, and near-sheep speed modulation (the same modulation
that `herding/control/modulation.py` applies to every dog mode at
inference).
|
||||||
|
|
||||||
|
## Evaluating analytic teachers directly
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m training.eval --policy strombom \
|
||||||
|
--drive-mode differential --world field \
|
||||||
|
--max-flock 10 --max-steps 15000 --n-seeds 10
|
||||||
|
python -m training.eval --policy sequential \
|
||||||
|
--drive-mode differential --world field_round \
|
||||||
|
--max-flock 10 --max-steps 15000 --n-seeds 10
|
||||||
|
```
|
||||||
@@ -0,0 +1,297 @@
|
|||||||
|
"""Collect (obs, action) demonstrations from an analytic teacher.
|
||||||
|
|
||||||
|
Runs the chosen teacher across a grid of ``(n_sheep, seed)`` combos at
|
||||||
|
full difficulty, logs every Nth ``(obs, action)`` pair, and saves
|
||||||
|
successful trajectories to ``.npz`` for behaviour cloning. The teacher
|
||||||
|
is wrapped in :class:`ActiveScanTeacher` by default so it operates on
|
||||||
|
the same partial-obs view the student will have at deployment.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
python -m training.bc.collect --teacher universal --drive-mode differential \\
|
||||||
|
--world field --out training/bc/demos_differential_field.npz --frame-stack 4
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Configure field geometry before other herding imports read it at module level.
|
||||||
|
from herding.world.geometry import configure_from_args as _configure_from_args
|
||||||
|
_configure_from_args()
|
||||||
|
|
||||||
|
from herding.control.active_scan import ActiveScanTeacher
|
||||||
|
from herding.world.geometry import PEN_ENTRY, FIELD_SHAPE
|
||||||
|
from herding.control.sequential import compute_action as sequential_action
|
||||||
|
from herding.control.strombom import compute_action as strombom_action
|
||||||
|
from herding.control.universal import compute_action as universal_action
|
||||||
|
from training.herding_env import HerdingEnv
|
||||||
|
|
||||||
|
|
||||||
|
# Registry of analytic teachers selectable with ``--teacher``. The keys are
# the CLI choices (in this order); the values are the ``compute_action``
# callables imported above.
TEACHERS = dict(
    sequential=sequential_action,
    strombom=strombom_action,
    universal=universal_action,
)
|
||||||
|
|
||||||
|
|
||||||
|
def _call_teacher(fn, dog_xy, dog_heading, sheep_positions, pen_target,
|
||||||
|
drive_mode="differential"):
|
||||||
|
"""Call any teacher function and return (vx, vy, omega, mode).
|
||||||
|
|
||||||
|
Normalizes across 3-tuple teachers (vx, vy, mode) and 4-tuple
|
||||||
|
universal teacher (vx, vy, omega, mode). ActiveScanTeacher (when
|
||||||
|
invoked with drive_mode="mecanum") propagates the base teacher's
|
||||||
|
omega — see test_active_scan_preserves_mecanum_omega.
|
||||||
|
"""
|
||||||
|
# The universal teacher and ActiveScanTeacher accept the extended
|
||||||
|
# (dog_xy, heading, sheep, pen, drive_mode) signature. Older
|
||||||
|
# teachers accept (dog_xy, sheep, pen). Detect by trying the
|
||||||
|
# extended call first.
|
||||||
|
try:
|
||||||
|
result = fn(dog_xy, dog_heading, sheep_positions, pen_target,
|
||||||
|
drive_mode)
|
||||||
|
except TypeError:
|
||||||
|
try:
|
||||||
|
result = fn(dog_xy, dog_heading, sheep_positions, pen_target)
|
||||||
|
except TypeError:
|
||||||
|
result = fn(dog_xy, sheep_positions, pen_target)
|
||||||
|
|
||||||
|
if len(result) == 4:
|
||||||
|
return result # (vx, vy, omega, mode)
|
||||||
|
vx, vy, mode = result
|
||||||
|
return vx, vy, 0.0, mode
|
||||||
|
|
||||||
|
|
||||||
|
def collect_one(n_sheep: int, seed: int, max_steps: int, subsample: int,
                teacher_fn, frame_stack: int = 1, privileged: bool = False,
                drive_mode: str = "differential", herding_cfg=None,
                actor_policy=None):
    """Roll out one episode and record ``(obs, teacher_action)`` pairs.

    The teacher labels every visited state; every ``subsample``-th pair is
    kept. When ``privileged`` is true the raw ``teacher_fn`` is queried with
    the env's own positions of not-yet-penned sheep; otherwise the teacher is
    wrapped in :class:`ActiveScanTeacher` and queried with
    ``env.perceived_positions()``.

    ``actor_policy`` (DAgger mode) is an optional ``policy(obs) -> action``
    callable; when given it drives the env while the teacher only labels.
    When ``None`` the teacher's own action is stepped.

    Returns:
        ``(obs_array, action_array, success, env.steps)`` where both arrays
        are float32 and ``success`` is true iff every sheep ended up penned.
    """
    env = HerdingEnv(n_sheep=n_sheep, max_steps=max_steps,
                     difficulty=1.0, seed=seed, frame_stack=frame_stack,
                     drive_mode=drive_mode, herding_cfg=herding_cfg)
    obs, _ = env.reset(seed=seed)
    recorded_obs, recorded_actions = [], []
    scan_teacher = ActiveScanTeacher(teacher_fn)
    # Privileged mode bypasses the active-scan wrapper.
    labeller = teacher_fn if privileged else scan_teacher

    for step in range(max_steps):
        if privileged:
            positions = {}
            for i in range(env.n_sheep):
                if not env.sheep_penned[i]:
                    positions[f"s{i}"] = (float(env.sheep_x[i]),
                                          float(env.sheep_y[i]))
            if not positions:
                break
        else:
            positions = env.perceived_positions()

        vx, vy, omega, _mode = _call_teacher(
            labeller, (env.dog_x, env.dog_y), env.dog_heading,
            positions, PEN_ENTRY, drive_mode,
        )

        # Mecanum actions carry a third (omega) component.
        components = [vx, vy, omega] if drive_mode == "mecanum" else [vx, vy]
        teacher_action = np.array(components, dtype=np.float32)

        if step % subsample == 0:
            recorded_obs.append(obs.copy())
            recorded_actions.append(teacher_action.copy())

        # In DAgger mode the policy drives; otherwise the teacher does.
        if actor_policy is not None:
            step_action = actor_policy(obs)
        else:
            step_action = teacher_action
        obs, _r, term, trunc, _info = env.step(step_action)
        if term or trunc:
            break

    success = bool(env.sheep_penned.all())
    obs_array = np.asarray(recorded_obs, dtype=np.float32)
    action_array = np.asarray(recorded_actions, dtype=np.float32)
    return obs_array, action_array, success, env.steps
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: collect teacher demonstrations and save them to ``.npz``.

    Runs :func:`collect_one` for every ``(n_sheep, seed)`` combination of the
    requested grid, keeps successful trajectories (or all of them with
    ``--keep-failures`` / in DAgger mode), concatenates them and writes
    ``obs``, ``actions`` and ``meta`` arrays to ``--out``. Each ``meta`` row
    is ``(n_sheep, seed, n_logged, success, total_steps)``.

    Raises:
        FileNotFoundError: ``--dagger-policy`` points at a directory that
            contains neither ``policy.zip`` nor ``final.zip``.
        RuntimeError: no trajectory was kept.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", required=True,
                        help="Output .npz path (convention: "
                             "training/bc/demos_<drive>_<world>.npz).")
    parser.add_argument("--n-sheep-list", default="1,2,3,5,8,10")
    parser.add_argument("--seeds-per-n", type=int, default=15)
    parser.add_argument("--max-steps", type=int, default=30000)
    parser.add_argument("--subsample", type=int, default=5,
                        help="Keep every Nth (obs, action) pair.")
    parser.add_argument("--keep-failures", action="store_true",
                        help="Include partial-success trajectories. Default off.")
    parser.add_argument("--teacher", default="universal",
                        choices=list(TEACHERS.keys()),
                        help="Which analytic teacher to demonstrate.")
    parser.add_argument("--frame-stack", type=int, default=1,
                        help="Concatenate the last K obs into a "
                             "(32·K)-D vector for the policy.")
    parser.add_argument("--privileged", action="store_true",
                        help="Teacher reads ground truth instead of "
                             "tracker output (asymmetric BC).")
    parser.add_argument("--drive-mode", default="differential",
                        choices=["differential", "mecanum"],
                        help="Drive mode for the dog robot.")
    parser.add_argument("--world", default=None,
                        choices=["field", "field_round"],
                        help="World shape. If not set, uses HERDING_WORLD "
                             "env var or defaults to 'field'. Must be set "
                             "before geometry is imported.")
    # Domain randomisation — applied to the gym env during collection so
    # the teacher demonstrates under the same noise the policy will face.
    parser.add_argument("--fp-rate", type=float, default=0.0,
                        help="Mean false-positive detections injected per "
                             "step (Poisson λ). 0 = clean sim (default).")
    parser.add_argument("--action-smooth", type=float, default=0.0,
                        help="EMA coefficient on dog actions (0 = none). "
                             "Set to 0.55 to match the Webots controller.")
    parser.add_argument("--wheel-slip-std", type=float, default=0.0,
                        help="Gaussian noise (rad/s) on wheel speeds for "
                             "mecanum dynamics domain randomisation.")
    parser.add_argument("--dagger-policy", default=None,
                        help="Path to a BC/PPO policy directory or zip. "
                             "When set, the policy drives the env (DAgger) "
                             "while the teacher labels every visited state.")
    parser.add_argument("--use-webots-preset", action="store_true",
                        help="Use the Webots preset: HERDING_WEBOTS (140° "
                             "FOV + tight tracker) for differential drive, "
                             "HERDING_MEC_WEBOTS_360 for mecanum. Match "
                             "this to deployment for DAgger.")
    args = parser.parse_args()

    # Reject values that would otherwise fail deep inside the collection
    # loop (``step % 0``) or while parsing the grid (``int('')``).
    if args.subsample < 1:
        parser.error("--subsample must be >= 1")
    n_sheep_list = [int(tok) for tok in args.n_sheep_list.split(",")
                    if tok.strip()]
    if not n_sheep_list:
        parser.error("--n-sheep-list must name at least one flock size")

    # Validate --world matches geometry (already configured by the
    # module-level configure_from_args() call, or by HERDING_WORLD env var).
    if args.world is not None and args.world != FIELD_SHAPE:
        print(f"[demos] WARNING: --world={args.world} but geometry is "
              f"'{FIELD_SHAPE}'. This should not happen — file a bug.")

    from herding.config import (
        HerdingConfig, HERDING_WEBOTS, HERDING_MEC_WEBOTS_360,
        DomainRandomConfig, RobotConfig,
    )
    if args.use_webots_preset:
        # Mecanum uses the 360° preset (the deployable mecanum target);
        # diff drive keeps the canonical 140° preset.
        if args.drive_mode == "mecanum":
            base = HERDING_MEC_WEBOTS_360
            preset_name = "HERDING_MEC_WEBOTS_360"
        else:
            base = HERDING_WEBOTS
            preset_name = "HERDING_WEBOTS"
        # Small compass noise for mecanum training (robustness margin
        # for the Webots compass sensor).
        compass_std = 0.1 if args.drive_mode == "mecanum" else 0.0
        # NOTE(review): domain_random and robot are rebuilt from scratch, so
        # only the fields listed here carry over from the preset — confirm
        # the preset sets no other fields on these sub-configs.
        herding_cfg = base.replace(
            domain_random=DomainRandomConfig(
                fp_rate=args.fp_rate,
                wheel_slip_std=args.wheel_slip_std,
                compass_noise_std=compass_std,
            ),
            robot=RobotConfig(
                action_smooth=args.action_smooth,
                strafe_efficiency=base.robot.strafe_efficiency,
                strafe_to_forward_bleed=base.robot.strafe_to_forward_bleed,
            ),
        )
        print(f"[demos] {preset_name} preset + DR: fp_rate={args.fp_rate} "
              f"action_smooth={args.action_smooth} wheel_slip_std={args.wheel_slip_std} "
              f"strafe_eff={herding_cfg.robot.strafe_efficiency:.2f} "
              f"compass_noise={compass_std}")
    else:
        herding_cfg = None
        if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
            herding_cfg = HerdingConfig(
                domain_random=DomainRandomConfig(
                    fp_rate=args.fp_rate,
                    wheel_slip_std=args.wheel_slip_std,
                ),
                robot=RobotConfig(action_smooth=args.action_smooth),
            )
            print(f"[demos] domain-random: fp_rate={args.fp_rate} "
                  f"action_smooth={args.action_smooth} "
                  f"wheel_slip_std={args.wheel_slip_std}")

    actor_policy = None
    if args.dagger_policy is not None:
        # DAgger: failures are the most valuable data (off-policy states
        # where the student needs teacher correction). Always keep them.
        args.keep_failures = True
        from stable_baselines3 import PPO
        run = Path(args.dagger_policy)
        if run.is_file():
            # A checkpoint zip given directly, as training.eval accepts.
            zip_path = run
        else:
            for name in ("policy.zip", "final.zip"):
                if (run / name).exists():
                    zip_path = run / name
                    break
            else:
                raise FileNotFoundError(
                    f"No policy found in {run} (tried policy.zip, final.zip)")
        _model = PPO.load(str(zip_path), device="auto")
        print(f"[demos] DAgger mode: actor = {zip_path}")

        # NOTE(review): no observation normalisation is applied here; if the
        # actor was trained with VecNormalize this needs the saved stats.
        def actor_policy(obs):
            obs_b = np.asarray(obs, dtype=np.float32).reshape(1, -1)
            a, _ = _model.predict(obs_b, deterministic=True)
            return a[0]

    teacher_fn = TEACHERS[args.teacher]
    print(f"[demos] teacher: {args.teacher} world: {FIELD_SHAPE}")

    print(f"[demos] grid: n_sheep={n_sheep_list}, seeds={args.seeds_per_n}, "
          f"max_steps={args.max_steps}, subsample={args.subsample}")

    all_obs, all_actions, all_meta = [], [], []
    t_start = time.time()
    n_success = 0
    n_total = 0

    for n in n_sheep_list:
        for seed in range(args.seeds_per_n):
            obs, actions, success, total_steps = collect_one(
                n, seed, args.max_steps, args.subsample, teacher_fn,
                frame_stack=args.frame_stack, privileged=args.privileged,
                drive_mode=args.drive_mode, herding_cfg=herding_cfg,
                actor_policy=actor_policy,
            )
            n_total += 1
            if success:
                n_success += 1
            keep = success or args.keep_failures
            if keep and len(obs) > 0:
                all_obs.append(obs)
                all_actions.append(actions)
                all_meta.append((n, seed, len(obs), int(success), total_steps))
            tag = "✓" if success else "✗"
            print(f"  [{tag}] n={n:>2d} seed={seed:>2d} steps={total_steps:>6d} "
                  f"logged={len(obs):>5d}")

    if not all_obs:
        raise RuntimeError("No trajectories kept — try --keep-failures.")

    obs = np.concatenate(all_obs, axis=0)
    actions = np.concatenate(all_actions, axis=0)
    meta = np.array(all_meta, dtype=np.int32)

    Path(args.out).parent.mkdir(parents=True, exist_ok=True)
    np.savez(args.out, obs=obs, actions=actions, meta=meta)

    elapsed = time.time() - t_start
    # n_total > 0 here: at least one trajectory was kept above.
    print(f"\n=== {n_success}/{n_total} trajectories successful ({100*n_success/n_total:.0f}%) ===")
    print(f"=== {len(obs)} transitions saved to {args.out} ===")
    print(f"=== obs={obs.shape}, actions={actions.shape}, elapsed={elapsed:.0f}s ===")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,238 @@
|
|||||||
|
"""Behaviour cloning of an analytic teacher into an SB3 MlpPolicy.
|
||||||
|
|
||||||
|
Trains the mean-action head against ``(obs, action)`` demos from
|
||||||
|
``training.bc.collect`` using ``MSE + (1 − cos_sim)`` — the cosine
|
||||||
|
term prevents collapse toward zero against unit-vector targets. The
|
||||||
|
best-by-val_cos snapshot is restored at the end of training because
|
||||||
|
multi-modal teachers make the last epoch unreliable.
|
||||||
|
|
||||||
|
Output zip is loadable by ``PPO.load(...)`` and consumed by
|
||||||
|
``HERDING_MODE=bc`` in the dog controller.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
python -m training.bc.pretrain \\
|
||||||
|
--demos training/bc/demos_differential_field.npz \\
|
||||||
|
--out training/runs/bc_differential_field
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.optim as optim
|
||||||
|
from torch.utils.data import DataLoader, TensorDataset
|
||||||
|
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.vec_env import DummyVecEnv
|
||||||
|
|
||||||
|
from training.herding_env import HerdingEnv
|
||||||
|
|
||||||
|
|
||||||
|
def build_model(net_arch_pi, net_arch_vf, log_std_init: float,
                frame_stack: int = 1, drive_mode: str = "differential"):
    """Create an untrained SB3 PPO model used only as a policy-weight container.

    None of PPO's training-loop machinery is exercised during BC.
    ``frame_stack`` has to equal the value used when the demos were
    recorded so that the env's observation space matches the demo obs shape.

    Returns:
        ``(model, env)`` — the PPO instance and the single-env
        ``DummyVecEnv`` it was built on.
    """
    def _make_env():
        return HerdingEnv(frame_stack=frame_stack, drive_mode=drive_mode)

    vec_env = DummyVecEnv([_make_env])
    policy_kwargs = {
        "net_arch": {"pi": net_arch_pi, "vf": net_arch_vf},
        "log_std_init": log_std_init,
    }
    ppo = PPO("MlpPolicy", vec_env, policy_kwargs=policy_kwargs, verbose=0)
    return ppo, vec_env
|
||||||
|
|
||||||
|
|
||||||
|
def forward_mean(policy, obs_batch):
    """Compute the policy's deterministic mean action for a batch of obs.

    SB3's ActorCriticPolicy sends ``forward`` through a Distribution
    wrapper, so the underlying chain is applied by hand instead:
    ``extract_features → mlp_extractor → action_net``.
    """
    extracted = policy.extract_features(obs_batch)
    # extract_features may return a tuple; the first entry is used as the
    # actor's features.
    if isinstance(extracted, tuple):
        extracted = extracted[0]
    latent_pi, _latent_vf = policy.mlp_extractor(extracted)
    return policy.action_net(latent_pi)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: behaviour-clone the demos into an SB3 policy zip.

    Loads ``obs`` / ``actions`` / ``meta`` from ``--demos``, splits them into
    train and validation sets, trains the policy's mean-action head with
    ``MSE + cos_weight * (1 - cosine_similarity)``, restores the snapshot
    with the best cosine similarity and saves it to ``<out>/policy.zip``.

    Checkpoint selection uses validation cosine similarity. When the
    validation split is empty (``--val-split 0`` or a very small demo file)
    it falls back to the training cosine similarity. Otherwise the constant
    "validation" score of 0 would make the first epoch look best, and the
    first-epoch weights would be restored over all later training.

    Raises:
        RuntimeError: the demo file is empty, its obs dim is not a multiple
            of the single-frame obs dim, or ``--drive-mode`` contradicts
            the demo action dimension.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--demos", required=True,
                        help="Path to demos .npz collected by training.bc.collect.")
    parser.add_argument("--out", required=True,
                        help="Output directory (convention: "
                             "training/runs/bc_<drive>_<world>).")
    parser.add_argument("--epochs", type=int, default=60)
    parser.add_argument("--batch-size", type=int, default=256)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--val-split", type=float, default=0.1)
    parser.add_argument("--net-arch", default="256,256",
                        help="Comma-separated hidden layer widths.")
    parser.add_argument("--log-std-init", type=float, default=0.5)
    parser.add_argument("--cos-weight", type=float, default=1.0,
                        help="Weight of the (1 - cosine_similarity) loss "
                             "term; balances against MSE.")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--device", default="cpu")
    parser.add_argument("--drive-mode", default=None,
                        choices=["differential", "mecanum"],
                        help="Drive mode. If not set, inferred from "
                             "demo action dimension (2→differential, 3→mecanum).")
    args = parser.parse_args()
    if not 0.0 <= args.val_split < 1.0:
        parser.error("--val-split must be in [0, 1)")

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # --- Load demos ---
    print(f"[bc] loading demos from {args.demos}")
    # Context manager closes the underlying .npz archive once the arrays
    # have been materialised.
    with np.load(args.demos) as data:
        obs = data["obs"].astype(np.float32)
        actions = data["actions"].astype(np.float32)
        meta = data["meta"]
    print(f"[bc] obs={obs.shape} actions={actions.shape} trajectories={len(meta)}")
    if obs.size == 0:
        raise RuntimeError("Empty demo file.")

    a_norms = np.linalg.norm(actions, axis=1)
    print(f"[bc] action L2 norm: mean={a_norms.mean():.3f} "
          f"min={a_norms.min():.3f} max={a_norms.max():.3f}")

    # --- Train/val split ---
    n = len(obs)
    perm = np.random.permutation(n)
    n_val = int(n * args.val_split)
    val_idx, train_idx = perm[:n_val], perm[n_val:]
    has_val = n_val > 0
    print(f"[bc] train={len(train_idx)} val={len(val_idx)}")
    if not has_val:
        print("[bc] WARNING: empty validation split — selecting the "
              "checkpoint by training cosine similarity instead")

    obs_t = torch.from_numpy(obs)
    act_t = torch.from_numpy(actions)
    train_loader = DataLoader(
        TensorDataset(obs_t[train_idx], act_t[train_idx]),
        batch_size=args.batch_size, shuffle=True,
    )
    val_loader = DataLoader(
        TensorDataset(obs_t[val_idx], act_t[val_idx]),
        batch_size=args.batch_size, shuffle=False,
    )

    net_arch_pi = [int(x) for x in args.net_arch.split(",")]
    net_arch_vf = net_arch_pi[:]
    # Frame stack is inferred from the demo obs dim.
    obs_dim = obs.shape[1]
    from herding.perception.obs import OBS_DIM as _SINGLE
    if obs_dim % _SINGLE != 0:
        raise RuntimeError(f"demo obs dim {obs_dim} is not a multiple of {_SINGLE}")
    frame_stack = obs_dim // _SINGLE
    if frame_stack > 1:
        print(f"[bc] inferred frame_stack={frame_stack} from demo obs dim {obs_dim}")

    # Infer drive mode from action dimension if not explicitly set.
    action_dim = actions.shape[1]
    if args.drive_mode is not None:
        drive_mode = args.drive_mode
    elif action_dim == 3:
        drive_mode = "mecanum"
    else:
        drive_mode = "differential"
    print(f"[bc] drive_mode={drive_mode} (action_dim={action_dim})")
    # Fail early with a clear message rather than with a tensor-shape error
    # in the first loss computation.
    expected_dim = 3 if drive_mode == "mecanum" else 2
    if action_dim != expected_dim:
        raise RuntimeError(
            f"demo action dim {action_dim} does not match drive_mode "
            f"'{drive_mode}' (expected {expected_dim})")

    model, _env = build_model(net_arch_pi, net_arch_vf, args.log_std_init,
                              frame_stack=frame_stack, drive_mode=drive_mode)
    policy = model.policy.to(args.device)
    optimizer = optim.Adam(policy.parameters(), lr=args.lr)

    # --- Train ---
    print(f"[bc] training: epochs={args.epochs} batch={args.batch_size} "
          f"lr={args.lr} device={args.device}")
    t_start = time.time()
    best_val = float("inf")
    best_cos = -1.0
    best_state = None  # restored at the end so noisy last epochs don't win
    select_name = "val_cos" if has_val else "train_cos"

    def combined_loss(pred, target):
        """Return ``(loss, mse, mean_cos)`` for one batch."""
        mse = nn.functional.mse_loss(pred, target)
        # Clamp norms so all-zero actions don't divide by zero.
        p_norm = pred.norm(dim=1).clamp_min(1e-6)
        t_norm = target.norm(dim=1).clamp_min(1e-6)
        cos_sim = (pred * target).sum(dim=1) / (p_norm * t_norm)
        cos_loss = (1.0 - cos_sim).mean()
        return mse + args.cos_weight * cos_loss, mse.item(), cos_sim.mean().item()

    for epoch in range(args.epochs):
        policy.train()
        train_mse_total, train_cos_total, train_count = 0.0, 0.0, 0
        for ob_batch, act_batch in train_loader:
            ob_batch = ob_batch.to(args.device)
            act_batch = act_batch.to(args.device)
            optimizer.zero_grad()
            mean_action = forward_mean(policy, ob_batch)
            loss, mse_val, cos_val = combined_loss(mean_action, act_batch)
            loss.backward()
            optimizer.step()
            bs = ob_batch.size(0)
            train_mse_total += mse_val * bs
            train_cos_total += cos_val * bs
            train_count += bs
        train_mse = train_mse_total / max(1, train_count)
        train_cos = train_cos_total / max(1, train_count)

        policy.eval()
        val_total, val_count = 0.0, 0
        cos_sim_total = 0.0
        with torch.no_grad():
            for ob_batch, act_batch in val_loader:
                ob_batch = ob_batch.to(args.device)
                act_batch = act_batch.to(args.device)
                mean_action = forward_mean(policy, ob_batch)
                bs = ob_batch.size(0)
                val_total += nn.functional.mse_loss(
                    mean_action, act_batch, reduction="sum",
                ).item()
                m_norm = mean_action.norm(dim=1).clamp_min(1e-6)
                a_norm = act_batch.norm(dim=1).clamp_min(1e-6)
                cos = (mean_action * act_batch).sum(dim=1) / (m_norm * a_norm)
                cos_sim_total += cos.sum().item()
                val_count += bs
        if has_val:
            # Summed squared error → per-element mean, comparable to train_mse.
            val_mse = val_total / max(1, val_count) / actions.shape[1]
            cos_sim = cos_sim_total / max(1, val_count)
        else:
            val_mse = float("nan")
            cos_sim = float("nan")
        print(f"  epoch {epoch+1:>2d}/{args.epochs}  "
              f"train_mse={train_mse:.4f}  train_cos={train_cos:+.3f}  "
              f"val_mse={val_mse:.4f}  val_cos={cos_sim:+.3f}")
        if has_val and val_mse < best_val:
            best_val = val_mse
        select_cos = cos_sim if has_val else train_cos
        if select_cos > best_cos:
            best_cos = select_cos
            best_state = {k: v.detach().cpu().clone()
                          for k, v in policy.state_dict().items()}

    if best_state is not None:
        policy.load_state_dict(best_state)
        print(f"[bc] restored best-{select_name} snapshot (cos={best_cos:.3f})")

    elapsed = time.time() - t_start
    print(f"[bc] done in {elapsed:.0f}s  best_val_mse={best_val:.4f}")

    # --- Save ---
    out_dir = Path(args.out)
    out_dir.mkdir(parents=True, exist_ok=True)
    model.save(out_dir / "policy.zip")
    print(f"[bc] saved policy to {out_dir / 'policy.zip'}")
    print(f"\n[bc] verify with: "
          f"python -m training.eval --policy {out_dir}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
+27157
File diff suppressed because one or more lines are too long
@@ -0,0 +1,194 @@
|
|||||||
|
"""Env-side evaluation of analytic or learned policies.
|
||||||
|
|
||||||
|
Reports success rate, mean steps and mean penned per flock size for
|
||||||
|
``n_sheep ∈ 1..max_flock`` across ``--n-seeds`` seeds each.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
python -m training.eval --policy training/runs/rl --n-seeds 10
|
||||||
|
python -m training.eval --policy strombom
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from statistics import mean
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Configure field geometry before other herding imports read it at module level.
|
||||||
|
from herding.world.geometry import configure_from_args as _configure_from_args
|
||||||
|
_configure_from_args()
|
||||||
|
|
||||||
|
from herding.world.geometry import MAX_SHEEP, PEN_ENTRY
|
||||||
|
from herding.control.sequential import compute_action as sequential_action
|
||||||
|
from herding.control.strombom import compute_action as strombom_action
|
||||||
|
from training.herding_env import HerdingEnv
|
||||||
|
|
||||||
|
|
||||||
|
def rollout(env: HerdingEnv, predict_fn, max_steps: int) -> dict:
    """Run one episode and summarise it.

    ``predict_fn(env, obs)`` supplies the action for each step. The episode
    stops when the env reports termination or truncation, or after
    ``max_steps`` steps.

    Returns:
        A dict with ``success``, ``steps`` and ``n_penned``. On a normal
        episode end the values come from the env's ``info`` dict (falling
        back to ``False``, the number of steps taken, and ``0``); on
        step-budget exhaustion ``success`` is ``False`` and ``n_penned`` is
        counted from ``env.sheep_penned``.
    """
    obs, _ = env.reset()
    taken = 0
    while taken < max_steps:
        action = predict_fn(env, obs)
        obs, _reward, terminated, truncated, info = env.step(action)
        taken += 1
        if not (terminated or truncated):
            continue
        summary = {}
        summary["success"] = bool(info.get("is_success", False))
        summary["steps"] = info.get("steps", taken)
        summary["n_penned"] = info.get("n_penned", 0)
        return summary

    n_penned = int(env.sheep_penned.sum())
    return {"success": False, "steps": max_steps, "n_penned": n_penned}
|
||||||
|
|
||||||
|
|
||||||
|
def make_analytic_predictor(action_fn, drive_mode: str = "differential"):
    """Adapt a 3-tuple analytic teacher to the ``predict(env, obs)`` interface.

    The teacher is fed ``env.perceived_positions()`` (the env's exposed
    perception: tracker in LiDAR mode, GT in privileged mode) and must return
    ``(vx, vy, mode)``. The returned action is ``[vx, vy]``, or
    ``[vx, vy, 0.0]`` for mecanum drive.
    """
    def _predict(env, _obs):
        perceived = env.perceived_positions()
        dog_xy = (env.dog_x, env.dog_y)
        vx, vy, _mode = action_fn(dog_xy, perceived, PEN_ENTRY)
        components = [vx, vy]
        if drive_mode == "mecanum":
            # The 3-tuple teachers provide no rotation command.
            components.append(0.0)
        return np.array(components, dtype=np.float32)

    return _predict
|
||||||
|
|
||||||
|
|
||||||
|
def make_strombom_predictor(drive_mode: str = "differential"):
    """Convenience wrapper: an analytic predictor bound to the Strömbom teacher."""
    predictor = make_analytic_predictor(strombom_action, drive_mode)
    return predictor
|
||||||
|
|
||||||
|
|
||||||
|
def make_policy_predictor(model, vecnorm, recurrent: bool = False):
    """Wrap a learned model as a ``predict(env, obs)`` callable.

    Observations are reshaped to a single-row float32 batch and, when
    ``vecnorm`` is given, normalised with ``vecnorm.normalize_obs``. For
    recurrent models the LSTM state and the episode-start flag live in the
    ``state`` dict captured by the closure, which ``_reset_recurrent``
    clears between episodes.
    """
    # Must stay a closure-captured dict with an "lstm" key: _reset_recurrent
    # locates it through the closure cells.
    state = {"lstm": None, "first": True}

    def _predict(_env, obs):
        batch = np.asarray(obs, dtype=np.float32).reshape(1, -1)
        if vecnorm is not None:
            batch = vecnorm.normalize_obs(batch)

        if not recurrent:
            action, _ = model.predict(batch, deterministic=True)
            return action[0]

        starts = np.array([state["first"]], dtype=bool)
        action, next_lstm = model.predict(
            batch, state=state["lstm"], episode_start=starts,
            deterministic=True,
        )
        state["lstm"] = next_lstm
        state["first"] = False
        return action[0]

    return _predict
|
||||||
|
|
||||||
|
|
||||||
|
def _reset_recurrent(predict_fn):
|
||||||
|
"""Reset the recurrent state between episodes."""
|
||||||
|
# The closure stores `state` dict; reach in via __closure__.
|
||||||
|
for cell in predict_fn.__closure__ or []:
|
||||||
|
if isinstance(cell.cell_contents, dict) and "lstm" in cell.cell_contents:
|
||||||
|
cell.cell_contents["lstm"] = None
|
||||||
|
cell.cell_contents["first"] = True
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: evaluate an analytic teacher or a learned policy.

    For each flock size ``1..--max-flock`` the chosen policy is rolled out
    on ``--n-seeds`` seeds, and one table row is printed with the success
    percentage, mean step count and mean number of penned sheep.

    A learned policy is loaded from a zip file or from a run directory
    (``policy.zip`` or ``final.zip``). Its frame stack and drive mode are
    inferred from the model's observation and action spaces, and a
    ``vecnormalize.pkl`` next to it is applied when present.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--policy", required=True,
                     help="'strombom', 'sequential', or path to a "
                          "policy directory / zip.")
    cli.add_argument("--n-seeds", type=int, default=10)
    cli.add_argument("--max-steps", type=int, default=5000)
    cli.add_argument("--max-flock", type=int, default=MAX_SHEEP)
    cli.add_argument("--difficulty", type=float, default=1.0,
                     help="0 = sheep spawn near the gate (easy); "
                          "1 = full field (deployment distribution).")
    cli.add_argument("--drive-mode", default="differential",
                     choices=["differential", "mecanum"],
                     help="Drive mode for the dog robot.")
    cli.add_argument("--world", default=None,
                     choices=["field", "field_round"],
                     help="World shape. If not set, uses HERDING_WORLD "
                          "env var or defaults to 'field'.")
    args = cli.parse_args()

    analytic_teachers = {
        "strombom": strombom_action,
        "sequential": sequential_action,
    }
    drive_mode = args.drive_mode
    frame_stack = 1

    if args.policy in analytic_teachers:
        predict = make_analytic_predictor(analytic_teachers[args.policy],
                                          drive_mode)
    else:
        from stable_baselines3 import PPO
        run = Path(args.policy)
        if run.is_file():
            zip_path = run
        else:
            for candidate in ("policy.zip", "final.zip"):
                if (run / candidate).exists():
                    zip_path = run / candidate
                    break
            else:
                raise FileNotFoundError(
                    f"No checkpoint found in {run} "
                    f"(tried policy.zip, final.zip)"
                )

        # Try RecurrentPPO first (sb3-contrib) for LSTM policies, then
        # fall back to PPO for MLP policies.
        recurrent = False
        model = None
        try:
            from sb3_contrib import RecurrentPPO
            model = RecurrentPPO.load(str(zip_path), device="auto")
            recurrent = True
            print(f"[eval] loaded RecurrentPPO (LSTM) policy")
        except Exception:
            model = PPO.load(str(zip_path), device="auto")

        # Frame stack = policy obs dim / single-frame obs dim, when it
        # divides evenly.
        from herding.perception.obs import OBS_DIM as _SINGLE
        policy_obs_dim = int(model.observation_space.shape[0])
        n_frames, remainder = divmod(policy_obs_dim, _SINGLE)
        if remainder == 0 and n_frames >= 1:
            frame_stack = n_frames
            if frame_stack > 1:
                print(f"[eval] policy expects frame_stack={frame_stack}")

        # Look for saved normalisation stats under the given path, then in
        # its parent (unless that parent is a "best" directory).
        vecnorm = None
        vn_path = run / "vecnormalize.pkl"
        if not vn_path.exists() and run.parent.name != "best":
            vn_path = run.parent / "vecnormalize.pkl"
        if vn_path.exists():
            import pickle
            with open(vn_path, "rb") as fh:
                vecnorm = pickle.load(fh)
            vecnorm.training = False
            vecnorm.norm_reward = False
        predict = make_policy_predictor(model, vecnorm, recurrent=recurrent)

        # The policy's action dim wins over a contradicting --drive-mode.
        policy_action_dim = int(model.action_space.shape[0])
        if policy_action_dim == 2 and drive_mode == "mecanum":
            drive_mode = "differential"
            print(f"[eval] policy has 2D actions — overriding drive_mode "
                  f"to differential")
        elif policy_action_dim == 3 and drive_mode == "differential":
            drive_mode = "mecanum"
            print(f"[eval] policy has 3D actions — overriding drive_mode "
                  f"to mecanum")

    print(f"{'n_sheep':>8} {'success%':>10} {'mean_steps':>12} {'mean_penned':>12}")
    print("-" * 46)
    for n in range(1, args.max_flock + 1):
        outcomes = []
        for seed in range(args.n_seeds):
            env = HerdingEnv(n_sheep=n, max_steps=args.max_steps,
                             difficulty=args.difficulty, seed=seed,
                             frame_stack=frame_stack, drive_mode=drive_mode)
            _reset_recurrent(predict)
            outcomes.append(rollout(env, predict, args.max_steps))
        sr = 100.0 * mean([int(o["success"]) for o in outcomes])
        ms = mean([o["steps"] for o in outcomes])
        mp = mean([o["n_penned"] for o in outcomes])
        print(f"{n:>8d} {sr:>9.1f}% {ms:>12.0f} {mp:>12.2f}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -1,143 +0,0 @@
|
|||||||
"""
|
|
||||||
Evaluation script for a trained herding policy.
|
|
||||||
|
|
||||||
Runs N episodes and reports the three project metrics:
|
|
||||||
1. Success rate — fraction of episodes where all sheep are penned
|
|
||||||
2. Time-to-pen — mean steps across successful episodes (per sheep)
|
|
||||||
3. Flock dispersion — mean pairwise distance among active sheep, averaged
|
|
||||||
over all timesteps (lower = tighter herding)
|
|
||||||
|
|
||||||
Usage
|
|
||||||
-----
|
|
||||||
python evaluate.py --model runs/ppo_herding/best_model/best_model.zip \
|
|
||||||
--vecnorm runs/ppo_herding/vecnorm.pkl \
|
|
||||||
--n-sheep 5 --episodes 100
|
|
||||||
|
|
||||||
Add --render to watch the first episode in a matplotlib window.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
from stable_baselines3 import PPO
|
|
||||||
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
|
|
||||||
|
|
||||||
from herding_env import HerdingEnv
|
|
||||||
|
|
||||||
|
|
||||||
def make_single_env(n_sheep: int, max_steps: int, render_mode: str = None):
    """Build a zero-argument factory that constructs one ``HerdingEnv``.

    The environment itself is not created here; construction is deferred
    until the returned callable is invoked (``main`` hands the callable to
    ``DummyVecEnv``).

    Args:
        n_sheep: forwarded to ``HerdingEnv(n_sheep=...)``.
        max_steps: forwarded to ``HerdingEnv(max_steps=...)``.
        render_mode: forwarded to ``HerdingEnv(render_mode=...)``.

    Returns:
        A callable taking no arguments and returning a new ``HerdingEnv``.
    """
    env_kwargs = dict(n_sheep=n_sheep, max_steps=max_steps,
                      render_mode=render_mode)
    return lambda: HerdingEnv(**env_kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
def pairwise_mean(positions: np.ndarray, n_active: int) -> float:
    """Average distance over every unordered pair of the first ``n_active`` rows.

    Args:
        positions: array indexed by sheep along its first axis; only rows
            ``0 .. n_active-1`` are read.
        n_active: number of leading rows to include.

    Returns:
        The mean of ``np.linalg.norm(p_i - p_j)`` over all pairs ``i < j``,
        or ``0.0`` when fewer than two rows are requested (no pair exists).
    """
    if n_active >= 2:
        flock = positions[:n_active]
        # One entry per unordered pair (a < b), in the same order as a
        # nested i/j loop would visit them.
        gaps = [
            float(np.linalg.norm(flock[a] - flock[b]))
            for a in range(n_active)
            for b in range(a + 1, n_active)
        ]
        return float(np.mean(gaps))
    return 0.0
|
|
||||||
|
|
||||||
|
|
||||||
def parse_args():
    """Parse the evaluation script's command-line options from ``sys.argv``.

    Options:
        --model      (required) path to the saved model .zip
        --vecnorm    optional path to VecNormalize stats .pkl (default None)
        --n-sheep    int, default 1
        --episodes   int, default 50
        --max-steps  int, default 2000
        --render     flag; render first episode in matplotlib
        --seed       int, default 42

    Returns:
        The ``argparse.Namespace`` produced by ``ArgumentParser.parse_args()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", required=True,
                        help="Path to saved model .zip")
    parser.add_argument("--vecnorm", default=None,
                        help="Path to VecNormalize stats .pkl (optional)")
    # Plain integer knobs that carry no help text.
    for flag, fallback in (("--n-sheep", 1),
                           ("--episodes", 50),
                           ("--max-steps", 2000)):
        parser.add_argument(flag, type=int, default=fallback)
    parser.add_argument("--render", action="store_true",
                        help="Render first episode in matplotlib")
    parser.add_argument("--seed", type=int, default=42)
    return parser.parse_args()
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Evaluate a saved PPO policy on ``HerdingEnv`` and print a summary.

    Runs ``args.episodes`` deterministic episodes and reports:
      * success rate   - fraction of episodes where ``n_penned == n_sheep``;
      * time-to-pen    - mean ``steps / n_sheep`` over successful episodes;
      * flock dispersion - per-episode mean of ``pairwise_mean`` sampled on
        every non-terminal step where not all sheep are penned.

    Metrics with no contributing episodes are reported as ``nan``.
    """
    args = parse_args()

    render_mode = "human" if args.render else None
    raw_env = DummyVecEnv([make_single_env(args.n_sheep, args.max_steps,
                                           render_mode)])
    if args.vecnorm:
        env = VecNormalize.load(args.vecnorm, raw_env)
        # Freeze the running statistics and report un-normalised rewards.
        env.training = False
        env.norm_reward = False
    else:
        env = raw_env

    successes = []
    steps_to_pen = []   # steps per sheep, successful episodes only
    dispersions = []    # per-episode mean flock dispersion

    try:
        model = PPO.load(args.model, env=env)

        # --seed used to be parsed but never applied. Seed after loading so
        # that any seeding done while the model is restored does not
        # override the value requested on the command line.
        env.seed(args.seed)

        # The underlying HerdingEnv is the same object whether or not a
        # VecNormalize wrapper sits on top, so look it up once.
        inner = raw_env.envs[0]

        for ep in range(args.episodes):
            obs = env.reset()
            done = False
            ep_steps = 0
            ep_dispersion = []

            while not done:
                action, _ = model.predict(obs, deterministic=True)
                obs, _, dones, infos = env.step(action)
                done = dones[0]
                ep_steps += 1

                # DummyVecEnv resets the wrapped env as soon as an episode
                # ends, so on the terminal step ``inner`` already holds the
                # *next* episode's spawn state. Skip that step so the
                # dispersion average is not contaminated by it.
                # (When rendering is enabled it happens inside step().)
                if not done and not inner.penned[:inner.n_sheep].all():
                    ep_dispersion.append(
                        pairwise_mean(inner.sheep_pos, inner.n_sheep)
                    )

            # ``infos`` still describes the terminal step of this episode.
            info = infos[0]
            n_penned = info.get("n_penned", 0)
            n_sheep = info.get("n_sheep", args.n_sheep)
            success = n_penned == n_sheep

            successes.append(int(success))
            if success:
                steps_to_pen.append(ep_steps / n_sheep)
            if ep_dispersion:
                dispersions.append(float(np.mean(ep_dispersion)))

            if (ep + 1) % 10 == 0:
                print(f" Episode {ep + 1:>4}/{args.episodes} "
                      f"success={int(success)} steps={ep_steps}")
    finally:
        # Release the env (and any render window) even if evaluation fails.
        env.close()

    # -----------------------------------------------------------------------
    # Report
    # -----------------------------------------------------------------------
    # Guard every mean so an empty list (e.g. --episodes 0) yields nan
    # instead of a NumPy empty-slice warning.
    success_rate = float(np.mean(successes)) if successes else float("nan")
    mean_ttp = float(np.mean(steps_to_pen)) if steps_to_pen else float("nan")
    mean_disp = float(np.mean(dispersions)) if dispersions else float("nan")

    print("\n" + "=" * 50)
    print(f" Model : {args.model}")
    print(f" Sheep : {args.n_sheep}")
    print(f" Episodes : {args.episodes}")
    print("-" * 50)
    print(f" Success rate : {success_rate * 100:.1f}%"
          f" ({sum(successes)}/{args.episodes})")
    print(f" Time-to-pen : {mean_ttp:.1f} steps/sheep"
          f" (successful episodes only)")
    print(f" Flock dispersion: {mean_disp:.2f} m"
          f" (mean pairwise distance while active)")
    print("=" * 50)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
+521
-306
@@ -1,353 +1,568 @@
|
|||||||
"""
|
"""Gymnasium environment for the shepherd-dog herding task.
|
||||||
2D herding environment for PPO training (Gymnasium-compatible).
|
|
||||||
|
|
||||||
The dog agent (action: 2D velocity vector) must herd n_sheep into the
|
Single-agent: the dog is the policy; sheep are env-controlled flocking
|
||||||
quarantine pen. Sheep dynamics mirror the Webots controller exactly:
|
agents (``herding.world.flocking_sim``). Kinematics match the proto specs
|
||||||
flee (quadratic ramp), separation (inverse-distance), cohesion, wall
|
(``herding.world.diffdrive``) so a policy trained here transfers to Webots
|
||||||
avoidance, and wander.
|
without re-tuning.
|
||||||
|
|
||||||
Coordinate system matches the Webots world file:
|
* **Action** (differential): ``Box(-1, 1, (2,))`` — ``(vx, vy)`` intent.
|
||||||
field : x ∈ [-15, 15], y ∈ [-15, 15]
|
* **Action** (mecanum): ``Box(-1, 1, (3,))`` — ``(vx, vy, omega)`` intent.
|
||||||
pen : x ∈ [10, 13], y ∈ [-15, -8] (SE corner, open north)
|
* **Observation**: ``Box(-inf, inf, (32·K,))`` from ``herding.perception.obs.build_obs``
|
||||||
|
with optional frame stacking K (concatenated oldest → newest).
|
||||||
Observation is always sized for MAX_SHEEP (currently 5) regardless of
|
* **Reset**: ``options["n_sheep"]`` overrides flock size; otherwise
|
||||||
how many sheep are active. Inactive slots are pre-penned at the pen
|
sampled uniformly from ``[1, max_n_sheep]``.
|
||||||
centre with flag=1. This keeps the model input dimension fixed across
|
* **Reward**: dense shaping (per-sheep distance progress, time
|
||||||
curriculum stages so VecNormalize statistics are preserved throughout.
|
penalty, Strömbom-imitation cosine bonus) + sparse pen/done jackpots.
|
||||||
|
Weights live as class attributes on :class:`HerdingEnv`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import numpy as np
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
import random
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
import gymnasium as gym
|
import gymnasium as gym
|
||||||
|
import numpy as np
|
||||||
from gymnasium import spaces
|
from gymnasium import spaces
|
||||||
|
|
||||||
|
from herding.world.diffdrive import (
|
||||||
|
heading_speed_to_wheels, kinematics_step,
|
||||||
|
mecanum_step, velocity_to_mecanum_wheels, velocity_to_wheels,
|
||||||
|
)
|
||||||
|
from herding.world.flocking_sim import (
|
||||||
|
FLEE_SPEED, MAX_SPEED, WANDER_SPEED, compute_heading_speed,
|
||||||
|
)
|
||||||
|
from herding.world.geometry import (
|
||||||
|
DOG_MAX_LINEAR, DOG_MAX_WHEEL_OMEGA,
|
||||||
|
DOG_SOUTH_LIMIT, DOG_WHEEL_BASE, DOG_WHEEL_BASE_X, DOG_WHEEL_BASE_Y,
|
||||||
|
DOG_WHEEL_RADIUS, FIELD_SHAPE, FIELD_ROUND_R, FIELD_X, FIELD_Y,
|
||||||
|
GATE_X, GATE_Y, MAX_SHEEP,
|
||||||
|
PEN_ENTRY, PEN_X, PEN_Y,
|
||||||
|
SHEEP_MAX_WHEEL_OMEGA, SHEEP_WHEEL_BASE, SHEEP_WHEEL_RADIUS,
|
||||||
|
WEBOTS_DT, clip_to_field, is_penned,
|
||||||
|
)
|
||||||
|
from herding.perception.lidar_perception import detections_from_scan
|
||||||
|
from herding.perception.lidar_sim import simulate_scan
|
||||||
|
from herding.perception.obs import OBS_DIM, build_obs
|
||||||
|
from herding.perception.sheep_tracker import SheepTracker
|
||||||
|
from herding.control.strombom import compute_action as strombom_action
|
||||||
|
from herding.config import HerdingConfig
|
||||||
|
|
||||||
|
|
||||||
class HerdingEnv(gym.Env):
|
class HerdingEnv(gym.Env):
|
||||||
metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}
|
"""Single-agent shepherd-dog herding env.
|
||||||
|
|
||||||
# -----------------------------------------------------------------------
|
Each step is one Webots ``basicTimeStep`` (16 ms). Episodes terminate
|
||||||
# World constants — must match Webots world file
|
when all sheep are penned, or after ``max_steps`` steps (truncation).
|
||||||
# -----------------------------------------------------------------------
|
"""
|
||||||
MAX_SHEEP = 5
|
|
||||||
FIELD = 15.0 # half-size; positions ∈ [-FIELD, FIELD]
|
|
||||||
PEN_X = (10.0, 13.0) # quarantine pen x bounds
|
|
||||||
PEN_Y = (-15.0, -8.0) # quarantine pen y bounds
|
|
||||||
PEN_CENTER = np.array([11.5, -11.5], dtype=np.float32)
|
|
||||||
|
|
||||||
# -----------------------------------------------------------------------
|
metadata = {"render_modes": []}
|
||||||
# Dynamics — calibrated to match Webots robot specs
|
|
||||||
# wheel radius 0.031 m; sheep FLEE_SPEED 20 rad/s → 0.62 m/s
|
|
||||||
# wheel radius 0.038 m; dog maxVelocity 70 rad/s → 2.66 m/s
|
|
||||||
# -----------------------------------------------------------------------
|
|
||||||
DOG_SPEED = 2.5 # m/s
|
|
||||||
SHEEP_FLEE_V = 0.65 # m/s
|
|
||||||
SHEEP_WANDER_V = 0.20 # m/s
|
|
||||||
DT = 0.1 # seconds per step
|
|
||||||
|
|
||||||
# Boid parameters — identical to sheep.py
|
# Reward weights. Sparse jackpots (W_PEN_DELTA, W_DONE) dominate;
|
||||||
FLEE_DIST = 7.0
|
# dense shaping (W_PROGRESS on Δ mean-distance-to-pen) provides the
|
||||||
SEPARATION_DIST = 2.5
|
# gradient; W_IMITATE adds a small cosine bonus toward the analytic
|
||||||
COHESION_DIST = 8.0
|
# teacher's action; W_TIME is a per-step penalty (0 by default).
|
||||||
WALL_MARGIN = 3.5
|
W_PEN_DELTA = 100.0
|
||||||
|
W_PROGRESS = 20.0
|
||||||
|
W_IMITATE = 0.5
|
||||||
|
W_TIME = 0.0
|
||||||
|
W_WALL = 0.0
|
||||||
|
W_COLLISION = 0.0
|
||||||
|
W_DONE = 500.0
|
||||||
|
|
||||||
# -----------------------------------------------------------------------
|
# In-env action EMA. 0 = none; the Webots controller applies its own
|
||||||
# Reward weights
|
# EMA at inference, so the policy needn't learn smoothness.
|
||||||
# -----------------------------------------------------------------------
|
ACTION_SMOOTH = 0.0
|
||||||
W_ALIGN = 0.4 # dense: dog on anti-pen side of each active sheep
|
|
||||||
W_SHAPING = 0.5 # dense: mean sheep distance to pen
|
|
||||||
W_APPROACH = 0.1 # dense: dog within flee range of nearest sheep
|
|
||||||
W_PEN_BONUS = 5.0 # sparse: per sheep successfully penned
|
|
||||||
W_COMPLETE = 20.0 # bonus when ALL active sheep are penned
|
|
||||||
W_STEP_COST = 0.002 # penalty per step (encourages efficiency)
|
|
||||||
|
|
||||||
def __init__(self, n_sheep: int = 1, max_steps: int = 2000,
|
DEFAULT_MAX_STEPS = 5000
|
||||||
render_mode: str = None):
|
COLLISION_DIST = 0.30
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
n_sheep: Optional[int] = None,
|
||||||
|
max_n_sheep: int = MAX_SHEEP,
|
||||||
|
max_steps: int = DEFAULT_MAX_STEPS,
|
||||||
|
difficulty: float = 0.0,
|
||||||
|
seed: Optional[int] = None,
|
||||||
|
use_lidar: bool = True,
|
||||||
|
frame_stack: int = 1,
|
||||||
|
drive_mode: str = "differential",
|
||||||
|
herding_cfg: Optional[HerdingConfig] = None,
|
||||||
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
assert 1 <= n_sheep <= self.MAX_SHEEP
|
# Store the config; fall back to defaults when None.
|
||||||
self.n_sheep = n_sheep
|
self._herding_cfg = herding_cfg
|
||||||
self.max_steps = max_steps
|
|
||||||
self.render_mode = render_mode
|
|
||||||
|
|
||||||
# Observation: dog(x,y) + MAX_SHEEP×sheep(x,y) + MAX_SHEEP×penned
|
# Apply robot config overrides — these shadow the class attributes
|
||||||
# Fixed size across all curriculum stages.
|
# so that per-instance configuration is possible without touching
|
||||||
obs_dim = 2 + 2 * self.MAX_SHEEP + self.MAX_SHEEP
|
# the class-level defaults used by unconfigured instances.
|
||||||
|
if herding_cfg is not None:
|
||||||
|
self.ACTION_SMOOTH = herding_cfg.robot.action_smooth
|
||||||
|
|
||||||
|
# ``use_lidar=True`` (default): obs and imitation-reward teacher
|
||||||
|
# see only LiDAR-perceived positions via a tracker, matching the
|
||||||
|
# Webots controller. ``False`` exposes ground truth for ablation.
|
||||||
|
self._use_lidar = bool(use_lidar)
|
||||||
|
tracker_cfg = herding_cfg.tracker if herding_cfg is not None else None
|
||||||
|
self._tracker = SheepTracker(tracker_cfg=tracker_cfg) if self._use_lidar else None
|
||||||
|
self._np_rng_lidar: Optional[np.random.Generator] = None
|
||||||
|
|
||||||
|
# Frame stacking: the policy receives the last K obs concatenated,
|
||||||
|
# giving a memoryless MLP temporal context. K=1 → single frame.
|
||||||
|
self._frame_stack = max(1, int(frame_stack))
|
||||||
|
self._frame_buffer: list[np.ndarray] = []
|
||||||
|
|
||||||
|
# Drive mode: "differential" (2-wheel) or "mecanum" (4-wheel omni).
|
||||||
|
self._drive_mode = drive_mode.lower()
|
||||||
|
if self._drive_mode not in ("differential", "mecanum"):
|
||||||
|
raise ValueError(f"Unknown drive_mode: {drive_mode!r}")
|
||||||
|
action_dim = 3 if self._drive_mode == "mecanum" else 2
|
||||||
|
self.action_space = spaces.Box(-1.0, 1.0, shape=(action_dim,),
|
||||||
|
dtype=np.float32)
|
||||||
|
self._single_obs_dim = OBS_DIM
|
||||||
self.observation_space = spaces.Box(
|
self.observation_space = spaces.Box(
|
||||||
low=-1.0, high=1.0, shape=(obs_dim,), dtype=np.float32
|
low=-np.inf, high=np.inf,
|
||||||
|
shape=(OBS_DIM * self._frame_stack,), dtype=np.float32,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Action: desired velocity (vx, vy) ∈ [-1, 1]², scaled by DOG_SPEED
|
# n_sheep=None → sample uniformly from [1, max_n_sheep] each reset.
|
||||||
self.action_space = spaces.Box(
|
self._fixed_n_sheep = n_sheep
|
||||||
low=-1.0, high=1.0, shape=(2,), dtype=np.float32
|
self._max_n_sheep = max_n_sheep
|
||||||
)
|
self.max_steps = max_steps
|
||||||
|
# difficulty ∈ [0, 1]: 0 = sheep spawn near the gate (easy);
|
||||||
|
# 1 = sheep spawn anywhere in the field (deployment distribution).
|
||||||
|
self._difficulty = float(difficulty)
|
||||||
|
self._initial_seed = seed
|
||||||
|
self._initial_seed_used = False
|
||||||
|
|
||||||
# Runtime state (populated by reset)
|
# Env-owned RNG for wander jitter, re-seeded from np_random on reset.
|
||||||
self._step_count = 0
|
self._py_rng = random.Random()
|
||||||
self._prev_penned = 0
|
self._action_dim = action_dim
|
||||||
self.dog_pos = np.zeros(2, dtype=np.float32)
|
|
||||||
self.sheep_pos = np.zeros((self.MAX_SHEEP, 2), dtype=np.float32)
|
|
||||||
self.penned = np.ones(self.MAX_SHEEP, dtype=bool)
|
|
||||||
self.wander_ang = np.zeros(self.MAX_SHEEP, dtype=np.float32)
|
|
||||||
|
|
||||||
self._fig = None # lazy matplotlib figure
|
# State (initialised in reset)
|
||||||
|
self.dog_x = self.dog_y = self.dog_heading = 0.0
|
||||||
|
self.sheep_x = np.zeros(0, dtype=np.float32)
|
||||||
|
self.sheep_y = np.zeros(0, dtype=np.float32)
|
||||||
|
self.sheep_h = np.zeros(0, dtype=np.float32)
|
||||||
|
self.sheep_penned = np.zeros(0, dtype=bool)
|
||||||
|
self.sheep_wander = np.zeros(0, dtype=np.float32)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
self.prev_action = np.zeros(self._action_dim, dtype=np.float32)
|
||||||
# Curriculum interface
|
self.smoothed_action = np.zeros(self._action_dim, dtype=np.float32)
|
||||||
# ------------------------------------------------------------------
|
self.steps = 0
|
||||||
|
self.n_sheep = 0
|
||||||
|
self.prev_n_penned = 0
|
||||||
|
self.prev_d_pen = 0.0
|
||||||
|
self.prev_radius = 0.0
|
||||||
|
|
||||||
def set_n_sheep(self, n: int):
|
# --- Public knobs ---
|
||||||
"""Advance curriculum difficulty; takes effect on next reset()."""
|
def set_max_n_sheep(self, value: int) -> None:
|
||||||
assert 1 <= n <= self.MAX_SHEEP
|
self._max_n_sheep = int(np.clip(value, 1, MAX_SHEEP))
|
||||||
self.n_sheep = n
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
def set_difficulty(self, value: float) -> None:
|
||||||
# Gymnasium API
|
self._difficulty = float(np.clip(value, 0.0, 1.0))
|
||||||
# ------------------------------------------------------------------
|
|
||||||
|
|
||||||
def reset(self, seed=None, options=None):
|
def set_imitate_weight(self, value: float) -> None:
|
||||||
|
"""Override the instance W_IMITATE — used to disable Strömbom
|
||||||
|
imitation during PPO fine-tune."""
|
||||||
|
self.W_IMITATE = float(value)
|
||||||
|
|
||||||
|
def set_time_weight(self, value: float) -> None:
|
||||||
|
"""Override the instance W_TIME — set negative to penalise step
|
||||||
|
count and encourage faster time-to-pen during PPO fine-tune."""
|
||||||
|
self.W_TIME = float(value)
|
||||||
|
|
||||||
|
# --- gym API ---
|
||||||
|
def reset(self, *, seed=None, options=None):
|
||||||
|
if (seed is None and self._initial_seed is not None
|
||||||
|
and not self._initial_seed_used):
|
||||||
|
seed = self._initial_seed
|
||||||
|
self._initial_seed_used = True
|
||||||
super().reset(seed=seed)
|
super().reset(seed=seed)
|
||||||
self._step_count = 0
|
self._py_rng.seed(int(self.np_random.integers(0, 2**31 - 1)))
|
||||||
self._prev_penned = 0
|
opts = options or {}
|
||||||
|
|
||||||
# Active sheep (0 .. n_sheep-1): random non-pen positions
|
if "n_sheep" in opts and opts["n_sheep"] is not None:
|
||||||
self.sheep_pos[:] = self.PEN_CENTER
|
self.n_sheep = int(opts["n_sheep"])
|
||||||
self.penned[:] = True
|
elif self._fixed_n_sheep is not None:
|
||||||
|
self.n_sheep = int(self._fixed_n_sheep)
|
||||||
placed = 0
|
|
||||||
while placed < self.n_sheep:
|
|
||||||
p = self.np_random.uniform(-12.0, 12.0, size=(2,)).astype(np.float32)
|
|
||||||
if not self._in_pen(p):
|
|
||||||
self.sheep_pos[placed] = p
|
|
||||||
self.penned[placed] = False
|
|
||||||
placed += 1
|
|
||||||
|
|
||||||
# Dog: 50 % of the time start already on the anti-pen side of the
|
|
||||||
# nearest sheep (within flee range) so early training gets aligned
|
|
||||||
# starts; the other 50 % is fully random to ensure generalisation.
|
|
||||||
if self.np_random.random() < 0.5:
|
|
||||||
# Place dog behind the first active sheep relative to the pen
|
|
||||||
ref = self.sheep_pos[0]
|
|
||||||
away = ref - self.PEN_CENTER # sheep→anti-pen
|
|
||||||
dist = float(np.linalg.norm(away))
|
|
||||||
if dist > 0.1:
|
|
||||||
away = away / dist
|
|
||||||
offset = away * self.np_random.uniform(2.0, self.FLEE_DIST * 0.8)
|
|
||||||
self.dog_pos = np.clip(
|
|
||||||
(ref + offset).astype(np.float32), -self.FIELD, self.FIELD
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
self.dog_pos = self.np_random.uniform(
|
self.n_sheep = int(self.np_random.integers(1, self._max_n_sheep + 1))
|
||||||
-self.FIELD * 0.8, self.FIELD * 0.8, size=(2,)
|
|
||||||
).astype(np.float32)
|
|
||||||
|
|
||||||
# Inactive slots (n_sheep .. MAX_SHEEP-1): already at pen centre, penned=True
|
# Dog spawns near origin with random heading.
|
||||||
|
self.dog_x = float(self.np_random.uniform(-2.5, 2.5))
|
||||||
|
self.dog_y = float(self.np_random.uniform(-2.5, 2.5))
|
||||||
|
self.dog_heading = float(self.np_random.uniform(-math.pi, math.pi))
|
||||||
|
|
||||||
self.wander_ang = self.np_random.uniform(
|
# Sheep spawn region linearly interpolates with difficulty:
|
||||||
-np.pi, np.pi, size=(self.MAX_SHEEP,)
|
# 0 → small box near the gate, 1 → full field.
|
||||||
).astype(np.float32)
|
d = self._difficulty
|
||||||
|
if FIELD_SHAPE == "field_round":
|
||||||
|
# Round field: spawn in a sector near the gate (south),
|
||||||
|
# expanding to the full circle at difficulty=1.
|
||||||
|
spawn_r_lo = 3.0
|
||||||
|
spawn_r_hi = d * FIELD_ROUND_R * 0.8 + (1.0 - d) * 6.0
|
||||||
|
# Angle spread around south (±60° at d=0, full circle at d=1).
|
||||||
|
half_angle = math.radians(60) + d * math.radians(120)
|
||||||
|
angle_lo = math.pi / 2.0 - half_angle # from south = -π/2
|
||||||
|
angle_hi = math.pi / 2.0 + half_angle
|
||||||
|
else:
|
||||||
|
sx_lo = 7.0 - d * 20.0
|
||||||
|
sx_hi = 14.0 - d * 1.0
|
||||||
|
sy_lo = -12.0 + d * 0.0
|
||||||
|
sy_hi = -6.0 + d * 19.0
|
||||||
|
|
||||||
return self._obs(), {}
|
sxs, sys_, shs, sws = [], [], [], []
|
||||||
|
for _ in range(self.n_sheep):
|
||||||
|
for _try in range(100):
|
||||||
|
if FIELD_SHAPE == "field_round":
|
||||||
|
r_spawn = float(self.np_random.uniform(spawn_r_lo, spawn_r_hi))
|
||||||
|
a_spawn = float(self.np_random.uniform(angle_lo, angle_hi))
|
||||||
|
sx = r_spawn * math.cos(a_spawn)
|
||||||
|
sy = -r_spawn * math.sin(a_spawn)
|
||||||
|
else:
|
||||||
|
sx = float(self.np_random.uniform(sx_lo, sx_hi))
|
||||||
|
sy = float(self.np_random.uniform(sy_lo, sy_hi))
|
||||||
|
# Reject if too close to the dog or another sheep, or
|
||||||
|
# already in the gate column (would start "penned").
|
||||||
|
if math.hypot(sx - self.dog_x, sy - self.dog_y) < 3.0:
|
||||||
|
continue
|
||||||
|
if any(math.hypot(sx - x, sy - y) < 1.5
|
||||||
|
for x, y in zip(sxs, sys_)):
|
||||||
|
continue
|
||||||
|
if PEN_X[0] <= sx <= PEN_X[1] and sy < -8.0:
|
||||||
|
continue
|
||||||
|
break
|
||||||
|
sxs.append(sx); sys_.append(sy)
|
||||||
|
shs.append(float(self.np_random.uniform(-math.pi, math.pi)))
|
||||||
|
sws.append(float(self.np_random.uniform(-math.pi, math.pi)))
|
||||||
|
|
||||||
|
self.sheep_x = np.asarray(sxs, dtype=np.float32)
|
||||||
|
self.sheep_y = np.asarray(sys_, dtype=np.float32)
|
||||||
|
self.sheep_h = np.asarray(shs, dtype=np.float32)
|
||||||
|
self.sheep_wander = np.asarray(sws, dtype=np.float32)
|
||||||
|
self.sheep_penned = np.zeros(self.n_sheep, dtype=bool)
|
||||||
|
|
||||||
|
self.prev_action = np.zeros(self._action_dim, dtype=np.float32)
|
||||||
|
self.smoothed_action = np.zeros(self._action_dim, dtype=np.float32)
|
||||||
|
self.steps = 0
|
||||||
|
self.prev_n_penned = 0
|
||||||
|
self.prev_d_pen, self.prev_radius = self._flock_metrics()
|
||||||
|
|
||||||
|
if self._tracker is not None:
|
||||||
|
self._tracker.reset()
|
||||||
|
self._np_rng_lidar = np.random.default_rng(
|
||||||
|
int(self.np_random.integers(0, 2**31 - 1)))
|
||||||
|
self._update_tracker()
|
||||||
|
|
||||||
|
self._frame_buffer = []
|
||||||
|
|
||||||
|
obs = self._build_obs()
|
||||||
|
info = {"n_sheep": self.n_sheep}
|
||||||
|
return obs, info
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
self._step_count += 1
|
action = np.clip(np.asarray(action, dtype=np.float32), -1.0, 1.0)
|
||||||
|
|
||||||
# Move dog — clip each axis independently so the agent can idle
|
self.smoothed_action = (
|
||||||
act = np.clip(np.asarray(action, dtype=np.float32), -1.0, 1.0)
|
self.ACTION_SMOOTH * self.prev_action
|
||||||
self.dog_pos = np.clip(
|
+ (1.0 - self.ACTION_SMOOTH) * action
|
||||||
self.dog_pos + act * self.DOG_SPEED * self.DT,
|
|
||||||
-self.FIELD, self.FIELD
|
|
||||||
)
|
)
|
||||||
|
self.prev_action = self.smoothed_action.copy()
|
||||||
|
vx, vy = float(self.smoothed_action[0]), float(self.smoothed_action[1])
|
||||||
|
omega = float(self.smoothed_action[2]) if self._action_dim >= 3 else 0.0
|
||||||
|
|
||||||
# Step sheep dynamics
|
# Domain randomisation: compass (heading) noise.
|
||||||
for i in range(self.n_sheep):
|
dr = (self._herding_cfg.domain_random
|
||||||
if self.penned[i]:
|
if self._herding_cfg is not None else None)
|
||||||
continue
|
slip_std = dr.wheel_slip_std if dr is not None else 0.0
|
||||||
self.sheep_pos[i] = self._step_sheep(i)
|
if dr is not None and dr.compass_noise_std > 0.0 and self._np_rng_lidar is not None:
|
||||||
if self._in_pen(self.sheep_pos[i]):
|
self.dog_heading = float(self.dog_heading + self._np_rng_lidar.normal(
|
||||||
self.penned[i] = True
|
0.0, dr.compass_noise_std))
|
||||||
|
|
||||||
n_penned = int(self.penned[:self.n_sheep].sum())
|
# Safety supervisor — dog stays north of the gate.
|
||||||
newly_penned = n_penned - self._prev_penned
|
if self.dog_y < DOG_SOUTH_LIMIT and vy < 0.0:
|
||||||
self._prev_penned = n_penned
|
vx, vy = 0.0, 1.0
|
||||||
|
|
||||||
reward = self._reward(n_penned, newly_penned)
|
# Step the dog.
|
||||||
terminated = n_penned == self.n_sheep
|
if self._drive_mode == "mecanum":
|
||||||
truncated = self._step_count >= self.max_steps
|
w_fl, w_fr, w_rl, w_rr = velocity_to_mecanum_wheels(
|
||||||
info = {"n_penned": n_penned, "n_sheep": self.n_sheep}
|
vx, vy, omega, self.dog_heading,
|
||||||
|
max_linear=DOG_MAX_LINEAR,
|
||||||
if self.render_mode == "human":
|
wheel_radius=DOG_WHEEL_RADIUS,
|
||||||
self.render()
|
lx=DOG_WHEEL_BASE_X / 2.0, ly=DOG_WHEEL_BASE_Y / 2.0,
|
||||||
|
max_wheel_omega=DOG_MAX_WHEEL_OMEGA,
|
||||||
return self._obs(), float(reward), terminated, truncated, info
|
k_turn=4.0,
|
||||||
|
wheel_base=DOG_WHEEL_BASE,
|
||||||
def render(self):
|
)
|
||||||
import matplotlib.pyplot as plt
|
robot_cfg = (self._herding_cfg.robot
|
||||||
import matplotlib.patches as mpatches
|
if self._herding_cfg is not None else None)
|
||||||
|
strafe_efficiency = (robot_cfg.strafe_efficiency
|
||||||
if self._fig is None:
|
if robot_cfg is not None else 1.0)
|
||||||
plt.ion()
|
strafe_bleed = (robot_cfg.strafe_to_forward_bleed
|
||||||
self._fig, self._ax = plt.subplots(figsize=(6, 6))
|
if robot_cfg is not None else 0.0)
|
||||||
|
self.dog_x, self.dog_y, self.dog_heading = mecanum_step(
|
||||||
ax = self._ax
|
self.dog_x, self.dog_y, self.dog_heading,
|
||||||
ax.clear()
|
w_fl, w_fr, w_rl, w_rr,
|
||||||
ax.set_xlim(-16, 16)
|
DOG_WHEEL_RADIUS,
|
||||||
ax.set_ylim(-16, 16)
|
DOG_WHEEL_BASE_X / 2.0, DOG_WHEEL_BASE_Y / 2.0,
|
||||||
ax.set_aspect("equal")
|
WEBOTS_DT,
|
||||||
ax.set_facecolor("#dcedc8")
|
slip_std=slip_std,
|
||||||
|
rng=self._np_rng_lidar,
|
||||||
# Field boundary
|
strafe_efficiency=strafe_efficiency,
|
||||||
ax.add_patch(mpatches.Rectangle(
|
strafe_to_forward_bleed=strafe_bleed,
|
||||||
(-15, -15), 30, 30, fill=False, edgecolor="#795548", linewidth=2
|
)
|
||||||
))
|
|
||||||
# Pen
|
|
||||||
pw = self.PEN_X[1] - self.PEN_X[0]
|
|
||||||
ph = self.PEN_Y[1] - self.PEN_Y[0]
|
|
||||||
ax.add_patch(mpatches.Rectangle(
|
|
||||||
(self.PEN_X[0], self.PEN_Y[0]), pw, ph,
|
|
||||||
facecolor="#ffe082", edgecolor="#795548", linewidth=2
|
|
||||||
))
|
|
||||||
ax.text(11.5, -11.5, "pen", ha="center", va="center",
|
|
||||||
fontsize=8, color="#795548")
|
|
||||||
|
|
||||||
# Sheep
|
|
||||||
for i in range(self.MAX_SHEEP):
|
|
||||||
if i >= self.n_sheep:
|
|
||||||
continue # inactive slot — not shown
|
|
||||||
color = "deeppink" if self.penned[i] else "white"
|
|
||||||
ax.plot(*self.sheep_pos[i], "o", color=color, markersize=11,
|
|
||||||
markeredgecolor="#555", markeredgewidth=1.5)
|
|
||||||
|
|
||||||
# Dog
|
|
||||||
ax.plot(*self.dog_pos, "s", color="#4e342e", markersize=13,
|
|
||||||
markeredgecolor="black", markeredgewidth=1.5)
|
|
||||||
|
|
||||||
ax.set_title(
|
|
||||||
f"step {self._step_count} | "
|
|
||||||
f"penned {int(self.penned[:self.n_sheep].sum())}/{self.n_sheep}",
|
|
||||||
fontsize=11
|
|
||||||
)
|
|
||||||
self._fig.canvas.draw()
|
|
||||||
self._fig.canvas.flush_events()
|
|
||||||
plt.pause(0.001)
|
|
||||||
|
|
||||||
def close(self):
|
|
||||||
if self._fig is not None:
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
plt.close(self._fig)
|
|
||||||
self._fig = None
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Internals
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _in_pen(self, pos: np.ndarray) -> bool:
|
|
||||||
return (self.PEN_X[0] < pos[0] < self.PEN_X[1] and
|
|
||||||
self.PEN_Y[0] < pos[1] < self.PEN_Y[1])
|
|
||||||
|
|
||||||
def _obs(self) -> np.ndarray:
|
|
||||||
scale = 1.0 / self.FIELD
|
|
||||||
return np.concatenate([
|
|
||||||
self.dog_pos * scale, # 2
|
|
||||||
(self.sheep_pos * scale).flatten(), # 2 * MAX_SHEEP
|
|
||||||
self.penned.astype(np.float32), # MAX_SHEEP
|
|
||||||
]).astype(np.float32)
|
|
||||||
|
|
||||||
def _reward(self, n_penned: int, newly_penned: int) -> float:
|
|
||||||
active_mask = ~self.penned[:self.n_sheep]
|
|
||||||
if active_mask.any():
|
|
||||||
active_pos = self.sheep_pos[:self.n_sheep][active_mask]
|
|
||||||
dists_pen = np.linalg.norm(active_pos - self.PEN_CENTER, axis=1)
|
|
||||||
dists_dog = np.linalg.norm(active_pos - self.dog_pos, axis=1)
|
|
||||||
|
|
||||||
# Sheep-to-pen shaping
|
|
||||||
shaping = -(dists_pen.mean() / (2 * self.FIELD))
|
|
||||||
|
|
||||||
# Approach: dog penalised for being far from nearest sheep
|
|
||||||
approach = -(dists_dog.min() / (2 * self.FIELD))
|
|
||||||
|
|
||||||
# Alignment: reward dog for being on the anti-pen side of each sheep.
|
|
||||||
# When the dog is opposite the pen relative to a sheep, that sheep
|
|
||||||
# flees toward the pen. Score ∈ [-1, 1] per sheep, weighted by
|
|
||||||
# a proximity gate so only nearby dogs count.
|
|
||||||
align_scores = []
|
|
||||||
for s_pos, d_pen, d_dog in zip(active_pos, dists_pen, dists_dog):
|
|
||||||
if d_pen < 0.1 or d_dog < 0.1:
|
|
||||||
continue
|
|
||||||
pen_dir = (self.PEN_CENTER - s_pos) / d_pen # sheep → pen
|
|
||||||
dog_dir = (self.dog_pos - s_pos) / d_dog # sheep → dog
|
|
||||||
# cos(angle): +1 → dog behind sheep, -1 → dog on pen side
|
|
||||||
cosine = -float(np.dot(pen_dir, dog_dir))
|
|
||||||
# gate: full credit inside flee range, fades beyond
|
|
||||||
proximity = max(0.0, 1.0 - d_dog / self.FLEE_DIST)
|
|
||||||
align_scores.append(cosine * proximity)
|
|
||||||
alignment = float(np.mean(align_scores)) if align_scores else 0.0
|
|
||||||
else:
|
else:
|
||||||
shaping = approach = alignment = 0.0
|
wL, wR = velocity_to_wheels(
|
||||||
|
vx, vy, self.dog_heading,
|
||||||
|
max_linear=DOG_MAX_LINEAR,
|
||||||
|
wheel_radius=DOG_WHEEL_RADIUS,
|
||||||
|
max_wheel_omega=DOG_MAX_WHEEL_OMEGA,
|
||||||
|
k_turn=4.0,
|
||||||
|
)
|
||||||
|
self.dog_x, self.dog_y, self.dog_heading = kinematics_step(
|
||||||
|
self.dog_x, self.dog_y, self.dog_heading,
|
||||||
|
wL, wR, DOG_WHEEL_RADIUS, DOG_WHEEL_BASE, WEBOTS_DT,
|
||||||
|
slip_std=slip_std,
|
||||||
|
rng=self._np_rng_lidar,
|
||||||
|
)
|
||||||
|
self.dog_x, self.dog_y = clip_to_field(self.dog_x, self.dog_y, margin=0.3)
|
||||||
|
# Extra constraint: dog stays north of the gate area.
|
||||||
|
if self.dog_y < DOG_SOUTH_LIMIT:
|
||||||
|
self.dog_y = DOG_SOUTH_LIMIT
|
||||||
|
|
||||||
reward = shaping * self.W_SHAPING
|
# Step sheep and update penned flags (GT-based).
|
||||||
reward += approach * self.W_APPROACH
|
for i in range(self.n_sheep):
|
||||||
reward += alignment * self.W_ALIGN
|
self._step_one_sheep(i)
|
||||||
reward += newly_penned * self.W_PEN_BONUS
|
for i in range(self.n_sheep):
|
||||||
reward -= self.W_STEP_COST
|
if (not self.sheep_penned[i]
|
||||||
if n_penned == self.n_sheep:
|
and is_penned(self.sheep_x[i], self.sheep_y[i])):
|
||||||
reward += self.W_COMPLETE
|
self.sheep_penned[i] = True
|
||||||
return reward
|
|
||||||
|
|
||||||
def _step_sheep(self, i: int) -> np.ndarray:
|
# LiDAR perception runs after sheep move; feeds the obs and the
|
||||||
"""Apply one timestep of boid dynamics to sheep i."""
|
# imitation reward. Reward/termination still use GT.
|
||||||
pos = self.sheep_pos[i].copy()
|
if self._tracker is not None:
|
||||||
fx, fy = 0.0, 0.0
|
self._update_tracker()
|
||||||
fleeing = False
|
|
||||||
|
|
||||||
# Flee from dog — quadratic ramp (mirrors sheep.py)
|
d_pen, radius = self._flock_metrics()
|
||||||
diff = self.dog_pos - pos
|
reward = self._compute_reward(d_pen, radius, action=action)
|
||||||
dist = float(np.linalg.norm(diff))
|
self.prev_d_pen = d_pen
|
||||||
if 0.01 < dist < self.FLEE_DIST:
|
self.prev_radius = radius
|
||||||
t = 1.0 - dist / self.FLEE_DIST
|
self.prev_n_penned = int(self.sheep_penned.sum())
|
||||||
s = t * t * 5.0
|
|
||||||
fx -= (diff[0] / dist) * s
|
|
||||||
fy -= (diff[1] / dist) * s
|
|
||||||
fleeing = True
|
|
||||||
|
|
||||||
# Separation (inverse-distance) + Cohesion
|
self.steps += 1
|
||||||
cx, cy, cn = 0.0, 0.0, 0
|
all_penned = bool(self.sheep_penned.all())
|
||||||
for j in range(self.n_sheep):
|
terminated = all_penned
|
||||||
if j == i or self.penned[j]:
|
truncated = self.steps >= self.max_steps
|
||||||
continue
|
if all_penned:
|
||||||
dv = self.sheep_pos[j] - pos
|
reward += self.W_DONE
|
||||||
dj = float(np.linalg.norm(dv))
|
|
||||||
if 0.3 < dj < self.COHESION_DIST:
|
|
||||||
cx += self.sheep_pos[j][0]
|
|
||||||
cy += self.sheep_pos[j][1]
|
|
||||||
cn += 1
|
|
||||||
if 0.05 < dj < self.SEPARATION_DIST:
|
|
||||||
push = (self.SEPARATION_DIST - dj) / dj
|
|
||||||
fx -= (dv[0] / dj) * push * 2.5
|
|
||||||
fy -= (dv[1] / dj) * push * 2.5
|
|
||||||
if cn > 0:
|
|
||||||
w = 0.08 if fleeing else 0.15
|
|
||||||
fx += (cx / cn - pos[0]) * w
|
|
||||||
fy += (cy / cn - pos[1]) * w
|
|
||||||
|
|
||||||
# Wall avoidance
|
obs = self._build_obs()
|
||||||
m, F = self.WALL_MARGIN, self.FIELD
|
info = {
|
||||||
if pos[0] < -F + m: fx += ((-F + m - pos[0]) / m) * 6.0
|
"n_sheep": self.n_sheep,
|
||||||
if pos[0] > F - m: fx -= ((pos[0] - (F - m)) / m) * 6.0
|
"n_penned": self.prev_n_penned,
|
||||||
if pos[1] < -F + m: fy += ((-F + m - pos[1]) / m) * 6.0
|
"is_success": all_penned,
|
||||||
if pos[1] > F - m: fy -= ((pos[1] - (F - m)) / m) * 6.0
|
"steps": self.steps,
|
||||||
|
}
|
||||||
|
return obs, float(reward), terminated, truncated, info
|
||||||
|
|
||||||
# Wander — suppressed while fleeing
|
# --- Internals ---
|
||||||
if not fleeing:
|
def _step_one_sheep(self, i: int) -> None:
|
||||||
if self.np_random.random() < 0.02:
|
x, y = float(self.sheep_x[i]), float(self.sheep_y[i])
|
||||||
self.wander_ang[i] += float(self.np_random.uniform(-0.6, 0.6))
|
peers = [(float(self.sheep_x[j]), float(self.sheep_y[j]))
|
||||||
fx += float(np.cos(self.wander_ang[i])) * 0.5
|
for j in range(self.n_sheep) if j != i]
|
||||||
fy += float(np.sin(self.wander_ang[i])) * 0.5
|
heading, speed_motor, new_wander = compute_heading_speed(
|
||||||
|
x, y,
|
||||||
|
penned=bool(self.sheep_penned[i]),
|
||||||
|
dog_xy=(self.dog_x, self.dog_y),
|
||||||
|
peers=peers,
|
||||||
|
wander_angle=float(self.sheep_wander[i]),
|
||||||
|
rng=self._py_rng,
|
||||||
|
)
|
||||||
|
self.sheep_wander[i] = new_wander
|
||||||
|
|
||||||
# Integrate
|
wL, wR = heading_speed_to_wheels(
|
||||||
force = np.array([fx, fy])
|
heading, speed_motor, float(self.sheep_h[i]),
|
||||||
mag = float(np.linalg.norm(force))
|
max_wheel_omega=SHEEP_MAX_WHEEL_OMEGA, k_turn=4.0,
|
||||||
if mag > 0.01:
|
)
|
||||||
top_speed = self.SHEEP_FLEE_V if fleeing else self.SHEEP_WANDER_V
|
nx, ny, nh = kinematics_step(
|
||||||
speed = min(top_speed, mag * 0.3)
|
x, y, float(self.sheep_h[i]), wL, wR,
|
||||||
pos = np.clip(pos + (force / mag) * speed * self.DT,
|
SHEEP_WHEEL_RADIUS, SHEEP_WHEEL_BASE, WEBOTS_DT,
|
||||||
-self.FIELD, self.FIELD)
|
)
|
||||||
|
|
||||||
return pos.astype(np.float32)
|
# Wall clipping.
|
||||||
|
if FIELD_SHAPE == "field_round":
|
||||||
|
in_gate_col = PEN_X[0] <= nx <= PEN_X[1]
|
||||||
|
if in_gate_col:
|
||||||
|
# Allow passage through the gate column (south of the wall).
|
||||||
|
ny = float(np.clip(ny, PEN_Y[0] + 0.2, FIELD_Y[1] - 0.2))
|
||||||
|
else:
|
||||||
|
nx, ny = clip_to_field(nx, ny, margin=0.2)
|
||||||
|
else:
|
||||||
|
nx = float(np.clip(nx, FIELD_X[0] + 0.2, FIELD_X[1] - 0.2))
|
||||||
|
in_gate_col = PEN_X[0] <= nx <= PEN_X[1]
|
||||||
|
if in_gate_col:
|
||||||
|
ny = float(np.clip(ny, PEN_Y[0] + 0.2, FIELD_Y[1] - 0.2))
|
||||||
|
else:
|
||||||
|
ny = float(np.clip(ny, FIELD_Y[0] + 0.2, FIELD_Y[1] - 0.2))
|
||||||
|
|
||||||
|
self.sheep_x[i] = nx
|
||||||
|
self.sheep_y[i] = ny
|
||||||
|
self.sheep_h[i] = nh
|
||||||
|
|
||||||
|
def _flock_metrics(self):
|
||||||
|
"""Return (per-sheep mean distance to pen, max radius from CoM).
|
||||||
|
|
||||||
|
The per-sheep mean (not CoM distance) makes the progress signal
|
||||||
|
sensitive to stragglers: the dog can't game it by herding most of
|
||||||
|
the flock and abandoning one outlier.
|
||||||
|
"""
|
||||||
|
active_mask = ~self.sheep_penned
|
||||||
|
if not active_mask.any():
|
||||||
|
return 0.0, 0.0
|
||||||
|
xs = self.sheep_x[active_mask]
|
||||||
|
ys = self.sheep_y[active_mask]
|
||||||
|
per_sheep_d = np.hypot(xs - PEN_ENTRY[0], ys - PEN_ENTRY[1])
|
||||||
|
d_pen = float(per_sheep_d.mean())
|
||||||
|
com_x, com_y = float(xs.mean()), float(ys.mean())
|
||||||
|
if active_mask.sum() == 1:
|
||||||
|
radius = 0.0
|
||||||
|
else:
|
||||||
|
radius = float(np.hypot(xs - com_x, ys - com_y).max())
|
||||||
|
return d_pen, radius
|
||||||
|
|
||||||
|
def _compute_reward(self, d_pen: float, radius: float, action=None) -> float:
|
||||||
|
"""Sparse pen jackpot + dense progress shaping + Strömbom imitation."""
|
||||||
|
n_penned = int(self.sheep_penned.sum())
|
||||||
|
delta_pen = n_penned - self.prev_n_penned
|
||||||
|
|
||||||
|
d_progress = max(-5.0, min(5.0, self.prev_d_pen - d_pen))
|
||||||
|
r = (self.W_PEN_DELTA * delta_pen
|
||||||
|
+ self.W_PROGRESS * d_progress
|
||||||
|
+ self.W_TIME)
|
||||||
|
|
||||||
|
if action is not None and self.W_IMITATE > 0.0:
|
||||||
|
positions = self._perceived_positions()
|
||||||
|
if positions:
|
||||||
|
sx, sy, _mode = strombom_action(
|
||||||
|
(self.dog_x, self.dog_y), positions, PEN_ENTRY,
|
||||||
|
)
|
||||||
|
a_norm = math.hypot(float(action[0]), float(action[1]))
|
||||||
|
s_norm = math.hypot(sx, sy)
|
||||||
|
if a_norm > 1e-3 and s_norm > 1e-3:
|
||||||
|
cos_sim = (float(action[0]) * sx + float(action[1]) * sy) / (a_norm * s_norm)
|
||||||
|
r += self.W_IMITATE * cos_sim
|
||||||
|
|
||||||
|
return float(r)
|
||||||
|
|
||||||
|
def _build_single_obs(self) -> np.ndarray:
    """Build one (unstacked) observation frame through ``build_obs``.

    With a tracker attached the sheep list is the tracker's active set and
    every entry is reported as not penned; otherwise the env's own sheep
    coordinates and penned flags are used.
    """
    tracker = self._tracker
    if tracker is None:
        sheep_xy_list = list(zip(self.sheep_x.tolist(), self.sheep_y.tolist()))
        sheep_penned_list = self.sheep_penned.tolist()
    else:
        # LiDAR mode: the obs sees only the tracker's active set.
        sheep_xy_list = list(tracker.get_positions().values())
        sheep_penned_list = [False for _ in sheep_xy_list]

    return build_obs(
        (self.dog_x, self.dog_y), self.dog_heading,
        sheep_xy_list, sheep_penned_list,
        n_max=self._max_n_sheep,
        n_expected=self.n_sheep,
    )
|
||||||
|
|
||||||
|
def _build_obs(self) -> np.ndarray:
|
||||||
|
single = self._build_single_obs()
|
||||||
|
if self._frame_stack <= 1:
|
||||||
|
return single
|
||||||
|
# On reset the buffer is empty — pad with copies of the first frame.
|
||||||
|
if not self._frame_buffer:
|
||||||
|
self._frame_buffer = [single.copy() for _ in range(self._frame_stack)]
|
||||||
|
else:
|
||||||
|
self._frame_buffer.append(single)
|
||||||
|
if len(self._frame_buffer) > self._frame_stack:
|
||||||
|
self._frame_buffer = self._frame_buffer[-self._frame_stack:]
|
||||||
|
return np.concatenate(self._frame_buffer, axis=0).astype(np.float32)
|
||||||
|
|
||||||
|
# --- LiDAR perception ---
|
||||||
|
def _all_sheep_xy(self) -> list[tuple[float, float]]:
|
||||||
|
"""Every sheep, including penned (the LiDAR sees them)."""
|
||||||
|
return [(float(self.sheep_x[i]), float(self.sheep_y[i]))
|
||||||
|
for i in range(self.n_sheep)]
|
||||||
|
|
||||||
|
def _update_tracker(self) -> None:
    """Simulate one LiDAR scan from the dog's pose and feed the tracker.

    The scan covers every sheep (penned or not); the resulting detections,
    optionally augmented with injected false positives, are passed to
    ``self._tracker.update``.
    """
    cfg = self._herding_cfg
    lidar_cfg = None if cfg is None else cfg.lidar
    detection_cfg = None if cfg is None else cfg.detection

    pose = (self.dog_x, self.dog_y, self.dog_heading)
    ranges = simulate_scan(
        *pose,
        self._all_sheep_xy(),
        rng=self._np_rng_lidar,
        lidar_cfg=lidar_cfg,
    )
    detections = detections_from_scan(
        ranges, *pose,
        detection_cfg=detection_cfg,
        lidar_cfg=lidar_cfg,
    )

    # Domain randomisation: inject false-positive detections near static
    # features to mimic the spurious clusters Webots' physical LiDAR
    # produces from real 3D geometry (walls, posts, fence rails).
    dr = None if cfg is None else cfg.domain_random
    inject = (dr is not None
              and dr.fp_rate > 0.0
              and self._np_rng_lidar is not None)
    if inject:
        detections = [
            *detections,
            *self._sample_false_positives(dr.fp_rate, dr.fp_std_pos),
        ]

    self._tracker.update(detections)
|
||||||
|
|
||||||
|
# Static feature anchor points used for FP injection.
|
||||||
|
# The rectangular list covers gate posts and field corners; the round
|
||||||
|
# list covers just the gate posts (the circular wall is handled separately).
|
||||||
|
_FP_ANCHORS_RECT = (
|
||||||
|
(10.0, -15.0), (13.0, -15.0), # gate posts
|
||||||
|
(15.0, 15.0), (15.0, -15.0),
|
||||||
|
(-15.0, 15.0), (-15.0, -15.0), # field corners
|
||||||
|
(15.0, 0.0), (-15.0, 0.0), # mid-wall returns
|
||||||
|
(0.0, 15.0), (0.0, -15.0),
|
||||||
|
)
|
||||||
|
_FP_ANCHORS_ROUND = (
|
||||||
|
(0.0, -15.0), # gate centre
|
||||||
|
(-1.5, -15.0), (1.5, -15.0), # gate posts
|
||||||
|
(0.0, 15.0), # north wall
|
||||||
|
(10.6, -10.6), (-10.6, -10.6), # circular wall quadrants
|
||||||
|
)
|
||||||
|
|
||||||
|
def _sample_false_positives(
    self, fp_rate: float, fp_std: float,
) -> list[tuple[float, float]]:
    """Return a list of spurious (x, y) detections for this step.

    The number of detections is Poisson-distributed with mean ``fp_rate``.
    Each one is a uniformly chosen static anchor point (round or rectangular
    set, depending on ``FIELD_SHAPE``) displaced by zero-mean Gaussian noise
    with standard deviation ``fp_std`` on each axis. All randomness comes
    from ``self._np_rng_lidar``.
    """
    from herding.world.geometry import FIELD_SHAPE

    if FIELD_SHAPE == "field_round":
        anchors = self._FP_ANCHORS_ROUND
    else:
        anchors = self._FP_ANCHORS_RECT

    rng = self._np_rng_lidar
    count = int(rng.poisson(fp_rate))
    if count == 0:
        return []

    # Draw all anchor indices first, then all offsets.
    picks = rng.integers(0, len(anchors), size=count)
    jitter = rng.normal(0.0, fp_std, size=(count, 2))

    spurious = []
    for pick, (dx, dy) in zip(picks, jitter):
        base_x, base_y = anchors[pick]
        spurious.append((float(base_x + dx), float(base_y + dy)))
    return spurious
|
||||||
|
|
||||||
|
def perceived_positions(self) -> dict[str, tuple[float, float]]:
|
||||||
|
"""What the controller would "see" this step: tracker output in
|
||||||
|
LiDAR mode, ground truth in privileged mode. Used by demo
|
||||||
|
collection and analytic-policy eval so the teacher runs on the
|
||||||
|
same perception the deployed controller has.
|
||||||
|
"""
|
||||||
|
if self._tracker is not None:
|
||||||
|
return self._tracker.get_positions()
|
||||||
|
return {f"s{i}": (float(self.sheep_x[i]), float(self.sheep_y[i]))
|
||||||
|
for i in range(self.n_sheep) if not self.sheep_penned[i]}
|
||||||
|
|
||||||
|
_perceived_positions = perceived_positions
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
gymnasium>=0.29
|
# Pin major versions; SB3 2.x requires gymnasium and torch >= 1.13.
|
||||||
stable-baselines3>=2.3
|
gymnasium>=0.29,<2.0
|
||||||
torch>=2.2
|
stable-baselines3[extra]>=2.3,<3.0
|
||||||
numpy>=1.26
|
torch>=2.1
|
||||||
matplotlib>=3.8
|
numpy>=1.24
|
||||||
tensorboard>=2.16
|
pyyaml>=6.0
|
||||||
|
tensorboard>=2.14
|
||||||
|
tqdm>=4.66
|
||||||
|
pytest>=8.0
|
||||||
|
|||||||
@@ -0,0 +1,449 @@
|
|||||||
|
"""KL-regularised PPO fine-tune of a behaviour-cloned policy.
|
||||||
|
|
||||||
|
The trainable policy is initialised from ``runs/bc/policy.zip``. A
|
||||||
|
frozen copy of the same weights becomes the reference; each PPO loss
|
||||||
|
gets an extra ``β · KL(π ‖ π_ref)`` term so the policy can only move
|
||||||
|
within a trust region around BC. ``log_std`` is fixed small to keep
|
||||||
|
exploration tight.
|
||||||
|
|
||||||
|
Output: ``runs/rl/policy.zip`` — same SB3 format as the BC checkpoint,
|
||||||
|
loadable by ``HERDING_MODE=rl`` in the dog controller.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
python -m training.rl.train \\
|
||||||
|
--bc training/runs/bc \\
|
||||||
|
--out training/runs/rl \\
|
||||||
|
--total-timesteps 2000000
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Configure field geometry before other herding imports read it at module level.
|
||||||
|
from herding.world.geometry import configure_from_args as _configure_from_args
|
||||||
|
_configure_from_args()
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch as th
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback
|
||||||
|
from stable_baselines3.common.monitor import Monitor
|
||||||
|
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
|
||||||
|
|
||||||
|
from herding.perception.obs import OBS_DIM
|
||||||
|
from training.herding_env import HerdingEnv
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------
|
||||||
|
# Env factory
|
||||||
|
# --------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _make_env(rank: int, seed: int, frame_stack: int,
|
||||||
|
drive_mode: str = "differential",
|
||||||
|
difficulty: float = 1.0,
|
||||||
|
max_n_sheep: int = 10,
|
||||||
|
herding_cfg=None):
|
||||||
|
def _thunk():
|
||||||
|
env = HerdingEnv(seed=seed + rank, frame_stack=frame_stack,
|
||||||
|
drive_mode=drive_mode, difficulty=difficulty,
|
||||||
|
max_n_sheep=max_n_sheep, herding_cfg=herding_cfg)
|
||||||
|
env = Monitor(env, info_keywords=("is_success", "n_sheep", "n_penned"))
|
||||||
|
return env
|
||||||
|
return _thunk
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------
|
||||||
|
# KL-regularised PPO
|
||||||
|
# --------------------------------------------------------------------
|
||||||
|
|
||||||
|
class KLPPO(PPO):
    """PPO with an extra KL-to-reference penalty in the policy loss.

    Overrides only ``train()``; rollout buffer, clipped surrogate, value
    loss and entropy bonus are unchanged from stock SB3 PPO.

    The penalty is ``kl_coef * KL(pi || pi_ref)``, computed between the torch
    distributions of the trainable policy and the frozen ``ref_policy`` and
    summed over the last (action) axis. That form is for diagonal-Gaussian,
    i.e. continuous-action, policies.

    Args:
        *args, **kwargs: forwarded unchanged to ``PPO.__init__``.
        ref_policy: reference policy. When given it is switched to eval mode
            and its parameters are frozen. It may be ``None`` at construction
            and assigned afterwards, but must be set before ``train()`` runs.
        kl_coef: weight of the KL-to-reference term in the total loss.
    """

    def __init__(self, *args, ref_policy=None, kl_coef: float = 0.05, **kwargs):
        super().__init__(*args, **kwargs)
        self.ref_policy = ref_policy
        if self.ref_policy is not None:
            # The reference must never receive gradient updates.
            self.ref_policy.set_training_mode(False)
            for p in self.ref_policy.parameters():
                p.requires_grad = False
        self.kl_coef = kl_coef

    def train(self) -> None:
        """Run the PPO update over the rollout buffer with the KL penalty added.

        Follows the stock SB3 ``PPO.train()`` structure with the
        KL-to-reference term added inside the inner minibatch loop.

        Raises:
            RuntimeError: if ``ref_policy`` has not been set.
        """
        # Fail before any forward pass instead of inside the first minibatch.
        if self.ref_policy is None:
            raise RuntimeError("KLPPO.train called without ref_policy")

        self.policy.set_training_mode(True)
        self._update_learning_rate(self.policy.optimizer)
        clip_range = self.clip_range(self._current_progress_remaining)
        if self.clip_range_vf is not None:
            clip_range_vf = self.clip_range_vf(self._current_progress_remaining)

        entropy_losses, pg_losses, value_losses, kl_losses = [], [], [], []
        clip_fractions = []
        continue_training = True

        for epoch in range(self.n_epochs):
            approx_kl_divs = []
            for rollout_data in self.rollout_buffer.get(self.batch_size):
                # No discrete-action cast here: the previous isinstance test
                # compared the action space against torch's Distribution base
                # class and could never match, and the KL term below is the
                # diagonal-Gaussian form anyway.
                actions = rollout_data.actions

                values, log_prob, entropy = self.policy.evaluate_actions(
                    rollout_data.observations, actions)
                values = values.flatten()
                advantages = rollout_data.advantages
                if self.normalize_advantage and len(advantages) > 1:
                    advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

                # Clipped surrogate objective.
                ratio = th.exp(log_prob - rollout_data.old_log_prob)
                policy_loss_1 = advantages * ratio
                policy_loss_2 = advantages * th.clamp(ratio, 1 - clip_range, 1 + clip_range)
                policy_loss = -th.min(policy_loss_1, policy_loss_2).mean()
                pg_losses.append(policy_loss.item())
                clip_fraction = th.mean((th.abs(ratio - 1) > clip_range).float()).item()
                clip_fractions.append(clip_fraction)

                # Value loss, optionally clipped around the old value estimate.
                if self.clip_range_vf is None:
                    values_pred = values
                else:
                    values_pred = rollout_data.old_values + th.clamp(
                        values - rollout_data.old_values, -clip_range_vf, clip_range_vf)
                value_loss = F.mse_loss(rollout_data.returns, values_pred)
                value_losses.append(value_loss.item())

                # Entropy bonus; fall back to -log_prob when no analytic entropy.
                if entropy is None:
                    entropy_loss = -th.mean(-log_prob)
                else:
                    entropy_loss = -th.mean(entropy)
                entropy_losses.append(entropy_loss.item())

                # KL-to-reference: closed-form KL between two diagonal
                # Gaussians, summed over the action axis, mean over batch.
                # Only the reference side is evaluated without gradients.
                with th.no_grad():
                    ref_dist = self.ref_policy.get_distribution(rollout_data.observations)
                pi_dist = self.policy.get_distribution(rollout_data.observations)
                kl_div = th.distributions.kl.kl_divergence(
                    pi_dist.distribution, ref_dist.distribution).sum(dim=-1).mean()
                kl_losses.append(kl_div.item())

                loss = (policy_loss
                        + self.ent_coef * entropy_loss
                        + self.vf_coef * value_loss
                        + self.kl_coef * kl_div)

                # Estimate of the KL to the rollout policy, used for early stopping.
                with th.no_grad():
                    log_ratio = log_prob - rollout_data.old_log_prob
                    approx_kl_div = th.mean((th.exp(log_ratio) - 1) - log_ratio).cpu().numpy()
                    approx_kl_divs.append(approx_kl_div)

                if self.target_kl is not None and approx_kl_div > 1.5 * self.target_kl:
                    continue_training = False
                    if self.verbose >= 1:
                        print(f"Early stopping at step {epoch} due to reaching max kl: {approx_kl_div:.2f}")
                    break

                self.policy.optimizer.zero_grad()
                loss.backward()
                th.nn.utils.clip_grad_norm_(self.policy.parameters(), self.max_grad_norm)
                self.policy.optimizer.step()

            self._n_updates += 1
            if not continue_training:
                break

        explained_var = self._explained_variance()
        self.logger.record("train/entropy_loss", float(np.mean(entropy_losses)))
        self.logger.record("train/policy_gradient_loss", float(np.mean(pg_losses)))
        self.logger.record("train/value_loss", float(np.mean(value_losses)))
        self.logger.record("train/kl_to_reference", float(np.mean(kl_losses)))
        self.logger.record("train/approx_kl", float(np.mean(approx_kl_divs)))
        self.logger.record("train/clip_fraction", float(np.mean(clip_fractions)))
        self.logger.record("train/explained_variance", float(explained_var))
        if hasattr(self.policy, "log_std"):
            self.logger.record("train/std", th.exp(self.policy.log_std).mean().item())

    def _explained_variance(self) -> float:
        """Return ``1 - Var(returns - values) / Var(returns)`` over the buffer.

        Returns NaN when the returns have zero variance.
        """
        y_pred = self.rollout_buffer.values.flatten()
        y_true = self.rollout_buffer.returns.flatten()
        var_y = np.var(y_true)
        if var_y == 0:
            return float("nan")
        return float(1 - np.var(y_true - y_pred) / var_y)
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------
|
||||||
|
# Main
|
||||||
|
# --------------------------------------------------------------------
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("--bc", default="training/runs/bc",
|
||||||
|
help="Directory containing the BC initialisation.")
|
||||||
|
parser.add_argument("--out", default="training/runs/rl",
|
||||||
|
help="Where to save the fine-tuned policy.")
|
||||||
|
parser.add_argument("--total-timesteps", type=int, default=2_000_000)
|
||||||
|
parser.add_argument("--n-envs", type=int, default=8)
|
||||||
|
parser.add_argument("--learning-rate", type=float, default=5e-5)
|
||||||
|
parser.add_argument("--kl-coef", type=float, default=0.05,
|
||||||
|
help="Coefficient of the KL-to-reference penalty.")
|
||||||
|
parser.add_argument("--log-std", type=float, default=-1.5,
|
||||||
|
help="Initial (and frozen) log_std for exploration.")
|
||||||
|
parser.add_argument("--freeze-log-std", action="store_true", default=True)
|
||||||
|
parser.add_argument("--n-steps", type=int, default=2048)
|
||||||
|
parser.add_argument("--batch-size", type=int, default=256)
|
||||||
|
parser.add_argument("--n-epochs", type=int, default=10)
|
||||||
|
parser.add_argument("--gamma", type=float, default=0.995)
|
||||||
|
parser.add_argument("--gae-lambda", type=float, default=0.95)
|
||||||
|
parser.add_argument("--clip-range", type=float, default=0.1)
|
||||||
|
parser.add_argument("--ent-coef", type=float, default=0.0)
|
||||||
|
parser.add_argument("--target-kl", type=float, default=0.02,
|
||||||
|
help="SB3 per-batch KL early-stop guard.")
|
||||||
|
parser.add_argument("--seed", type=int, default=0)
|
||||||
|
parser.add_argument("--device", default="cpu")
|
||||||
|
parser.add_argument("--drive-mode", default=None,
|
||||||
|
choices=["differential", "mecanum"],
|
||||||
|
help="Drive mode. If not set, inferred from "
|
||||||
|
"BC action dimension (2→differential, 3→mecanum).")
|
||||||
|
parser.add_argument("--imitate-weight", type=float, default=None,
|
||||||
|
help="Override env.W_IMITATE (e.g. 0.0 to drop "
|
||||||
|
"Strömbom imitation during fine-tune).")
|
||||||
|
parser.add_argument("--time-weight", type=float, default=None,
|
||||||
|
help="Override env.W_TIME (e.g. -0.1 for a "
|
||||||
|
"per-step time penalty).")
|
||||||
|
parser.add_argument("--difficulty", type=float, default=1.0,
|
||||||
|
help="HerdingEnv difficulty for PPO rollouts. "
|
||||||
|
"Must match eval (1.0) to avoid train/eval "
|
||||||
|
"distribution mismatch.")
|
||||||
|
parser.add_argument("--max-n-sheep", type=int, default=10,
|
||||||
|
help="Upper bound on flock size sampled each reset.")
|
||||||
|
parser.add_argument("--world", default=None,
|
||||||
|
choices=["field", "field_round"],
|
||||||
|
help="World shape. If not set, uses HERDING_WORLD "
|
||||||
|
"env var or defaults to 'field'.")
|
||||||
|
# Domain randomisation
|
||||||
|
parser.add_argument("--fp-rate", type=float, default=0.0,
|
||||||
|
help="Mean false-positive detections per step (Poisson λ).")
|
||||||
|
parser.add_argument("--action-smooth", type=float, default=0.0,
|
||||||
|
help="EMA on dog actions (0=none, 0.55=Webots match).")
|
||||||
|
parser.add_argument("--wheel-slip-std", type=float, default=0.0,
|
||||||
|
help="Gaussian wheel-speed noise std (rad/s).")
|
||||||
|
args = parser.parse_args()
|
||||||
|
# --world was already honoured in the early pre-parse above; here we
|
||||||
|
# just sanity-check that the final argparse view agrees.
|
||||||
|
if args.world is not None:
|
||||||
|
from herding.world.geometry import FIELD_SHAPE as _CURRENT_SHAPE
|
||||||
|
if args.world != _CURRENT_SHAPE:
|
||||||
|
print(f"[rl] WARNING: --world={args.world} but geometry is "
|
||||||
|
f"'{_CURRENT_SHAPE}'. File a bug.")
|
||||||
|
|
||||||
|
bc_zip = Path(args.bc) / "policy.zip"
|
||||||
|
if not bc_zip.exists():
|
||||||
|
raise SystemExit(
|
||||||
|
f"BC checkpoint not found at {bc_zip}. Train bc first with "
|
||||||
|
f"`python -m training.bc.pretrain`."
|
||||||
|
)
|
||||||
|
|
||||||
|
out = Path(args.out)
|
||||||
|
out.mkdir(parents=True, exist_ok=True)
|
||||||
|
(out / "checkpoints").mkdir(exist_ok=True)
|
||||||
|
(out / "best").mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
# Infer frame_stack from the BC checkpoint's obs space.
|
||||||
|
ref_only = PPO.load(str(bc_zip), device=args.device)
|
||||||
|
obs_dim = int(ref_only.observation_space.shape[0])
|
||||||
|
if obs_dim % OBS_DIM != 0:
|
||||||
|
raise SystemExit(f"BC obs dim {obs_dim} is not a multiple of {OBS_DIM}.")
|
||||||
|
frame_stack = obs_dim // OBS_DIM
|
||||||
|
print(f"[rl] BC obs dim {obs_dim} → frame_stack={frame_stack}")
|
||||||
|
|
||||||
|
# Infer drive mode from BC action dim if not explicitly set.
|
||||||
|
bc_action_dim = int(ref_only.action_space.shape[0])
|
||||||
|
if args.drive_mode is not None:
|
||||||
|
drive_mode = args.drive_mode
|
||||||
|
elif bc_action_dim == 3:
|
||||||
|
drive_mode = "mecanum"
|
||||||
|
else:
|
||||||
|
drive_mode = "differential"
|
||||||
|
print(f"[rl] drive_mode={drive_mode} (BC action_dim={bc_action_dim})")
|
||||||
|
|
||||||
|
from herding.config import (
|
||||||
|
HerdingConfig, HERDING_MEC_WEBOTS_360, DomainRandomConfig, RobotConfig,
|
||||||
|
)
|
||||||
|
herding_cfg = None
|
||||||
|
# Mecanum trains under HERDING_MEC_WEBOTS_360 (360° LiDAR +
|
||||||
|
# kinematic-matched strafe scaling + small compass-noise DR).
|
||||||
|
is_mecanum = (drive_mode == "mecanum")
|
||||||
|
if is_mecanum or args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
|
||||||
|
if is_mecanum:
|
||||||
|
base = HERDING_MEC_WEBOTS_360
|
||||||
|
strafe_eff = base.robot.strafe_efficiency
|
||||||
|
strafe_bleed = base.robot.strafe_to_forward_bleed
|
||||||
|
compass_std = 0.1 # heading robustness DR
|
||||||
|
else:
|
||||||
|
base = None
|
||||||
|
strafe_eff = 1.0
|
||||||
|
strafe_bleed = 0.0
|
||||||
|
compass_std = 0.0
|
||||||
|
if is_mecanum:
|
||||||
|
herding_cfg = base.replace(
|
||||||
|
domain_random=DomainRandomConfig(
|
||||||
|
fp_rate=args.fp_rate,
|
||||||
|
wheel_slip_std=args.wheel_slip_std,
|
||||||
|
compass_noise_std=compass_std,
|
||||||
|
),
|
||||||
|
robot=RobotConfig(
|
||||||
|
action_smooth=args.action_smooth,
|
||||||
|
strafe_efficiency=strafe_eff,
|
||||||
|
strafe_to_forward_bleed=strafe_bleed,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
herding_cfg = HerdingConfig(
|
||||||
|
domain_random=DomainRandomConfig(
|
||||||
|
fp_rate=args.fp_rate,
|
||||||
|
wheel_slip_std=args.wheel_slip_std,
|
||||||
|
),
|
||||||
|
robot=RobotConfig(
|
||||||
|
action_smooth=args.action_smooth,
|
||||||
|
strafe_efficiency=strafe_eff,
|
||||||
|
strafe_to_forward_bleed=strafe_bleed,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
print(f"[rl] domain-random: fp_rate={args.fp_rate} "
|
||||||
|
f"action_smooth={args.action_smooth} "
|
||||||
|
f"wheel_slip_std={args.wheel_slip_std} "
|
||||||
|
f"strafe_eff={strafe_eff:.2f} strafe_bleed={strafe_bleed:.2f} "
|
||||||
|
f"compass_noise={compass_std}")
|
||||||
|
|
||||||
|
env_fns = [_make_env(i, args.seed, frame_stack, drive_mode,
|
||||||
|
difficulty=args.difficulty,
|
||||||
|
max_n_sheep=args.max_n_sheep,
|
||||||
|
herding_cfg=herding_cfg)
|
||||||
|
for i in range(args.n_envs)]
|
||||||
|
venv = SubprocVecEnv(env_fns) if args.n_envs > 1 else DummyVecEnv(env_fns)
|
||||||
|
eval_venv = DummyVecEnv([_make_env(99, args.seed + 999, frame_stack,
|
||||||
|
drive_mode,
|
||||||
|
difficulty=args.difficulty,
|
||||||
|
max_n_sheep=args.max_n_sheep,
|
||||||
|
herding_cfg=herding_cfg)])
|
||||||
|
print(f"[rl] difficulty={args.difficulty} max_n_sheep={args.max_n_sheep}")
|
||||||
|
|
||||||
|
# Reward-shaping overrides (broadcast to every env instance).
|
||||||
|
def _broadcast(method: str, value):
|
||||||
|
for v in (venv, eval_venv):
|
||||||
|
try:
|
||||||
|
v.env_method(method, value)
|
||||||
|
except AttributeError:
|
||||||
|
v.venv.env_method(method, value)
|
||||||
|
if args.imitate_weight is not None:
|
||||||
|
_broadcast("set_imitate_weight", args.imitate_weight)
|
||||||
|
print(f"[rl] W_IMITATE overridden to {args.imitate_weight}")
|
||||||
|
if args.time_weight is not None:
|
||||||
|
_broadcast("set_time_weight", args.time_weight)
|
||||||
|
print(f"[rl] W_TIME overridden to {args.time_weight}")
|
||||||
|
|
||||||
|
# Build a fresh KLPPO at the right obs/action shape, then copy BC
|
||||||
|
# weights into both the trainable policy and the frozen reference.
|
||||||
|
model = KLPPO(
|
||||||
|
"MlpPolicy", venv,
|
||||||
|
ref_policy=None, # filled in below
|
||||||
|
kl_coef=args.kl_coef,
|
||||||
|
learning_rate=args.learning_rate,
|
||||||
|
n_steps=args.n_steps,
|
||||||
|
batch_size=args.batch_size,
|
||||||
|
n_epochs=args.n_epochs,
|
||||||
|
gamma=args.gamma,
|
||||||
|
gae_lambda=args.gae_lambda,
|
||||||
|
clip_range=args.clip_range,
|
||||||
|
ent_coef=args.ent_coef,
|
||||||
|
target_kl=args.target_kl,
|
||||||
|
policy_kwargs=dict(
|
||||||
|
net_arch=dict(pi=[512, 512], vf=[512, 512]),
|
||||||
|
log_std_init=args.log_std,
|
||||||
|
),
|
||||||
|
verbose=1,
|
||||||
|
seed=args.seed,
|
||||||
|
device=args.device,
|
||||||
|
tensorboard_log=str(out / "tb"),
|
||||||
|
)
|
||||||
|
|
||||||
|
# strict=False — the BC value head wasn't trained; PPO trains it.
|
||||||
|
bc_state = ref_only.policy.state_dict()
|
||||||
|
missing, unexpected = model.policy.load_state_dict(bc_state, strict=False)
|
||||||
|
print(f"[rl] BC → policy: missing={len(missing)} unexpected={len(unexpected)}")
|
||||||
|
|
||||||
|
ref_policy = type(model.policy)(
|
||||||
|
observation_space=model.observation_space,
|
||||||
|
action_space=model.action_space,
|
||||||
|
lr_schedule=lambda _: 0.0,
|
||||||
|
net_arch=dict(pi=[512, 512], vf=[512, 512]),
|
||||||
|
log_std_init=args.log_std,
|
||||||
|
).to(args.device)
|
||||||
|
ref_policy.load_state_dict(bc_state, strict=False)
|
||||||
|
model.ref_policy = ref_policy
|
||||||
|
model.ref_policy.set_training_mode(False)
|
||||||
|
for p in model.ref_policy.parameters():
|
||||||
|
p.requires_grad = False
|
||||||
|
|
||||||
|
# Force both policies to the same log_std so the KL term measures
|
||||||
|
# mean drift only, not a std mismatch carried over from BC.
|
||||||
|
with th.no_grad():
|
||||||
|
model.policy.log_std.fill_(args.log_std)
|
||||||
|
model.ref_policy.log_std.fill_(args.log_std)
|
||||||
|
if args.freeze_log_std:
|
||||||
|
model.policy.log_std.requires_grad = False
|
||||||
|
print(f"[rl] log_std frozen at {args.log_std} (σ ≈ {np.exp(args.log_std):.3f})")
|
||||||
|
|
||||||
|
ckpt_cb = CheckpointCallback(
|
||||||
|
save_freq=max(1, 50_000 // args.n_envs),
|
||||||
|
save_path=str(out / "checkpoints"),
|
||||||
|
name_prefix="ppo",
|
||||||
|
)
|
||||||
|
# EvalCallback writes <save_path>/best_model.zip on every new best
|
||||||
|
# eval reward. We send it straight to ``out/`` and rename to
|
||||||
|
# ``policy.zip`` after training so the deployed file lives at the
|
||||||
|
# canonical path.
|
||||||
|
eval_cb = EvalCallback(
|
||||||
|
eval_venv,
|
||||||
|
best_model_save_path=str(out),
|
||||||
|
log_path=str(out / "evals"),
|
||||||
|
eval_freq=max(1, 20_000 // args.n_envs),
|
||||||
|
n_eval_episodes=5,
|
||||||
|
deterministic=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
print(f"[rl] training: total_timesteps={args.total_timesteps} "
|
||||||
|
f"n_envs={args.n_envs} lr={args.learning_rate} kl_coef={args.kl_coef}")
|
||||||
|
model.learn(total_timesteps=args.total_timesteps,
|
||||||
|
callback=[ckpt_cb, eval_cb], progress_bar=True)
|
||||||
|
|
||||||
|
# Save the end-of-training state for debugging convergence behaviour.
|
||||||
|
model.save(out / "final.zip")
|
||||||
|
|
||||||
|
# Promote the EvalCallback's best-by-eval-reward snapshot to the
|
||||||
|
# canonical ``policy.zip`` (what the controller loads). Fall back
|
||||||
|
# to the final state if eval never recorded a "best".
|
||||||
|
import shutil
|
||||||
|
best_zip = out / "best_model.zip"
|
||||||
|
policy_zip = out / "policy.zip"
|
||||||
|
if best_zip.exists():
|
||||||
|
if policy_zip.exists():
|
||||||
|
policy_zip.unlink()
|
||||||
|
best_zip.rename(policy_zip)
|
||||||
|
print(f"[rl] best snapshot → {policy_zip} (final state kept at {out/'final.zip'})")
|
||||||
|
else:
|
||||||
|
shutil.copy(out / "final.zip", policy_zip)
|
||||||
|
print(f"[rl] no best snapshot recorded; using final → {policy_zip}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,174 @@
|
|||||||
|
"""Recurrent-PPO (LSTM) policy trainer for the herding env.
|
||||||
|
|
||||||
|
Motivation
|
||||||
|
----------
|
||||||
|
The MLP+frame-stack policy struggles with partial observability under
|
||||||
|
the 140° Webots LiDAR: the tracker briefly empties when the dog turns,
|
||||||
|
and sporadic FP tracks at static features confuse the policy. An LSTM
|
||||||
|
gives the policy unbounded temporal memory so it can:
|
||||||
|
|
||||||
|
* keep modelling sheep positions when the tracker briefly drops them,
|
||||||
|
* distinguish persistent (real) tracks from intermittent (phantom) ones.
|
||||||
|
|
||||||
|
This is the literature-correct fix for partial-observability + noisy
|
||||||
|
perception. Trains from scratch (no BC init) using vanilla PPO without
|
||||||
|
the KL-to-reference term (no reference exists when starting clean).
|
||||||
|
|
||||||
|
Usage
|
||||||
|
-----
|
||||||
|
python -m training.rl.train_lstm \\
|
||||||
|
--out training/runs/lstm_differential_field \\
|
||||||
|
--drive-mode differential --world field \\
|
||||||
|
--total-timesteps 3000000 \\
|
||||||
|
--use-webots-preset --fp-rate 0.0 --action-smooth 0.55
|
||||||
|
|
||||||
|
Frame stack is forced to 1 since the LSTM provides its own memory.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Configure field geometry before other herding imports read it at module level.
|
||||||
|
from herding.world.geometry import configure_from_args as _configure_from_args
|
||||||
|
_configure_from_args()
|
||||||
|
|
||||||
|
from sb3_contrib import RecurrentPPO
|
||||||
|
from stable_baselines3.common.callbacks import EvalCallback
|
||||||
|
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
|
||||||
|
|
||||||
|
from herding.world.geometry import MAX_SHEEP
|
||||||
|
from training.herding_env import HerdingEnv
|
||||||
|
|
||||||
|
|
||||||
|
def _make_env(rank: int, seed: int, drive_mode: str, difficulty: float,
              max_n_sheep: int, herding_cfg):
    """Return a zero-argument factory that builds one ``HerdingEnv``.

    The factory form is what ``SubprocVecEnv`` / ``DummyVecEnv`` expect: the
    env is only constructed when the returned callable is invoked.

    Args:
        rank: Index of this env; added to ``seed`` to derive the env's seed.
        seed: Base seed shared by all envs of a run.
        drive_mode: Passed through to ``HerdingEnv(drive_mode=...)``.
        difficulty: Passed through to ``HerdingEnv(difficulty=...)``.
        max_n_sheep: Passed through to ``HerdingEnv(max_n_sheep=...)``.
        herding_cfg: Passed through to ``HerdingEnv(herding_cfg=...)``;
            may be ``None``.

    Returns:
        A callable that creates and returns the configured env.
    """
    # Frame stack is pinned to 1: the LSTM supplies the temporal memory.
    env_kwargs = dict(
        max_n_sheep=max_n_sheep,
        difficulty=difficulty,
        seed=seed + rank,
        frame_stack=1,
        drive_mode=drive_mode,
        herding_cfg=herding_cfg,
    )

    def _init():
        return HerdingEnv(**env_kwargs)

    return _init
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Train a RecurrentPPO (LSTM) herding policy from command-line flags.

    Steps, in order:
      1. Parse CLI arguments.
      2. Build the herding config: the ``HERDING_WEBOTS`` preset with
         domain-randomisation overrides when ``--use-webots-preset`` is set,
         otherwise a plain ``HerdingConfig`` (or ``None`` when every
         randomisation knob is zero).
      3. Create the training vec-env (subprocess-based when ``--n-envs`` > 1)
         and a single-env evaluation vec-env.
      4. Train with an ``EvalCallback`` that snapshots the best model under
         ``<out>/best``.
      5. Write ``<out>/policy.zip`` (best snapshot if one exists, else the
         final state) and ``<out>/final.zip``.

    Both vec-envs are closed in a ``finally`` block so worker subprocesses do
    not outlive the run when training fails or is interrupted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", required=True,
                        help="Output directory for the LSTM policy.")
    parser.add_argument("--total-timesteps", type=int, default=3_000_000)
    parser.add_argument("--n-envs", type=int, default=8)
    parser.add_argument("--n-steps", type=int, default=256)
    parser.add_argument("--lstm-hidden", type=int, default=128)
    parser.add_argument("--lr", type=float, default=3e-4)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--max-n-sheep", type=int, default=MAX_SHEEP)
    parser.add_argument("--difficulty", type=float, default=1.0)
    parser.add_argument("--drive-mode", default="differential",
                        choices=["differential", "mecanum"])
    # NOTE(review): --world is not read in this function; presumably it is
    # consumed by _configure_from_args() at import time — confirm.
    parser.add_argument("--world", default=None,
                        choices=["field", "field_round"])
    parser.add_argument("--fp-rate", type=float, default=0.0)
    parser.add_argument("--action-smooth", type=float, default=0.55)
    parser.add_argument("--wheel-slip-std", type=float, default=0.05)
    parser.add_argument("--use-webots-preset", action="store_true",
                        help="Train in the HERDING_WEBOTS env (140° FOV + tight tracker).")
    parser.add_argument("--device", default="cpu")
    args = parser.parse_args()

    from herding.config import HerdingConfig, HERDING_WEBOTS, DomainRandomConfig, RobotConfig

    if args.use_webots_preset:
        herding_cfg = HERDING_WEBOTS.replace(
            domain_random=DomainRandomConfig(
                fp_rate=args.fp_rate,
                wheel_slip_std=args.wheel_slip_std,
            ),
            robot=RobotConfig(action_smooth=args.action_smooth),
        )
        print(f"[lstm] HERDING_WEBOTS preset + DR: fp_rate={args.fp_rate}")
    else:
        # Only build a config when at least one randomisation knob is active;
        # otherwise pass None through to HerdingEnv.
        herding_cfg = None
        if args.fp_rate > 0.0 or args.action_smooth > 0.0 or args.wheel_slip_std > 0.0:
            herding_cfg = HerdingConfig(
                domain_random=DomainRandomConfig(
                    fp_rate=args.fp_rate,
                    wheel_slip_std=args.wheel_slip_std,
                ),
                robot=RobotConfig(action_smooth=args.action_smooth),
            )

    env_fns = [_make_env(i, args.seed, args.drive_mode, args.difficulty,
                         args.max_n_sheep, herding_cfg)
               for i in range(args.n_envs)]
    venv = SubprocVecEnv(env_fns) if args.n_envs > 1 else DummyVecEnv(env_fns)
    eval_venv = None
    try:
        # Evaluation uses a separate rank/seed so it does not replay training episodes.
        eval_venv = DummyVecEnv([_make_env(99, args.seed + 999, args.drive_mode,
                                           args.difficulty, args.max_n_sheep,
                                           herding_cfg)])

        out = Path(args.out)
        out.mkdir(parents=True, exist_ok=True)

        print(f"[lstm] drive_mode={args.drive_mode} world={os.environ.get('HERDING_WORLD', 'field')}")
        print(f"[lstm] total_timesteps={args.total_timesteps} n_envs={args.n_envs} "
              f"lr={args.lr} lstm_hidden={args.lstm_hidden}")

        model = RecurrentPPO(
            "MlpLstmPolicy", venv,
            learning_rate=args.lr,
            n_steps=args.n_steps,
            batch_size=args.n_steps,  # full rollout = one batch (matches LSTM episode boundaries)
            n_epochs=4,
            gamma=0.99,
            gae_lambda=0.95,
            clip_range=0.2,
            ent_coef=0.0,
            max_grad_norm=0.5,
            policy_kwargs=dict(
                net_arch=dict(pi=[256, 256], vf=[256, 256]),
                lstm_hidden_size=args.lstm_hidden,
                n_lstm_layers=1,
                shared_lstm=False,
                enable_critic_lstm=True,
            ),
            device=args.device,
            verbose=1,
            seed=args.seed,
            tensorboard_log=str(out / "tb"),
        )

        eval_cb = EvalCallback(
            eval_venv,
            best_model_save_path=str(out / "best"),
            log_path=str(out / "evals"),
            # eval_freq is expressed in vec-env steps: at least one full
            # rollout, and no more often than every ~20k total timesteps.
            eval_freq=max(args.n_steps * args.n_envs, 20_000) // args.n_envs,
            n_eval_episodes=5,
            deterministic=True,
            render=False,
        )

        t0 = time.time()
        model.learn(total_timesteps=args.total_timesteps, callback=eval_cb,
                    progress_bar=True)
        print(f"[lstm] training done in {time.time() - t0:.0f}s")

        # Save best (by eval) if it exists; otherwise save final.
        best = out / "best" / "best_model.zip"
        if best.exists():
            import shutil
            shutil.copy(best, out / "policy.zip")
            print(f"[lstm] best snapshot → {out / 'policy.zip'}")
        else:
            model.save(str(out / "policy.zip"))
            print(f"[lstm] no eval beat init; final snapshot → {out / 'policy.zip'}")
        model.save(str(out / "final.zip"))
    finally:
        # Always release env resources (SubprocVecEnv worker processes in
        # particular), even when training raises or is interrupted.
        if eval_venv is not None:
            eval_venv.close()
        venv.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,211 +0,0 @@
|
|||||||
"""
|
|
||||||
PPO training script for the herding task.
|
|
||||||
|
|
||||||
Usage examples
|
|
||||||
--------------
|
|
||||||
# Start fresh with curriculum (1 → 5 sheep):
|
|
||||||
python train.py --curriculum
|
|
||||||
|
|
||||||
# Resume from checkpoint, skip directly to 3 sheep:
|
|
||||||
python train.py --resume runs/ppo_herding/ckpt_200000_steps.zip --n-sheep 3
|
|
||||||
|
|
||||||
# Quick smoke-test (no curriculum, single env):
|
|
||||||
python train.py --n-envs 1 --total-steps 50000
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import os
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
from stable_baselines3 import PPO
|
|
||||||
from stable_baselines3.common.callbacks import (
|
|
||||||
BaseCallback,
|
|
||||||
CallbackList,
|
|
||||||
CheckpointCallback,
|
|
||||||
EvalCallback,
|
|
||||||
)
|
|
||||||
from stable_baselines3.common.vec_env import SubprocVecEnv, VecNormalize
|
|
||||||
|
|
||||||
from herding_env import HerdingEnv
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Curriculum callback
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
class CurriculumCallback(BaseCallback):
    """
    Advances the curriculum (number of active sheep) when the rolling mean
    episode success rate exceeds a threshold.

    Success = episode terminated (all sheep penned) rather than truncated.

    On graduation the callback calls ``set_n_sheep`` on every training env via
    ``env_method`` and clears the success window, so the next stage is judged
    only on episodes collected at the new sheep count.
    """

    THRESHOLD    = 0.75   # success rate to graduate
    WINDOW       = 100    # episodes to average over
    MIN_EPISODES = 50     # don't graduate before seeing this many episodes

    def __init__(self, start_sheep: int, max_sheep: int, verbose: int = 1):
        """
        Args:
            start_sheep: Sheep count the training envs start with.
            max_sheep: Upper bound; the curriculum stops advancing here.
            verbose: When truthy, print a line on every graduation.
        """
        # Local import keeps this class self-contained; deque(maxlen=...)
        # evicts the oldest outcome automatically once the window is full.
        from collections import deque

        super().__init__(verbose)
        self.max_sheep = max_sheep
        self._successes = deque(maxlen=self.WINDOW)
        self._cur_sheep = start_sheep

    def _on_step(self) -> bool:
        """Record finished episodes and graduate when the window qualifies.

        Returns:
            Always ``True`` (this callback never requests early stopping).
        """
        for info, done in zip(self.locals["infos"], self.locals["dones"]):
            if done:
                # A finished episode counts as a success unless it was cut
                # off by the time limit.
                truncated = info.get("TimeLimit.truncated", False)
                self._successes.append(0 if truncated else 1)

        if (self._cur_sheep < self.max_sheep
                and len(self._successes) >= self.MIN_EPISODES
                and np.mean(list(self._successes)) >= self.THRESHOLD):
            self._cur_sheep += 1
            self.training_env.env_method("set_n_sheep", self._cur_sheep)
            self._successes.clear()
            if self.verbose:
                print(f"\n[Curriculum] Advanced to {self._cur_sheep} sheep "
                      f"at step {self.num_timesteps}\n")

        return True
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Environment factory
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def make_env(n_sheep: int, seed: int, max_steps: int):
    """Return a zero-argument factory for one seeded ``HerdingEnv``.

    Args:
        n_sheep: Passed to ``HerdingEnv(n_sheep=...)``.
        seed: Seed handed to the env's first ``reset`` call.
        max_steps: Passed to ``HerdingEnv(max_steps=...)``.

    Returns:
        A callable that builds the env, resets it once with ``seed``, and
        returns it.
    """
    def _init():
        herding_env = HerdingEnv(max_steps=max_steps, n_sheep=n_sheep)
        # The reset's return value is intentionally discarded; it is called
        # here only to apply the seed.
        herding_env.reset(seed=seed)
        return herding_env

    return _init
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Main
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def parse_args(argv=None):
    """Parse the training script's command-line options.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        The populated ``argparse.Namespace``.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--n-sheep",     type=int,   default=1,
                   help="Starting number of sheep (or fixed count if no curriculum)")
    p.add_argument("--max-sheep",   type=int,   default=5,
                   help="Maximum sheep for curriculum (ignored without --curriculum)")
    p.add_argument("--n-envs",      type=int,   default=8,
                   help="Number of parallel environments")
    p.add_argument("--total-steps", type=int,   default=5_000_000,
                   help="Total environment steps to train for")
    p.add_argument("--max-steps",   type=int,   default=2000,
                   help="Episode step limit inside each env")
    p.add_argument("--curriculum",  action="store_true",
                   help="Enable automatic curriculum advancement")
    p.add_argument("--resume",      type=str,   default=None,
                   help="Path to a .zip checkpoint to resume training from")
    p.add_argument("--run-dir",     type=str,   default="runs/ppo_herding",
                   help="Output directory for checkpoints and logs")
    # main() divides this value by --n-envs before handing it to the
    # checkpoint callback, so the flag is a total-timestep interval.
    p.add_argument("--save-freq",   type=int,   default=100_000,
                   help="Checkpoint every N total steps (summed over all envs)")
    p.add_argument("--eval-freq",   type=int,   default=50_000,
                   help="Evaluate every N steps")
    p.add_argument("--eval-eps",    type=int,   default=20,
                   help="Episodes per evaluation run")
    return p.parse_args(argv)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Run PPO training for the herding task end to end.

    Creates the run directory layout, builds normalised training and
    evaluation vec-envs (restoring saved normalisation statistics when
    resuming and a ``vecnorm.pkl`` exists), wires up checkpoint / evaluation /
    optional curriculum callbacks, trains, and finally saves the model and
    the normalisation statistics under ``--run-dir``.

    Both vec-envs are closed in a ``finally`` block so the ``SubprocVecEnv``
    worker processes are released even if training raises or is interrupted.
    """
    args = parse_args()
    os.makedirs(args.run_dir, exist_ok=True)
    ckpt_dir = os.path.join(args.run_dir, "checkpoints")
    best_dir = os.path.join(args.run_dir, "best_model")
    norm_path = os.path.join(args.run_dir, "vecnorm.pkl")
    os.makedirs(ckpt_dir, exist_ok=True)

    # Training envs
    train_env = SubprocVecEnv([
        make_env(args.n_sheep, seed=i, max_steps=args.max_steps)
        for i in range(args.n_envs)
    ])
    eval_env = None
    try:
        if args.resume and os.path.exists(norm_path):
            # Continue from the saved running statistics and keep updating them.
            train_env = VecNormalize.load(norm_path, train_env)
            train_env.training = True
            train_env.norm_reward = True
        else:
            train_env = VecNormalize(train_env, norm_obs=True, norm_reward=True,
                                     clip_obs=10.0)

        # Eval env (no reward normalisation, deterministic)
        eval_env = SubprocVecEnv([
            make_env(args.n_sheep, seed=1000 + i, max_steps=args.max_steps)
            for i in range(2)
        ])
        eval_env = VecNormalize(eval_env, norm_obs=True, norm_reward=False,
                                clip_obs=10.0, training=False)

        # Callbacks. The CLI frequencies are divided by n_envs before being
        # passed on, with a floor of 1.
        checkpoint_cb = CheckpointCallback(
            save_freq=max(args.save_freq // args.n_envs, 1),
            save_path=ckpt_dir,
            name_prefix="ckpt",
            save_vecnormalize=True,
        )
        eval_cb = EvalCallback(
            eval_env,
            best_model_save_path=best_dir,
            log_path=args.run_dir,
            eval_freq=max(args.eval_freq // args.n_envs, 1),
            n_eval_episodes=args.eval_eps,
            deterministic=True,
            verbose=1,
        )
        callbacks = [checkpoint_cb, eval_cb]
        if args.curriculum:
            # NOTE(review): the curriculum callback only calls set_n_sheep on
            # the training envs; the eval envs keep the starting sheep count.
            callbacks.append(CurriculumCallback(start_sheep=args.n_sheep,
                                                max_sheep=args.max_sheep))
        callback_list = CallbackList(callbacks)

        # Model
        ppo_kwargs = dict(
            policy="MlpPolicy",
            env=train_env,
            learning_rate=3e-4,
            n_steps=2048,
            batch_size=256,
            n_epochs=10,
            gamma=0.995,
            gae_lambda=0.95,
            clip_range=0.2,
            ent_coef=0.005,
            vf_coef=0.5,
            max_grad_norm=0.5,
            policy_kwargs=dict(net_arch=[256, 256]),
            tensorboard_log=args.run_dir,
            verbose=1,
        )

        if args.resume:
            print(f"Resuming from {args.resume}")
            # "policy" and "env" are not forwarded as keyword overrides:
            # the policy comes from the checkpoint and env is passed explicitly.
            model = PPO.load(args.resume, env=train_env, **{
                k: v for k, v in ppo_kwargs.items()
                if k not in ("policy", "env")
            })
        else:
            model = PPO(**ppo_kwargs)

        model.learn(
            total_timesteps=args.total_steps,
            callback=callback_list,
            # Keep the global step counter running when resuming.
            reset_num_timesteps=args.resume is None,
            tb_log_name="ppo",
        )

        # Save final artefacts
        model.save(os.path.join(args.run_dir, "final_model"))
        train_env.save(norm_path)
        print(f"\nTraining complete. Artefacts saved to {args.run_dir}/")
    finally:
        # Release worker subprocesses regardless of how training ended.
        if eval_env is not None:
            eval_env.close()
        train_env.close()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
+69
-63
@@ -10,7 +10,7 @@ EXTERNPROTO "../protos/Sheep.proto"
|
|||||||
# World
|
# World
|
||||||
WorldInfo {
|
WorldInfo {
|
||||||
info [
|
info [
|
||||||
"RL-Based Autonomous Shepherd Robot"
|
"Autonomous Shepherd Robot (Strömbom)"
|
||||||
"Group G25"
|
"Group G25"
|
||||||
]
|
]
|
||||||
title "Shepherd Herding"
|
title "Shepherd Herding"
|
||||||
@@ -106,19 +106,26 @@ Solid { translation -2.5 -15 0.84 children [ Shape { appearance USE CAP geometry
|
|||||||
Solid { translation 14 -15 0.40 children [ Shape { appearance USE STONE_A geometry Box { size 2.0 0.16 0.80 } } ] boundingObject Box { size 2.0 0.16 0.80 } }
|
Solid { translation 14 -15 0.40 children [ Shape { appearance USE STONE_A geometry Box { size 2.0 0.16 0.80 } } ] boundingObject Box { size 2.0 0.16 0.80 } }
|
||||||
Solid { translation 14 -15 0.84 children [ Shape { appearance USE CAP geometry Box { size 2.1 0.26 0.07 } } ] boundingObject Box { size 2.1 0.26 0.07 } }
|
Solid { translation 14 -15 0.84 children [ Shape { appearance USE CAP geometry Box { size 2.1 0.26 0.07 } } ] boundingObject Box { size 2.1 0.26 0.07 } }
|
||||||
# Gate posts
|
# Gate posts
|
||||||
Solid { translation 10 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
|
Solid { translation 10 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||||
Solid { translation 13 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
|
Solid { translation 13 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||||
# Outer gate (wooden, slightly ajar, Z-brace)
|
# Outer gate — fully open, hinged on the west gate post. Modeled as a swung-back
|
||||||
Solid { translation 11.5 -15.08 0.55 rotation 0 0 1 0.25 children [
|
# wooden gate parallel to the south wall, on the west side, so the 3m corridor
|
||||||
|
# between gate posts (x=10..13, y=-15) is unobstructed.
|
||||||
|
Solid { translation 8.6 -15.05 0.55 rotation 0 0 1 0 children [
|
||||||
Shape { appearance USE WOOD geometry Box { size 2.80 0.05 1.00 } }
|
Shape { appearance USE WOOD geometry Box { size 2.80 0.05 1.00 } }
|
||||||
Transform { translation 0 0.02 0 rotation 0 1 0 0.34 children [ Shape { appearance DEF FPOST PBRAppearance { baseColor 0.35 0.22 0.10 roughness 0.90 } geometry Box { size 2.97 0.04 0.06 } } ] }
|
# FPOST appearance DEF lives here so the external pen below can USE it.
|
||||||
|
Transform { translation 0 0.02 0 rotation 0 1 0 0.34 children [
|
||||||
|
Shape { appearance DEF FPOST PBRAppearance { baseColor 0.35 0.22 0.10 roughness 0.90 } geometry Box { size 2.97 0.04 0.06 } }
|
||||||
|
] }
|
||||||
] boundingObject Box { size 2.80 0.08 1.00 } }
|
] boundingObject Box { size 2.80 0.08 1.00 } }
|
||||||
|
|
||||||
# ==================== QUARANTINE PEN (wooden post-and-rail fence, inside field) ====================
|
# ==================== EXTERNAL PEN (south of field, accessed through south-wall gate) ====================
|
||||||
# Flow: main field → inner gate → quarantine area → outer gate → outside
|
# Flow: main field → south-wall gate (x ∈ [10, 13], y = -15) → external pen
|
||||||
|
# The pen is a wooden post-and-rail rectangle south of the field, x ∈ [10, 13],
|
||||||
|
# y ∈ [-22, -15], open on the north side (the gate hole is the entrance).
|
||||||
|
|
||||||
# West wall (x=10, ~7m along Y)
|
# Pen west wall (x=10, y from -22 to -15, length 7m)
|
||||||
Solid { translation 10 -11.46 0.55 children [
|
Solid { translation 10 -18.5 0.55 children [
|
||||||
Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
@@ -130,8 +137,8 @@ Solid { translation 10 -11.46 0.55 children [
|
|||||||
Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
|
Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
|
||||||
] boundingObject Box { size 0.14 6.92 1.10 } }
|
] boundingObject Box { size 0.14 6.92 1.10 } }
|
||||||
|
|
||||||
# East wall (x=13)
|
# Pen east wall (x=13, y from -22 to -15, length 7m)
|
||||||
Solid { translation 13 -11.46 0.55 children [
|
Solid { translation 13 -18.5 0.55 children [
|
||||||
Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
@@ -143,39 +150,50 @@ Solid { translation 13 -11.46 0.55 children [
|
|||||||
Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
|
Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
|
||||||
] boundingObject Box { size 0.14 6.92 1.10 } }
|
] boundingObject Box { size 0.14 6.92 1.10 } }
|
||||||
|
|
||||||
# North wall - open entrance (no wall, just corner posts)
|
# Pen south wall (y=-22, x from 10 to 13, length 3m, closes the back of the pen)
|
||||||
Solid { translation 10 -8 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] boundingObject Box { size 0.12 0.12 1.10 } }
|
Solid { translation 11.5 -22 0.55 children [
|
||||||
Solid { translation 13 -8 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] boundingObject Box { size 0.12 0.12 1.10 } }
|
Transform { translation -1.5 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 1.5 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 0 -0.38 children [ Shape { appearance USE WOOD geometry Box { size 2.92 0.06 0.08 } } ] }
|
||||||
|
Transform { translation 0 0 -0.05 children [ Shape { appearance USE WOOD geometry Box { size 2.92 0.06 0.08 } } ] }
|
||||||
|
Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 2.92 0.06 0.08 } } ] }
|
||||||
|
Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 2.92 0.14 0.04 } } ] }
|
||||||
|
] boundingObject Box { size 2.92 0.14 1.10 } }
|
||||||
|
|
||||||
|
# Pen north corner posts at the gate opening (no wall — sheep enter here from the field)
|
||||||
|
Solid { translation 10 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Solid { translation 13 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
|
||||||
# Corner pillars
|
# Corner pillars
|
||||||
Solid { translation 15 15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
|
Solid { translation 15 15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||||
Solid { translation 15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
|
Solid { translation 15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||||
Solid { translation -15 15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
|
Solid { translation -15 15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||||
Solid { translation -15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] boundingObject Box { size 0.44 0.44 1.12 } }
|
Solid { translation -15 -15 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||||
|
|
||||||
# Mid-pillars every 5 m — East
|
# Mid-pillars every 5 m — East
|
||||||
Solid { translation 15 10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation 15 10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation 15 5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation 15 5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation 15 0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation 15 0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation 15 -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation 15 -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation 15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation 15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
# West
|
# West
|
||||||
Solid { translation -15 10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation -15 10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation -15 5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation -15 5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation -15 0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation -15 0 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation -15 -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation -15 -5 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation -15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation -15 -10 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
# North
|
# North
|
||||||
Solid { translation 10 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation 10 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation 5 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation 5 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation 0 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation 0 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation -5 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation -5 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation -10 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation -10 15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
# South
|
# South
|
||||||
Solid { translation 5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation 5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation 0 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation 0 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation -5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation -5 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
Solid { translation -10 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] boundingObject Box { size 0.34 0.34 1.06 } }
|
Solid { translation -10 -15 0.53 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
|
||||||
# ==================== BARN 1 — Gambrel/Dutch style (NE, outside fence) ====================
|
# ==================== BARN 1 — Gambrel/Dutch style (NE, outside fence) ====================
|
||||||
# Body 10×7×4, weathered gray-brown wood, gambrel roof, large double doors
|
# Body 10×7×4, weathered gray-brown wood, gambrel roof, large double doors
|
||||||
@@ -503,28 +521,16 @@ ShepherdDog {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# ==================== SHEEP ====================
|
# ==================== SHEEP ====================
|
||||||
Sheep {
|
# Up to 10 sheep, scattered through the field's central/north zone. Comment
|
||||||
translation 3 2 0.5
|
# out trailing slots to test smaller flock sizes; the dog policy is trained
|
||||||
name "sheep1"
|
# to handle 1..10 sheep so any prefix works.
|
||||||
controller "sheep"
|
Sheep { translation 3.0 2.0 0.5 name "sheep1" controller "sheep" }
|
||||||
}
|
Sheep { translation 3.0 -2.0 0.5 name "sheep2" controller "sheep" }
|
||||||
Sheep {
|
Sheep { translation 4.0 0.0 0.5 name "sheep3" controller "sheep" }
|
||||||
translation 3 -2 0.5
|
Sheep { translation -3.0 4.0 0.5 name "sheep4" controller "sheep" }
|
||||||
name "sheep2"
|
Sheep { translation -5.0 -2.0 0.5 name "sheep5" controller "sheep" }
|
||||||
controller "sheep"
|
Sheep { translation 6.0 5.0 0.5 name "sheep6" controller "sheep" }
|
||||||
}
|
Sheep { translation -6.0 6.0 0.5 name "sheep7" controller "sheep" }
|
||||||
Sheep {
|
Sheep { translation 0.0 8.0 0.5 name "sheep8" controller "sheep" }
|
||||||
translation 4 0 0.5
|
Sheep { translation -8.0 0.0 0.5 name "sheep9" controller "sheep" }
|
||||||
name "sheep3"
|
Sheep { translation 7.0 -4.0 0.5 name "sheep10" controller "sheep" }
|
||||||
controller "sheep"
|
|
||||||
}
|
|
||||||
Sheep {
|
|
||||||
translation 3.5 1 0.5
|
|
||||||
name "sheep4"
|
|
||||||
controller "sheep"
|
|
||||||
}
|
|
||||||
Sheep {
|
|
||||||
translation 3.5 -1 0.5
|
|
||||||
name "sheep5"
|
|
||||||
controller "sheep"
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -0,0 +1,537 @@
|
|||||||
|
#VRML_SIM R2025a utf8
|
||||||
|
|
||||||
|
EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/objects/backgrounds/protos/TexturedBackground.proto"
|
||||||
|
EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/objects/backgrounds/protos/TexturedBackgroundLight.proto"
|
||||||
|
EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/objects/floors/protos/UnevenTerrain.proto"
|
||||||
|
EXTERNPROTO "https://raw.githubusercontent.com/cyberbotics/webots/R2025a/projects/appearances/protos/Grass.proto"
|
||||||
|
EXTERNPROTO "../protos/ShepherdDog.proto"
|
||||||
|
EXTERNPROTO "../protos/Sheep.proto"
|
||||||
|
|
||||||
|
# World
|
||||||
|
WorldInfo {
|
||||||
|
info [
|
||||||
|
"Autonomous Shepherd Robot (Strömbom)"
|
||||||
|
"Group G25"
|
||||||
|
]
|
||||||
|
title "Shepherd Herding (Round)"
|
||||||
|
ERP 0.62
|
||||||
|
basicTimeStep 16
|
||||||
|
contactProperties [
|
||||||
|
ContactProperties {
|
||||||
|
coulombFriction [
|
||||||
|
12
|
||||||
|
]
|
||||||
|
softCFM 1e-05
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Viewpoint
|
||||||
|
DEF VIEWPOINT Viewpoint {
|
||||||
|
position 4.34 -100.99 41.73
|
||||||
|
orientation 0.199 -0.190 -0.961 4.624
|
||||||
|
fieldOfView 0.785
|
||||||
|
}
|
||||||
|
|
||||||
|
# Background
|
||||||
|
Background {
|
||||||
|
skyColor [0.55 0.75 0.95]
|
||||||
|
}
|
||||||
|
# Single sun (diagonal from SW)
|
||||||
|
DirectionalLight {
|
||||||
|
ambientIntensity 1
|
||||||
|
direction -0.3 0.5 -0.85
|
||||||
|
color 1 0.98 0.92
|
||||||
|
intensity 2.5
|
||||||
|
castShadows TRUE
|
||||||
|
}
|
||||||
|
|
||||||
|
# Grass terrain
|
||||||
|
UnevenTerrain {
|
||||||
|
rotation 0 0 1 -1.5708
|
||||||
|
size 100 100 0.3
|
||||||
|
xDimension 50
|
||||||
|
yDimension 50
|
||||||
|
appearance Grass {
|
||||||
|
colorOverride 0.78 0.88 0.68
|
||||||
|
textureTransform TextureTransform {
|
||||||
|
scale 100 100
|
||||||
|
}
|
||||||
|
}
|
||||||
|
perlinNOctaves 4
|
||||||
|
}
|
||||||
|
|
||||||
|
# ==================== APPEARANCES ====================
|
||||||
|
Transform {
|
||||||
|
children [
|
||||||
|
Shape { appearance DEF STONE_A PBRAppearance { baseColor 0.48 0.45 0.40 roughness 0.95 metalness 0 } }
|
||||||
|
Shape { appearance DEF STONE_B PBRAppearance { baseColor 0.36 0.33 0.29 roughness 0.95 metalness 0 } }
|
||||||
|
Shape { appearance DEF STONE_C PBRAppearance { baseColor 0.58 0.55 0.50 roughness 0.90 metalness 0 } }
|
||||||
|
Shape { appearance DEF CAP PBRAppearance { baseColor 0.54 0.51 0.46 roughness 0.80 metalness 0 } }
|
||||||
|
Shape { appearance DEF BARN_RED PBRAppearance { baseColor 0.62 0.18 0.12 roughness 0.80 metalness 0 } }
|
||||||
|
Shape { appearance DEF BARN_ROOF PBRAppearance { baseColor 0.28 0.20 0.13 roughness 0.72 metalness 0 } }
|
||||||
|
Shape { appearance DEF WOOD PBRAppearance { baseColor 0.48 0.32 0.16 roughness 0.90 metalness 0 } }
|
||||||
|
Shape { appearance DEF TRUNK PBRAppearance { baseColor 0.38 0.24 0.11 roughness 0.90 metalness 0 } }
|
||||||
|
Shape { appearance DEF LEAF_A PBRAppearance { baseColor 0.22 0.52 0.16 roughness 0.85 metalness 0 } }
|
||||||
|
Shape { appearance DEF LEAF_B PBRAppearance { baseColor 0.16 0.42 0.10 roughness 0.85 metalness 0 } }
|
||||||
|
Shape { appearance DEF LEAF_C PBRAppearance { baseColor 0.30 0.60 0.20 roughness 0.80 metalness 0 } }
|
||||||
|
Shape { appearance DEF STRAW PBRAppearance { baseColor 0.85 0.75 0.35 roughness 0.95 metalness 0 } }
|
||||||
|
Shape { appearance DEF HAT PBRAppearance { baseColor 0.50 0.35 0.18 roughness 0.85 metalness 0 } }
|
||||||
|
Shape { appearance DEF SHIRT PBRAppearance { baseColor 0.60 0.30 0.30 roughness 0.80 metalness 0 } }
|
||||||
|
Shape { appearance DEF PANTS PBRAppearance { baseColor 0.25 0.25 0.30 roughness 0.80 metalness 0 } }
|
||||||
|
Shape { appearance DEF DOOR_MAT PBRAppearance { baseColor 0.55 0.38 0.20 roughness 0.72 metalness 0 } }
|
||||||
|
Shape { appearance DEF GLASS PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } }
|
||||||
|
Shape { appearance DEF HAY PBRAppearance { baseColor 0.82 0.72 0.32 roughness 0.95 metalness 0 } }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
DEF TRIM PBRAppearance { baseColor 0.90 0.88 0.82 roughness 0.70 metalness 0 }
|
||||||
|
|
||||||
|
# ==================== CIRCULAR STONE WALL (R=15 m) ====================
|
||||||
|
|
||||||
|
Solid { translation 15.00 0.00 0.40 rotation 0 0 1 -1.5708 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation 15.00 0.00 0.84 rotation 0 0 1 -1.5708 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation 14.10 5.13 0.40 rotation 0 0 1 -1.2217 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation 14.10 5.13 0.84 rotation 0 0 1 -1.2217 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation 11.49 9.64 0.40 rotation 0 0 1 -0.8727 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation 11.49 9.64 0.84 rotation 0 0 1 -0.8727 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation 7.50 12.99 0.40 rotation 0 0 1 -0.5236 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation 7.50 12.99 0.84 rotation 0 0 1 -0.5236 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation 2.60 14.77 0.40 rotation 0 0 1 -0.1745 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation 2.60 14.77 0.84 rotation 0 0 1 -0.1745 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation -2.60 14.77 0.40 rotation 0 0 1 0.1745 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation -2.60 14.77 0.84 rotation 0 0 1 0.1745 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation -7.50 12.99 0.40 rotation 0 0 1 0.5236 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation -7.50 12.99 0.84 rotation 0 0 1 0.5236 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation -11.49 9.64 0.40 rotation 0 0 1 0.8727 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation -11.49 9.64 0.84 rotation 0 0 1 0.8727 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation -14.10 5.13 0.40 rotation 0 0 1 1.2217 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation -14.10 5.13 0.84 rotation 0 0 1 1.2217 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation -15.00 0.00 0.40 rotation 0 0 1 1.5708 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation -15.00 0.00 0.84 rotation 0 0 1 1.5708 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation -14.10 -5.13 0.40 rotation 0 0 1 1.9199 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation -14.10 -5.13 0.84 rotation 0 0 1 1.9199 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation -11.49 -9.64 0.40 rotation 0 0 1 2.2689 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation -11.49 -9.64 0.84 rotation 0 0 1 2.2689 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation -7.50 -12.99 0.40 rotation 0 0 1 2.6180 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation -7.50 -12.99 0.84 rotation 0 0 1 2.6180 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation -3.37 -14.62 0.40 rotation 0 0 1 2.9671 children [ Shape { appearance USE STONE_A geometry Box { size 3.65 0.16 0.80 } } ] boundingObject Box { size 3.65 0.16 0.80 } }
|
||||||
|
Solid { translation -3.37 -14.62 0.84 rotation 0 0 1 2.9671 children [ Shape { appearance USE CAP geometry Box { size 3.7 0.26 0.07 } } ] boundingObject Box { size 3.7 0.26 0.07 } }
|
||||||
|
Solid { translation 3.37 -14.62 0.40 rotation 0 0 1 3.3161 children [ Shape { appearance USE STONE_A geometry Box { size 3.65 0.16 0.80 } } ] boundingObject Box { size 3.65 0.16 0.80 } }
|
||||||
|
Solid { translation 3.37 -14.62 0.84 rotation 0 0 1 3.3161 children [ Shape { appearance USE CAP geometry Box { size 3.7 0.26 0.07 } } ] boundingObject Box { size 3.7 0.26 0.07 } }
|
||||||
|
Solid { translation 7.50 -12.99 0.40 rotation 0 0 1 3.6652 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation 7.50 -12.99 0.84 rotation 0 0 1 3.6652 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation 11.49 -9.64 0.40 rotation 0 0 1 4.0143 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation 11.49 -9.64 0.84 rotation 0 0 1 4.0143 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
Solid { translation 14.10 -5.13 0.40 rotation 0 0 1 4.3633 children [ Shape { appearance USE STONE_A geometry Box { size 5.21 0.16 0.80 } } ] boundingObject Box { size 5.21 0.16 0.80 } }
|
||||||
|
Solid { translation 14.10 -5.13 0.84 rotation 0 0 1 4.3633 children [ Shape { appearance USE CAP geometry Box { size 5.2 0.26 0.07 } } ] boundingObject Box { size 5.2 0.26 0.07 } }
|
||||||
|
|
||||||
|
# Gate posts
|
||||||
|
Solid { translation -1.57 -14.92 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||||
|
Solid { translation 1.57 -14.92 0.56 children [ Shape { appearance USE STONE_B geometry Box { size 0.44 0.44 1.12 } } Shape { appearance USE CAP geometry Box { size 0.54 0.54 0.08 } } ] }
|
||||||
|
# Outer gate — swung-back beside west gate post
|
||||||
|
Solid { translation -2.97 -14.92 0.55 rotation 0 0 1 0 children [
|
||||||
|
Shape { appearance USE WOOD geometry Box { size 2.80 0.05 1.00 } }
|
||||||
|
Transform { translation 0 0.02 0 rotation 0 1 0 0.34 children [
|
||||||
|
Shape { appearance DEF FPOST PBRAppearance { baseColor 0.35 0.22 0.10 roughness 0.90 } geometry Box { size 2.97 0.04 0.06 } }
|
||||||
|
] }
|
||||||
|
] boundingObject Box { size 2.80 0.08 1.00 } }
|
||||||
|
|
||||||
|
# Pillars between wall sections
|
||||||
|
Solid { translation 14.97 2.64 0.53 rotation 0 0 1 0.9599 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation 13.16 7.60 0.53 rotation 0 0 1 1.3090 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation 9.77 11.64 0.53 rotation 0 0 1 1.6581 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation 5.20 14.28 0.53 rotation 0 0 1 2.0071 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation 0.00 15.20 0.53 rotation 0 0 1 2.3562 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation -5.20 14.28 0.53 rotation 0 0 1 2.7053 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation -9.77 11.64 0.53 rotation 0 0 1 3.0543 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation -13.16 7.60 0.53 rotation 0 0 1 3.4034 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation -14.97 2.64 0.53 rotation 0 0 1 3.7525 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation -14.97 -2.64 0.53 rotation 0 0 1 4.1015 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation -13.16 -7.60 0.53 rotation 0 0 1 4.4506 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation -9.77 -11.64 0.53 rotation 0 0 1 4.7997 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation -5.20 -14.28 0.53 rotation 0 0 1 5.1487 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation 5.20 -14.28 0.53 rotation 0 0 1 5.8469 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation 9.77 -11.64 0.53 rotation 0 0 1 6.1959 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation 13.16 -7.60 0.53 rotation 0 0 1 6.5450 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
Solid { translation 14.97 -2.64 0.53 rotation 0 0 1 6.8941 children [ Shape { appearance USE STONE_B geometry Box { size 0.34 0.34 1.06 } } Shape { appearance USE CAP geometry Box { size 0.44 0.44 0.07 } } ] }
|
||||||
|
|
||||||
|
# ==================== EXTERNAL PEN (south of round field gate) ====================
|
||||||
|
# Pen west wall
|
||||||
|
Solid { translation -1.57 -18.5 0.55 children [
|
||||||
|
Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 0 -0.38 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
|
||||||
|
Transform { translation 0 0 -0.05 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
|
||||||
|
Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
|
||||||
|
Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
|
||||||
|
] boundingObject Box { size 0.14 6.92 1.10 } }
|
||||||
|
# Pen east wall
|
||||||
|
Solid { translation 1.57 -18.5 0.55 children [
|
||||||
|
Transform { translation 0 -3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 -1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 1.73 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 3.46 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 0 -0.38 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
|
||||||
|
Transform { translation 0 0 -0.05 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
|
||||||
|
Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 0.06 6.92 0.08 } } ] }
|
||||||
|
Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 0.14 6.92 0.04 } } ] }
|
||||||
|
] boundingObject Box { size 0.14 6.92 1.10 } }
|
||||||
|
# Pen south wall
|
||||||
|
Solid { translation 0.00 -22 0.55 children [
|
||||||
|
Transform { translation -1.52 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 1.52 0 0 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 0 0 -0.38 children [ Shape { appearance USE WOOD geometry Box { size 3.16 0.06 0.08 } } ] }
|
||||||
|
Transform { translation 0 0 -0.05 children [ Shape { appearance USE WOOD geometry Box { size 3.16 0.06 0.08 } } ] }
|
||||||
|
Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 3.16 0.06 0.08 } } ] }
|
||||||
|
Transform { translation 0 0 0.53 children [ Shape { appearance USE FPOST geometry Box { size 3.16 0.14 0.04 } } ] }
|
||||||
|
] boundingObject Box { size 3.16 0.14 1.10 } }
|
||||||
|
# Pen north corner posts at the gate opening
|
||||||
|
Solid { translation -1.57 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
Solid { translation 1.57 -15.0 0.55 children [ Shape { appearance USE FPOST geometry Box { size 0.12 0.12 1.10 } } ] }
|
||||||
|
|
||||||
|
# Gate width: 3.14 m (pen x: [-1.57, 1.57])
|
||||||
|
|
||||||
|
# ==================== BARN 1 — Gambrel/Dutch style (NE, outside fence) ====================
|
||||||
|
# Body 10×7×4, weathered gray-brown wood, gambrel roof, large double doors
|
||||||
|
Solid {
|
||||||
|
translation 18.5 25.49 2
|
||||||
|
children [
|
||||||
|
Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 10 7 4 } }
|
||||||
|
# Gambrel roof
|
||||||
|
Transform { translation -3.5 0 3.05 rotation 0 1 0 -0.611 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 3.9 7.2 0.18 } } ] }
|
||||||
|
Transform { translation 3.5 0 3.05 rotation 0 1 0 0.611 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 3.9 7.2 0.18 } } ] }
|
||||||
|
Transform { translation -1.0 0 4.55 rotation 0 1 0 -0.422 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 2.5 7.2 0.18 } } ] }
|
||||||
|
Transform { translation 1.0 0 4.55 rotation 0 1 0 0.422 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 2.5 7.2 0.18 } } ] }
|
||||||
|
Transform { translation 0 0 5.04 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.18 0.16 roughness 0.82 metalness 0.02 } geometry Box { size 1.6 7.2 0.22 } } ] }
|
||||||
|
# South gable fill
|
||||||
|
Transform { translation 0 -3.57 2.40 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 8.8 0.16 0.80 } } ] }
|
||||||
|
Transform { translation 0 -3.57 3.10 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 6.8 0.16 0.70 } } ] }
|
||||||
|
Transform { translation 0 -3.57 3.70 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 5.1 0.16 0.60 } } ] }
|
||||||
|
Transform { translation 0 -3.57 4.10 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 4.0 0.16 0.40 } } ] }
|
||||||
|
Transform { translation 0 -3.57 4.42 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 2.7 0.16 0.60 } } ] }
|
||||||
|
Transform { translation 0 -3.57 4.84 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 0.9 0.16 0.36 } } ] }
|
||||||
|
# North gable fill
|
||||||
|
Transform { translation 0 3.57 2.40 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 8.8 0.16 0.80 } } ] }
|
||||||
|
Transform { translation 0 3.57 3.10 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 6.8 0.16 0.70 } } ] }
|
||||||
|
Transform { translation 0 3.57 3.70 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 5.1 0.16 0.60 } } ] }
|
||||||
|
Transform { translation 0 3.57 4.10 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 4.0 0.16 0.40 } } ] }
|
||||||
|
Transform { translation 0 3.57 4.42 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 2.7 0.16 0.60 } } ] }
|
||||||
|
Transform { translation 0 3.57 4.84 children [ Shape { appearance PBRAppearance { baseColor 0.52 0.42 0.30 roughness 0.92 metalness 0 } geometry Box { size 0.9 0.16 0.36 } } ] }
|
||||||
|
# Double barn doors (south face)
|
||||||
|
Transform {
|
||||||
|
translation 0 -3.51 -0.50
|
||||||
|
children [
|
||||||
|
Shape { appearance PBRAppearance { baseColor 0.44 0.30 0.14 roughness 0.88 metalness 0 } geometry Box { size 2.8 0.10 3.0 } }
|
||||||
|
Transform { rotation 0 0 1 0.83 children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 0.10 0.12 3.75 } } ] }
|
||||||
|
Transform { rotation 0 0 1 -0.83 children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 0.10 0.12 3.75 } } ] }
|
||||||
|
Transform { translation -1.45 0 0 children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 0.12 0.14 3.24 } } ] }
|
||||||
|
Transform { translation 1.45 0 0 children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 0.12 0.14 3.24 } } ] }
|
||||||
|
Transform { translation 0 0 1.62 children [ Shape { appearance PBRAppearance { baseColor 0.34 0.22 0.10 roughness 0.90 metalness 0 } geometry Box { size 3.04 0.14 0.14 } } ] }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
# Windows
|
||||||
|
Transform { translation -3.6 -3.52 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } geometry Box { size 1.40 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 3.6 -3.52 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } geometry Box { size 1.40 0.12 1.10 } } ] }
|
||||||
|
Transform { translation 5.06 2.0 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } geometry Box { size 0.12 1.20 1.0 } } ] }
|
||||||
|
Transform { translation 5.06 -2.0 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.60 0.80 0.95 roughness 0.20 metalness 0.05 } geometry Box { size 0.12 1.20 1.0 } } ] }
|
||||||
|
Transform { translation 0 -3.52 3.90 children [ Shape { appearance PBRAppearance { baseColor 0.44 0.30 0.14 roughness 0.88 metalness 0 } geometry Box { size 1.30 0.12 1.00 } } ] }
|
||||||
|
]
|
||||||
|
boundingObject Box { size 10 7 7 }
|
||||||
|
}
|
||||||
|
|
||||||
|
# ==================== BARN 3 — Red barn (NE, outside fence, gate facing fence) ====================
|
||||||
|
# Body 7×9×3.5, red walls, steep dark roof
|
||||||
|
# Barn 3: static red barn. The local origin is the centre of the 7 x 9 x 3.5 wall box,
# so the floor sits at local z = -1.75. Face labels below (south/north/east) are in the
# barn's LOCAL frame; the -1.5708 rad rotation about z turns the local -y face (the one
# with the door) towards world -x.
Solid {
  translation 29.76 9.52 1.75
  rotation 0 0 1 -1.5708
  # Explicit name: sibling Solid nodes left on the default name "solid" clash with each other.
  name "barn3"
  children [
    # Walls
    Shape { appearance USE BARN_RED geometry Box { size 7 9 3.5 } }

    # Roof: two panels pitched +/-0.70 rad about y, plus a flat ridge cap
    Transform { translation -2.0 0 3.0 rotation 0 1 0 -0.70 children [ Shape { appearance USE BARN_ROOF geometry Box { size 4.2 9.2 0.20 } } ] }
    Transform { translation 2.0 0 3.0 rotation 0 1 0 0.70 children [ Shape { appearance USE BARN_ROOF geometry Box { size 4.2 9.2 0.20 } } ] }
    Transform { translation 0 0 4.28 children [ Shape { appearance USE BARN_ROOF geometry Box { size 2.0 9.2 0.24 } } ] }

    # South gable fill: four stacked slabs, each 0.60 tall, narrowing towards the ridge
    Transform { translation 0 -4.52 2.05 children [ Shape { appearance USE BARN_RED geometry Box { size 6.2 0.16 0.60 } } ] }
    Transform { translation 0 -4.52 2.65 children [ Shape { appearance USE BARN_RED geometry Box { size 4.5 0.16 0.60 } } ] }
    Transform { translation 0 -4.52 3.25 children [ Shape { appearance USE BARN_RED geometry Box { size 2.9 0.16 0.60 } } ] }
    Transform { translation 0 -4.52 3.85 children [ Shape { appearance USE BARN_RED geometry Box { size 1.2 0.16 0.60 } } ] }

    # North gable fill: mirror of the south gable
    Transform { translation 0 4.52 2.05 children [ Shape { appearance USE BARN_RED geometry Box { size 6.2 0.16 0.60 } } ] }
    Transform { translation 0 4.52 2.65 children [ Shape { appearance USE BARN_RED geometry Box { size 4.5 0.16 0.60 } } ] }
    Transform { translation 0 4.52 3.25 children [ Shape { appearance USE BARN_RED geometry Box { size 2.9 0.16 0.60 } } ] }
    Transform { translation 0 4.52 3.85 children [ Shape { appearance USE BARN_RED geometry Box { size 1.2 0.16 0.60 } } ] }

    # Door (south face): panel, then lintel, two jambs and two horizontal rails
    Transform {
      translation 0 -4.52 -0.62
      children [
        Shape { appearance USE DOOR_MAT geometry Box { size 1.70 0.14 2.26 } }
        Transform { translation 0 0 1.22 children [ Shape { appearance USE WOOD geometry Box { size 2.10 0.18 0.26 } } ] }
        Transform { translation -0.90 0 0 children [ Shape { appearance USE WOOD geometry Box { size 0.24 0.18 2.52 } } ] }
        Transform { translation 0.90 0 0 children [ Shape { appearance USE WOOD geometry Box { size 0.24 0.18 2.52 } } ] }
        Transform { translation 0 0 -0.68 children [ Shape { appearance USE WOOD geometry Box { size 1.60 0.12 0.12 } } ] }
        Transform { translation 0 0 0.30 children [ Shape { appearance USE WOOD geometry Box { size 1.60 0.12 0.12 } } ] }
      ]
    }

    # Windows: south face
    Transform { translation -2.2 -4.53 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.80 0.14 0.70 } } ] }
    Transform { translation 2.2 -4.53 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.80 0.14 0.70 } } ] }

    # Windows: east face
    Transform { translation 3.52 3.0 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.14 0.80 0.70 } } ] }
    Transform { translation 3.52 0.0 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.14 0.80 0.70 } } ] }
    Transform { translation 3.52 -3.0 0.30 children [ Shape { appearance USE GLASS geometry Box { size 0.14 0.80 0.70 } } ] }
  ]
  # NOTE(review): this box is centred on the local origin, so it covers local z -3..3,
  # whereas the visible geometry runs from -1.75 (floor) to 4.40 (top of the ridge cap).
  # Confirm whether a vertical offset was intended.
  boundingObject Box { size 7 9 6 }
}
|
||||||
|
|
||||||
|
# ==================== TREES (outside fence) ====================
|
||||||
|
|
||||||
|
# Tree A — large oak, SE
|
||||||
|
# Tree A at (20, -18): 4.0 m trunk with five overlapping foliage spheres.
# No boundingObject is declared for this Solid.
Solid {
  translation 20 -18 0
  name "tree_a"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    # Trunk (cylinder centred at half its height)
    Transform { translation 0 0 2.0 children [ Shape { appearance USE TRUNK geometry Cylinder { height 4.0 radius 0.30 subdivision 10 } } ] }
    # Canopy
    Transform { translation 0.0 0.0 4.6 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 2.6 subdivision 4 } } ] }
    Transform { translation 1.2 0.6 5.6 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.9 subdivision 4 } } ] }
    Transform { translation -1.0 0.9 5.3 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.7 subdivision 4 } } ] }
    Transform { translation 0.4 -1.1 5.1 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.5 subdivision 4 } } ] }
    Transform { translation -0.5 -0.4 6.2 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.0 subdivision 4 } } ] }
  ]
}
|
||||||
|
|
||||||
|
# Tree B — medium, north side at (-8, 26), slightly west of centre; the "near barn" label is unverified — TODO confirm which barn
|
||||||
|
# Tree B at (-8, 26): 3.4 m trunk with three overlapping foliage spheres.
# No boundingObject is declared for this Solid.
Solid {
  translation -8 26 0
  name "tree_b"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    # Trunk (cylinder centred at half its height)
    Transform { translation 0 0 1.7 children [ Shape { appearance USE TRUNK geometry Cylinder { height 3.4 radius 0.25 subdivision 10 } } ] }
    # Canopy
    Transform { translation 0.0 0.0 3.8 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 2.2 subdivision 4 } } ] }
    Transform { translation 0.9 -0.7 4.7 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.6 subdivision 4 } } ] }
    Transform { translation -0.6 0.8 4.4 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.4 subdivision 4 } } ] }
  ]
}
|
||||||
|
|
||||||
|
# Tree C — large, NW
|
||||||
|
# Tree C at (-23, 20): 4.6 m trunk with four overlapping foliage spheres.
# No boundingObject is declared for this Solid.
Solid {
  translation -23 20 0
  name "tree_c"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    # Trunk (cylinder centred at half its height)
    Transform { translation 0 0 2.3 children [ Shape { appearance USE TRUNK geometry Cylinder { height 4.6 radius 0.36 subdivision 10 } } ] }
    # Canopy
    Transform { translation 0.0 0.0 5.2 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 2.9 subdivision 4 } } ] }
    Transform { translation 1.3 0.9 6.3 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 2.1 subdivision 4 } } ] }
    Transform { translation -1.1 1.1 6.0 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.9 subdivision 4 } } ] }
    Transform { translation 0.6 -1.3 5.8 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.6 subdivision 4 } } ] }
  ]
}
|
||||||
|
|
||||||
|
# Tree D — small, SW
|
||||||
|
# Tree D at (-20, -23): 2.8 m trunk with three overlapping foliage spheres.
# No boundingObject is declared for this Solid.
Solid {
  translation -20 -23 0
  name "tree_d"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    # Trunk (cylinder centred at half its height)
    Transform { translation 0 0 1.4 children [ Shape { appearance USE TRUNK geometry Cylinder { height 2.8 radius 0.20 subdivision 10 } } ] }
    # Canopy
    Transform { translation 0.0 0.0 3.2 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.9 subdivision 4 } } ] }
    Transform { translation -0.7 0.6 4.0 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.4 subdivision 4 } } ] }
    Transform { translation 0.6 -0.5 3.8 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.2 subdivision 4 } } ] }
  ]
}
|
||||||
|
|
||||||
|
# Tree E — north cluster
|
||||||
|
# Tree E at (7, 23): 3.8 m trunk with three overlapping foliage spheres.
# No boundingObject is declared for this Solid.
Solid {
  translation 7 23 0
  name "tree_e"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    # Trunk (cylinder centred at half its height)
    Transform { translation 0 0 1.9 children [ Shape { appearance USE TRUNK geometry Cylinder { height 3.8 radius 0.27 subdivision 10 } } ] }
    # Canopy
    Transform { translation 0.0 0.0 4.1 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 2.3 subdivision 4 } } ] }
    Transform { translation 1.0 0.5 5.0 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.7 subdivision 4 } } ] }
    Transform { translation -0.6 -0.9 4.8 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.4 subdivision 4 } } ] }
  ]
}
|
||||||
|
|
||||||
|
# Tree F — small, south side (slightly west of centre)
|
||||||
|
# Tree F at (-2.98, -22.8): 2.6 m trunk with two overlapping foliage spheres.
# No boundingObject is declared for this Solid.
Solid {
  translation -2.98 -22.8 0
  name "tree_f"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    # Trunk (cylinder centred at half its height)
    Transform { translation 0 0 1.3 children [ Shape { appearance USE TRUNK geometry Cylinder { height 2.6 radius 0.19 subdivision 10 } } ] }
    # Canopy
    Transform { translation 0.0 0.0 2.9 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.7 subdivision 4 } } ] }
    Transform { translation 0.6 0.4 3.7 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.2 subdivision 4 } } ] }
  ]
}
|
||||||
|
|
||||||
|
# Tree G — west side
|
||||||
|
# Tree G at (-23, -5): 4.0 m trunk with three overlapping foliage spheres.
# No boundingObject is declared for this Solid.
Solid {
  translation -23 -5 0
  name "tree_g"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    # Trunk (cylinder centred at half its height)
    Transform { translation 0 0 2.0 children [ Shape { appearance USE TRUNK geometry Cylinder { height 4.0 radius 0.29 subdivision 10 } } ] }
    # Canopy
    Transform { translation 0.0 0.0 4.4 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 2.4 subdivision 4 } } ] }
    Transform { translation -1.0 0.8 5.3 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 1.8 subdivision 4 } } ] }
    Transform { translation 0.9 -0.7 5.0 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.6 subdivision 4 } } ] }
  ]
}
|
||||||
|
|
||||||
|
# Tree H — east side
|
||||||
|
# Tree H at (21.35, -1.05): 3.0 m trunk with three overlapping foliage spheres.
# No boundingObject is declared for this Solid.
Solid {
  translation 21.35 -1.05 0
  name "tree_h"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    # Trunk (cylinder centred at half its height)
    Transform { translation 0 0 1.5 children [ Shape { appearance USE TRUNK geometry Cylinder { height 3.0 radius 0.22 subdivision 10 } } ] }
    # Canopy
    Transform { translation 0.0 0.0 3.4 children [ Shape { appearance USE LEAF_A geometry Sphere { radius 2.0 subdivision 4 } } ] }
    Transform { translation 0.7 0.6 4.2 children [ Shape { appearance USE LEAF_C geometry Sphere { radius 1.4 subdivision 4 } } ] }
    Transform { translation -0.5 -0.8 4.0 children [ Shape { appearance USE LEAF_B geometry Sphere { radius 1.2 subdivision 4 } } ] }
  ]
}
|
||||||
|
|
||||||
|
# ==================== SCARECROW (east side, outside fence) ====================
|
||||||
|
# Scarecrow at (20, -10), turned 2.61799 rad about z. Built around a vertical pole with a
# horizontal crossbar; the face details sit on the local +x side of the head.
# No boundingObject is declared for this Solid.
Solid {
  translation 20 -10 0
  rotation 0 0 1 2.61799
  name "scarecrow"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    # Upright pole, 2.44 m, centred at half its height
    Transform { translation 0 0 1.22 children [ Shape { appearance USE TRUNK geometry Cylinder { height 2.44 radius 0.045 subdivision 8 } } ] }
    # Crossbar: the same kind of cylinder turned 1.5708 rad about x so it lies horizontally
    Transform { translation 0 0 2.02 rotation 1 0 0 1.5708 children [ Shape { appearance USE TRUNK geometry Cylinder { height 1.60 radius 0.032 subdivision 8 } } ] }

    # Head at the top of the pole
    Transform {
      translation 0 0 2.44
      children [
        Shape { appearance USE STRAW geometry Sphere { radius 0.17 subdivision 3 } }
        # Two small dark spheres (presumably the eyes)
        Transform { translation 0.13 0.05 0.06 children [ Shape { appearance PBRAppearance { baseColor 0.06 0.06 0.06 } geometry Sphere { radius 0.028 subdivision 2 } } ] }
        Transform { translation 0.13 -0.05 0.06 children [ Shape { appearance PBRAppearance { baseColor 0.06 0.06 0.06 } geometry Sphere { radius 0.028 subdivision 2 } } ] }
        # Cone turned 1.5708 rad about y (presumably the nose)
        Transform { translation 0.16 0 -0.02 rotation 0 1 0 1.5708 children [ Shape { appearance PBRAppearance { baseColor 0.75 0.50 0.30 } geometry Cone { height 0.07 bottomRadius 0.032 subdivision 6 } } ] }
        # Two thin dark boxes below it (presumably the mouth)
        Transform { translation 0.14 0.04 -0.06 children [ Shape { appearance PBRAppearance { baseColor 0.18 0.08 0.08 } geometry Box { size 0.01 0.04 0.01 } } ] }
        Transform { translation 0.14 -0.04 -0.06 children [ Shape { appearance PBRAppearance { baseColor 0.18 0.08 0.08 } geometry Box { size 0.01 0.04 0.01 } } ] }
      ]
    }

    # Hat: wide flat brim, then a taller, narrower crown
    Transform { translation 0 0 2.62 children [ Shape { appearance USE HAT geometry Cylinder { height 0.04 radius 0.28 subdivision 12 } } ] }
    Transform { translation 0 0 2.80 children [ Shape { appearance USE HAT geometry Cylinder { height 0.30 radius 0.17 subdivision 10 } } ] }

    # Clothing
    Transform { translation 0 0 1.60 children [ Shape { appearance USE SHIRT geometry Box { size 0.20 0.40 0.46 } } ] }
    Transform { translation 0 0 1.14 children [ Shape { appearance USE PANTS geometry Box { size 0.17 0.32 0.34 } } ] }

    # Straw tufts near both ends of the crossbar (y = +/-0.68)
    Transform { translation 0 0.68 2.03 rotation 0 0 1 0.25 children [ Shape { appearance USE STRAW geometry Box { size 0.03 0.24 0.03 } } ] }
    Transform { translation 0 -0.68 2.03 rotation 0 0 -1 0.25 children [ Shape { appearance USE STRAW geometry Box { size 0.03 0.24 0.03 } } ] }
    # Straw tufts at the upper front edge of the shirt
    Transform { translation 0.10 0.08 1.82 children [ Shape { appearance USE STRAW geometry Box { size 0.03 0.03 0.14 } } ] }
    Transform { translation 0.10 -0.08 1.82 children [ Shape { appearance USE STRAW geometry Box { size 0.03 0.03 0.14 } } ] }
  ]
}
|
||||||
|
|
||||||
|
# ==================== HAY BALES (near barn) ====================
|
||||||
|
# Three round hay bales. Each is the same cylinder (length 1.30, radius 0.62) inside an
# inner Transform that turns it 1.5708 rad about x, with a box boundingObject on the Solid.
# Each gets an explicit name so they do not clash on the default Solid name "solid".
Solid {
  translation 25.75 13.76 0.62
  name "hay_bale_1"
  children [
    Transform { rotation 1 0 0 1.5708 children [ Shape { appearance USE HAY geometry Cylinder { height 1.30 radius 0.62 subdivision 14 } } ] }
  ]
  boundingObject Box { size 1.30 1.24 1.24 }
}

# This bale's own rotation is the exact inverse of its inner Transform, so the two cancel
# and the cylinder ends up in its unrotated orientation; the boundingObject still turns
# with the Solid. NOTE(review): confirm that is the intended pose.
Solid {
  translation 24.34 12.32 0.62
  rotation -1 0 0 1.5708
  name "hay_bale_2"
  children [
    Transform { rotation 1 0 0 1.5708 children [ Shape { appearance USE HAY geometry Cylinder { height 1.30 radius 0.62 subdivision 14 } } ] }
  ]
  boundingObject Box { size 1.30 1.24 1.24 }
}

Solid {
  translation 24.28 13.79 0.62
  name "hay_bale_3"
  children [
    Transform { rotation 1 0 0 1.5708 children [ Shape { appearance USE HAY geometry Cylinder { height 1.30 radius 0.62 subdivision 14 } } ] }
  ]
  boundingObject Box { size 1.30 1.24 1.24 }
}
|
||||||
|
|
||||||
|
# ==================== TRACTOR (near barn) ====================
|
||||||
|
# Static green tractor at (17, 19), turned 1.9 rad about z. Local +x is the front
# (hood, bumper, headlights); local -x is the rear (taillights, drawbar hitch).
# NOTE(review): every wheel is centred at a height equal to its radius, so the wheel bottoms
# are at local z = 0 and the 0.18 z offset below lifts them above world z = 0 — confirm
# against the ground height.
Solid {
  translation 17 19 0.18
  rotation 0 0 1 1.9
  name "tractor"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    # Chassis
    Transform { translation 0 0 0.35 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.20 0.20 roughness 0.6 metalness 0.3 } geometry Box { size 2.0 0.90 0.12 } } ] }
    # Engine hood
    Transform { translation 0.60 0 0.60 children [ Shape { appearance PBRAppearance { baseColor 0.15 0.50 0.12 roughness 0.7 metalness 0.1 } geometry Box { size 0.65 0.80 0.45 } } ] }
    # Main body
    Transform { translation -0.15 0 0.60 children [ Shape { appearance PBRAppearance { baseColor 0.15 0.50 0.12 roughness 0.7 metalness 0.1 } geometry Box { size 0.80 0.85 0.45 } } ] }
    # Cabin (box spans x -0.575..0.175, y -0.40..0.40)
    Transform { translation -0.20 0 0.95 children [ Shape { appearance PBRAppearance { baseColor 0.15 0.50 0.12 roughness 0.7 metalness 0.1 } geometry Box { size 0.75 0.80 0.45 } } ] }
    # Cabin roof
    Transform { translation -0.20 0 1.22 children [ Shape { appearance PBRAppearance { baseColor 0.12 0.40 0.10 roughness 0.75 metalness 0.1 } geometry Box { size 0.85 0.90 0.06 } } ] }
    # Windshield: straddles the cabin's front face (x = 0.175), mirroring the rear window.
    # It used to sit at x = 0.12, entirely inside the cabin box and therefore hidden.
    Transform { translation 0.18 0 0.95 children [ Shape { appearance USE GLASS geometry Box { size 0.02 0.55 0.35 } } ] }
    # Rear window
    Transform { translation -0.58 0 0.95 children [ Shape { appearance USE GLASS geometry Box { size 0.02 0.55 0.35 } } ] }
    # Side windows
    Transform { translation -0.20 0.40 0.95 children [ Shape { appearance USE GLASS geometry Box { size 0.55 0.02 0.30 } } ] }
    Transform { translation -0.20 -0.40 0.95 children [ Shape { appearance USE GLASS geometry Box { size 0.55 0.02 0.30 } } ] }
    # Seat
    Transform { translation -0.25 0 0.55 children [ Shape { appearance PBRAppearance { baseColor 0.12 0.12 0.12 roughness 0.9 } geometry Box { size 0.30 0.35 0.06 } } ] }

    # Exhaust stack: thin pipe with a slightly wider cap above it
    Transform {
      translation 0.50 0.30 0.60
      children [
        Shape { appearance PBRAppearance { baseColor 0.25 0.25 0.25 roughness 0.4 metalness 0.6 } geometry Cylinder { height 0.90 radius 0.03 subdivision 6 } }
        Transform { translation 0 0 0.50 children [ Shape { appearance PBRAppearance { baseColor 0.20 0.20 0.20 roughness 0.4 metalness 0.6 } geometry Cylinder { height 0.04 radius 0.045 subdivision 6 } } ] }
      ]
    }

    # Rear axle
    Transform { translation -0.45 0 0.40 children [ Shape { appearance PBRAppearance { baseColor 0.25 0.25 0.25 roughness 0.5 metalness 0.5 } geometry Box { size 0.08 1.15 0.08 } } ] }
    # Front axle
    Transform { translation 0.60 0 0.25 children [ Shape { appearance PBRAppearance { baseColor 0.25 0.25 0.25 roughness 0.5 metalness 0.5 } geometry Box { size 0.08 0.90 0.08 } } ] }

    # Wheels: each is a dark tyre cylinder plus a smaller, slightly longer hub cylinder,
    # turned 1.5708 rad about x.
    # Rear left wheel
    Transform {
      translation -0.45 0.60 0.40
      rotation 1 0 0 1.5708
      children [
        Shape { appearance PBRAppearance { baseColor 0.08 0.08 0.08 roughness 0.95 } geometry Cylinder { height 0.22 radius 0.40 subdivision 20 } }
        Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 metalness 0.5 } geometry Cylinder { height 0.24 radius 0.14 subdivision 10 } }
      ]
    }
    # Rear right wheel
    Transform {
      translation -0.45 -0.60 0.40
      rotation 1 0 0 1.5708
      children [
        Shape { appearance PBRAppearance { baseColor 0.08 0.08 0.08 roughness 0.95 } geometry Cylinder { height 0.22 radius 0.40 subdivision 20 } }
        Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 metalness 0.5 } geometry Cylinder { height 0.24 radius 0.14 subdivision 10 } }
      ]
    }
    # Front left wheel
    Transform {
      translation 0.60 0.45 0.25
      rotation 1 0 0 1.5708
      children [
        Shape { appearance PBRAppearance { baseColor 0.08 0.08 0.08 roughness 0.95 } geometry Cylinder { height 0.16 radius 0.25 subdivision 16 } }
        Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 metalness 0.5 } geometry Cylinder { height 0.18 radius 0.09 subdivision 8 } }
      ]
    }
    # Front right wheel
    Transform {
      translation 0.60 -0.45 0.25
      rotation 1 0 0 1.5708
      children [
        Shape { appearance PBRAppearance { baseColor 0.08 0.08 0.08 roughness 0.95 } geometry Cylinder { height 0.16 radius 0.25 subdivision 16 } }
        Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 metalness 0.5 } geometry Cylinder { height 0.18 radius 0.09 subdivision 8 } }
      ]
    }

    # Rear fenders
    Transform { translation -0.45 0.50 0.72 children [ Shape { appearance PBRAppearance { baseColor 0.12 0.40 0.10 roughness 0.75 metalness 0.1 } geometry Box { size 0.50 0.12 0.20 } } ] }
    Transform { translation -0.45 -0.50 0.72 children [ Shape { appearance PBRAppearance { baseColor 0.12 0.40 0.10 roughness 0.75 metalness 0.1 } geometry Box { size 0.50 0.12 0.20 } } ] }
    # Front bumper
    Transform { translation 0.95 0 0.35 children [ Shape { appearance PBRAppearance { baseColor 0.35 0.35 0.35 roughness 0.7 metalness 0.3 } geometry Box { size 0.12 0.75 0.30 } } ] }
    # Headlights
    Transform { translation 0.97 0.25 0.45 children [ Shape { appearance PBRAppearance { baseColor 0.95 0.92 0.70 roughness 0.3 } geometry Sphere { radius 0.05 subdivision 3 } } ] }
    Transform { translation 0.97 -0.25 0.45 children [ Shape { appearance PBRAppearance { baseColor 0.95 0.92 0.70 roughness 0.3 } geometry Sphere { radius 0.05 subdivision 3 } } ] }
    # Taillights
    Transform { translation -0.58 0.25 0.45 children [ Shape { appearance PBRAppearance { baseColor 0.80 0.10 0.10 roughness 0.4 } geometry Box { size 0.04 0.08 0.06 } } ] }
    Transform { translation -0.58 -0.25 0.45 children [ Shape { appearance PBRAppearance { baseColor 0.80 0.10 0.10 roughness 0.4 } geometry Box { size 0.04 0.08 0.06 } } ] }
    # Drawbar hitch
    Transform { translation -0.95 0 0.20 children [ Shape { appearance PBRAppearance { baseColor 0.25 0.25 0.25 roughness 0.5 metalness 0.5 } geometry Box { size 0.12 0.06 0.06 } } ] }
  ]
  # NOTE(review): this box is centred on the local origin (local z -0.65..0.65), while the
  # parts above span roughly local z 0..1.25 — confirm whether an offset was intended.
  boundingObject Box { size 2.2 1.4 1.3 }
}
|
||||||
|
|
||||||
|
# ==================== GRASS PATCHES (inside field, decorative) ====================
|
||||||
|
# Grass tuft at (-8, 6): four thin upright boxes, some turned about z.
# No boundingObject is declared for this Solid.
Solid {
  translation -8 6 0.15
  name "grass_patch_1"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    Transform { translation 0.10 0.00 0 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.30 } } ] }
    Transform { translation -0.05 0.12 0 rotation 0 0 1 0.4 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.26 } } ] }
    Transform { translation 0.08 -0.10 0 rotation 0 0 1 -0.3 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.28 } } ] }
    Transform { translation -0.12 0.04 0 rotation 0 0 1 0.2 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.24 } } ] }
  ]
}
|
||||||
|
# Grass tuft at (6, -9): four thin upright boxes, some turned about z.
# No boundingObject is declared for this Solid.
Solid {
  translation 6 -9 0.15
  name "grass_patch_2"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    Transform { translation 0.08 0.06 0 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.28 } } ] }
    Transform { translation -0.10 0.00 0 rotation 0 0 1 -0.3 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.32 } } ] }
    Transform { translation 0.02 -0.12 0 rotation 0 0 1 0.35 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.26 } } ] }
    Transform { translation -0.06 0.10 0 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.22 } } ] }
  ]
}
|
||||||
|
# Grass tuft at (-3, 11): three thin upright boxes, some turned about z.
# No boundingObject is declared for this Solid.
Solid {
  translation -3 11 0.15
  name "grass_patch_3"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    Transform { translation 0.06 -0.06 0 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.26 } } ] }
    Transform { translation -0.08 0.08 0 rotation 0 0 1 0.3 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.30 } } ] }
    Transform { translation 0.12 0.02 0 rotation 0 0 1 -0.25 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.28 } } ] }
  ]
}
|
||||||
|
# Grass tuft at (10, 8): three thin upright boxes, some turned about z.
# No boundingObject is declared for this Solid.
Solid {
  translation 10 8 0.15
  name "grass_patch_4"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    Transform { translation -0.07 0.05 0 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.24 } } ] }
    Transform { translation 0.09 -0.07 0 rotation 0 0 1 0.4 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.28 } } ] }
    Transform { translation 0.00 0.11 0 rotation 0 0 1 -0.2 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.26 } } ] }
  ]
}
|
||||||
|
# Grass tuft at (-11, -7): four thin upright boxes, some turned about z.
# No boundingObject is declared for this Solid.
Solid {
  translation -11 -7 0.15
  name "grass_patch_5"  # explicit name so it does not clash with sibling Solids on the default "solid"
  children [
    Transform { translation 0.05 0.08 0 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.30 } } ] }
    Transform { translation -0.09 -0.04 0 rotation 0 0 1 0.35 children [ Shape { appearance USE LEAF_B geometry Box { size 0.04 0.02 0.28 } } ] }
    Transform { translation 0.10 -0.09 0 rotation 0 0 1 -0.3 children [ Shape { appearance USE LEAF_C geometry Box { size 0.04 0.02 0.24 } } ] }
    Transform { translation -0.03 0.12 0 children [ Shape { appearance USE LEAF_A geometry Box { size 0.04 0.02 0.26 } } ] }
  ]
}
|
||||||
|
|
||||||
|
# ==================== SHEPHERD DOG ====================
|
||||||
|
# Single ShepherdDog instance (presumably a project PROTO) placed at the origin, 0.5 above
# z = 0, with a zero-angle rotation about z and the "shepherd_dog" controller.
ShepherdDog { translation 0 0 0.5 rotation 0 0 1 0 controller "shepherd_dog" }
|
||||||
|
|
||||||
|
# ==================== SHEEP ====================
|
||||||
|
# Up to 10 sheep, scattered through the field's central/north zone. Comment
|
||||||
|
# out trailing slots to test smaller flock sizes; the dog policy is trained
|
||||||
|
# to handle 1..10 sheep so any prefix works.
|
||||||
|
# Flock of ten Sheep instances, all run by the "sheep" controller and all starting at
# z = 0.5. One node per line, so trailing entries can be commented out individually.
Sheep { translation  3.0  2.0 0.5  name "sheep1"   controller "sheep" }
Sheep { translation  3.0 -2.0 0.5  name "sheep2"   controller "sheep" }
Sheep { translation  4.0  0.0 0.5  name "sheep3"   controller "sheep" }
Sheep { translation -3.0  4.0 0.5  name "sheep4"   controller "sheep" }
Sheep { translation -5.0 -2.0 0.5  name "sheep5"   controller "sheep" }
Sheep { translation  6.0  5.0 0.5  name "sheep6"   controller "sheep" }
Sheep { translation -6.0  6.0 0.5  name "sheep7"   controller "sheep" }
Sheep { translation  0.0  8.0 0.5  name "sheep8"   controller "sheep" }
Sheep { translation -8.0  0.0 0.5  name "sheep9"   controller "sheep" }
Sheep { translation  7.0 -4.0 0.5  name "sheep10"  controller "sheep" }
|
||||||
Reference in New Issue
Block a user