Checkpoint 2
This commit is contained in:
+137
@@ -0,0 +1,137 @@
|
||||
"""Observation builder for the shepherd dog policy.
|
||||
|
||||
Order-invariant 32-D feature vector — the policy generalises across
|
||||
flock sizes 1..MAX_SHEEP because individual sheep coordinates never
|
||||
appear in the observation by index, only summary statistics, a polar
|
||||
histogram, and two "named" sheep (closest-to-pen and rearmost-from-pen).
|
||||
|
||||
The two named sheep matter for the sequential-driving teacher: it
|
||||
targets the closest-to-pen sheep specifically, so the policy needs
|
||||
that channel to mimic the teacher.
|
||||
|
||||
Layout (all components normalised so values stay roughly in [-1, 1]):
|
||||
|
||||
idx field
|
||||
----- ----------------------------------------------------------
|
||||
0..3 dog pose: x/15, y/15, cos(heading), sin(heading)
|
||||
4..5 active-sheep CoM x/15, y/15
|
||||
6..8 flock dispersion: max-radius/15, std_x/15, std_y/15
|
||||
9..11 vector dog→CoM: dx/30, dy/30, dist/30
|
||||
12..14 vector dog→pen-entry: dx/30, dy/30, dist/30
|
||||
15..16 vector CoM→furthest-sheep (sheep − CoM, sheep furthest from CoM): dx/15, dy/15
|
||||
17..18 min sheep-to-wall, min dog-to-wall (both /15)
|
||||
19 active-sheep count / MAX_SHEEP
|
||||
20..27 8-bin polar histogram of active sheep around the dog,
|
||||
rotation-aware (binned in dog-relative frame), normalised
|
||||
so the bins sum to 1.
|
||||
28..29 vector dog→closest-to-pen sheep: dx/15, dy/15
|
||||
30..31 vector dog→rearmost (furthest-from-pen) sheep: dx/15, dy/15
|
||||
"""
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
from herding.geometry import (
|
||||
FIELD_X, FIELD_Y, PEN_ENTRY, MAX_SHEEP,
|
||||
)
|
||||
|
||||
# Length of the observation vector allocated by build_obs (slots 0..31).
OBS_DIM = 32
|
||||
|
||||
|
||||
def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
              n_max: int = MAX_SHEEP) -> np.ndarray:
    """Build the float32 observation vector consumed by the dog policy.

    Sheep flagged as penned are dropped first; every flock-derived feature
    is computed from the remaining ("active") sheep only.  The positions
    and penned flags are paired with ``zip``, so surplus entries in the
    longer iterable are ignored.  When no active sheep remain, only the
    dog pose (slots 0..3) and the dog→pen-entry vector (slots 12..14) are
    filled; every other slot stays 0.

    Parameters
    ----------
    dog_xy : tuple (x, y) of the dog's GPS position (m)
    dog_heading : dog heading in rad
    sheep_xy_list : iterable of (x, y) for ALL known sheep
    sheep_penned_list : parallel iterable of bool — True if sheep is penned
    n_max : divisor applied to the active-sheep count stored in slot 19

    Returns
    -------
    np.ndarray of shape (OBS_DIM,) and dtype float32.
    """
    # The two normalisers used throughout the layout.
    norm_15, norm_30 = 15.0, 30.0

    dog_x, dog_y = dog_xy
    obs = np.zeros(OBS_DIM, dtype=np.float32)

    # Slots 0..3: dog pose, heading encoded as (cos, sin).
    obs[0:4] = (
        dog_x / norm_15,
        dog_y / norm_15,
        math.cos(dog_heading),
        math.sin(dog_heading),
    )

    # Keep only the sheep that are not penned yet.
    active = [
        (sx, sy)
        for (sx, sy), penned in zip(sheep_xy_list, sheep_penned_list)
        if not penned
    ]
    n = len(active)

    # Slots 12..14: dog→pen-entry offset and its length.  Written before the
    # empty-flock exit so the terminal observation still carries it.
    to_pen_x = PEN_ENTRY[0] - dog_x
    to_pen_y = PEN_ENTRY[1] - dog_y
    obs[12:15] = (
        to_pen_x / norm_30,
        to_pen_y / norm_30,
        math.hypot(to_pen_x, to_pen_y) / norm_30,
    )

    if not active:
        # All sheep penned — terminal observation; slot 19 is already 0.
        return obs

    pts = np.asarray(active, dtype=np.float32)
    xs, ys = pts[:, 0], pts[:, 1]

    # Centre of mass of the active flock and each sheep's offset from it.
    com_x = float(xs.mean())
    com_y = float(ys.mean())
    offsets = pts - np.array([com_x, com_y], dtype=np.float32)
    spread = np.hypot(offsets[:, 0], offsets[:, 1])

    # Slots 4..5: CoM.  Slots 6..8: dispersion (max distance from the CoM
    # and per-axis standard deviation).
    obs[4] = com_x / norm_15
    obs[5] = com_y / norm_15
    obs[6] = float(spread.max()) / norm_15
    obs[7] = float(xs.std()) / norm_15
    obs[8] = float(ys.std()) / norm_15

    # Slots 9..11: dog→CoM offset and its length.
    to_com_x, to_com_y = com_x - dog_x, com_y - dog_y
    obs[9] = to_com_x / norm_30
    obs[10] = to_com_y / norm_30
    obs[11] = math.hypot(to_com_x, to_com_y) / norm_30

    # Slots 15..16: offset (sheep − CoM) of the sheep lying furthest from
    # the CoM.
    outlier = int(np.argmax(spread))
    obs[15] = float(offsets[outlier, 0]) / norm_15
    obs[16] = float(offsets[outlier, 1]) / norm_15

    # Slots 17..18: smallest clearance to any of the four field walls, first
    # over all active sheep, then for the dog.
    x_lo, x_hi = FIELD_X[0], FIELD_X[1]
    y_lo, y_hi = FIELD_Y[0], FIELD_Y[1]
    sheep_margins = (xs - x_lo, x_hi - xs, ys - y_lo, y_hi - ys)
    sheep_clearance = min(float(np.min(margin)) for margin in sheep_margins)
    dog_clearance = min(
        dog_x - x_lo, x_hi - dog_x,
        dog_y - y_lo, y_hi - dog_y,
    )
    obs[17] = sheep_clearance / norm_15
    obs[18] = float(dog_clearance) / norm_15

    # Slot 19: active-sheep count scaled by n_max.
    obs[19] = n / n_max

    # Slots 20..27: 8-bin polar histogram of the active sheep around the
    # dog.  Bearings are taken relative to the dog's heading and wrapped
    # into [-pi, pi]; bin 0 starts at relative bearing -pi.
    bearing = np.arctan2(ys - dog_y, xs - dog_x) - dog_heading
    bearing = np.arctan2(np.sin(bearing), np.cos(bearing))
    sector = np.floor((bearing + math.pi) / (2 * math.pi) * 8).astype(int)
    # A bearing of exactly +pi maps to index 8, so fold it into the last bin.
    sector = np.clip(sector, 0, 7)
    counts = np.bincount(sector, minlength=8).astype(np.float32)
    obs[20:28] = counts / max(1, n)

    # Slots 28..31: dog→sheep offsets for two "named" sheep — the one
    # closest to the pen entry (the sequential teacher's target) and the one
    # furthest from it (the natural next target).  Without these channels
    # the observation does not identify which sheep the teacher is steering
    # toward, which is what makes behaviour cloning tractable.
    pen_gap = np.hypot(xs - PEN_ENTRY[0], ys - PEN_ENTRY[1])
    nearest = int(np.argmin(pen_gap))
    rearmost = int(np.argmax(pen_gap))
    for slot, idx in ((28, nearest), (30, rearmost)):
        obs[slot] = (float(pts[idx, 0]) - dog_x) / norm_15
        obs[slot + 1] = (float(pts[idx, 1]) - dog_y) / norm_15

    return obs
|
||||
Reference in New Issue
Block a user