"""Observation builder for the shepherd-dog policy.

Order-invariant 32-D feature vector. Sheep never appear by index in
the observation, only via summary statistics, a polar histogram, and
two "named" channels (closest-to-pen, rearmost-from-pen) — so the
policy generalises across flock sizes 1..MAX_SHEEP.

Layout (all components normalised so values stay roughly in [-1, 1]):

    idx    field
    -----  ----------------------------------------------------------
     0..3  dog pose: x/15, y/15, cos(h), sin(h)
     4..5  active-sheep CoM x/15, y/15
     6..8  flock dispersion: max_radius/15, std_x/15, std_y/15
     9..11 dog → CoM: dx/30, dy/30, dist/30
    12..14 dog → pen entry: dx/30, dy/30, dist/30
    15..16 furthest sheep → CoM: dx/15, dy/15
    17..18 min sheep-to-wall, min dog-to-wall (both /15)
       19  active sheep count / MAX_SHEEP
    20..27 8-bin polar histogram of active sheep in the dog's body frame
    28..29 dog → closest-to-pen sheep: dx/15, dy/15
    30..31 dog → rearmost (furthest-from-pen) sheep: dx/15, dy/15
"""

import math
import numpy as np

from herding.world.geometry import (
    PEN_ENTRY, MAX_SHEEP, distance_to_wall,
)

# Length of the observation vector allocated by build_obs. It must stay in
# sync with the index layout table in the module docstring (slots 0..31).
OBS_DIM = 32


def build_obs(dog_xy, dog_heading, sheep_xy_list, sheep_penned_list,
              n_max: int = MAX_SHEEP,
              n_expected: int | None = None) -> np.ndarray:
    """Build the order-invariant observation vector for the dog policy.

    Penned sheep are filtered out first; every flock-derived feature is
    computed from the remaining ("active") sheep only.

    Parameters
    ----------
    dog_xy : tuple (x, y) of the dog's GPS position (m)
    dog_heading : dog heading in rad
    sheep_xy_list : iterable of (x, y) for ALL known sheep
    sheep_penned_list : parallel iterable of flags — truthy entries mark
        penned sheep and are excluded. The two iterables are paired with
        ``zip``, so surplus entries of the longer one are silently ignored.
    n_max : divisor applied to the active-sheep count in slot 19
    n_expected : unused, kept for API compatibility.

    Returns
    -------
    np.ndarray
        float32 vector of length ``OBS_DIM`` following the layout table in
        the module docstring.

    Notes
    -----
    When no sheep are active, only the dog pose (slots 0..3) and the
    dog → pen-entry vector (slots 12..14) are filled; everything else is
    left at zero.
    NOTE(review): that includes the dog-to-wall slot 18, although it only
    depends on the dog — confirm whether this is intentional before
    changing it, since trained policies have seen it this way.
    """
    pos_scale = 15.0   # divisor for positions and short offsets
    vec_scale = 30.0   # divisor for the dog → CoM and dog → pen vectors

    dog_px, dog_py = dog_xy
    feat = np.zeros(OBS_DIM, dtype=np.float32)

    # Slots 0..3: dog pose, heading encoded as (cos, sin).
    feat[0:4] = (
        dog_px / pos_scale,
        dog_py / pos_scale,
        math.cos(dog_heading),
        math.sin(dog_heading),
    )

    # Keep only the sheep that are not penned yet.
    active = []
    for (sx, sy), penned in zip(sheep_xy_list, sheep_penned_list):
        if penned:
            continue
        active.append((sx, sy))
    n = len(active)

    # Slots 12..14: dog → pen entry. Filled before the empty-flock exit so
    # the policy still sees where the pen is.
    to_pen_x = PEN_ENTRY[0] - dog_px
    to_pen_y = PEN_ENTRY[1] - dog_py
    feat[12:15] = (
        to_pen_x / vec_scale,
        to_pen_y / vec_scale,
        math.hypot(to_pen_x, to_pen_y) / vec_scale,
    )

    if not active:
        # All flock-dependent slots (count in slot 19 included) stay at 0.
        return feat

    arr = np.asarray(active, dtype=np.float32)
    xs = arr[:, 0]
    ys = arr[:, 1]

    # Centre of mass and per-sheep offsets from it.
    com_x = float(xs.mean())
    com_y = float(ys.mean())
    from_com = arr - np.array([com_x, com_y], dtype=np.float32)
    com_dists = np.hypot(from_com[:, 0], from_com[:, 1])

    # Slots 4..8: CoM position and flock dispersion.
    feat[4:9] = (
        com_x / pos_scale,
        com_y / pos_scale,
        float(com_dists.max()) / pos_scale,
        float(xs.std()) / pos_scale,
        float(ys.std()) / pos_scale,
    )

    # Slots 9..11: dog → CoM.
    to_com_x = com_x - dog_px
    to_com_y = com_y - dog_py
    feat[9:12] = (
        to_com_x / vec_scale,
        to_com_y / vec_scale,
        math.hypot(to_com_x, to_com_y) / vec_scale,
    )

    # Slots 15..16: offset of the sheep furthest from the CoM.
    outlier = int(np.argmax(com_dists))
    feat[15] = float(from_com[outlier, 0]) / pos_scale
    feat[16] = float(from_com[outlier, 1]) / pos_scale

    # Slots 17..19: wall clearances and normalised active count.
    sheep_wall_gaps = [distance_to_wall(px, py) for px, py in active]
    nearest_sheep_wall = float(min(sheep_wall_gaps))
    dog_wall_gap = distance_to_wall(dog_px, dog_py)
    feat[17:20] = (
        nearest_sheep_wall / pos_scale,
        float(dog_wall_gap) / pos_scale,
        n / n_max,
    )

    # Slots 20..27: polar histogram in the dog's body frame. Bearings are
    # taken relative to the heading, wrapped to [-pi, pi] via atan2 of
    # (sin, cos), then split into 8 equal sectors; each bin holds the
    # fraction of active sheep in that sector.
    bearing = np.arctan2(ys - dog_py, xs - dog_px) - dog_heading
    bearing = np.arctan2(np.sin(bearing), np.cos(bearing))
    sector = np.floor((bearing + math.pi) / (2 * math.pi) * 8).astype(int)
    # A bearing of exactly +pi maps to 8; fold it into the last bin.
    sector = np.clip(sector, 0, 7)
    counts = np.bincount(sector, minlength=8).astype(np.float32)
    feat[20:28] = counts / n   # n >= 1 on this path

    # Slots 28..31: the sheep closest to the pen entry and the one
    # furthest from it ("rearmost"), as offsets from the dog. These named
    # channels let the observation pin down which sheep the teacher is
    # steering toward; without them BC fails to mimic it.
    pen_dists = np.hypot(xs - PEN_ENTRY[0], ys - PEN_ENTRY[1])
    named = ((28, int(np.argmin(pen_dists))), (30, int(np.argmax(pen_dists))))
    for slot, idx in named:
        feat[slot] = (float(arr[idx, 0]) - dog_px) / pos_scale
        feat[slot + 1] = (float(arr[idx, 1]) - dog_py) / pos_scale

    return feat