Sheep training flock _ improver
This commit is contained in:
@@ -27,10 +27,40 @@ from copy import deepcopy
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from stable_baselines3 import PPO
|
from stable_baselines3 import PPO
|
||||||
|
from stable_baselines3.common.callbacks import BaseCallback
|
||||||
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv, VecNormalize
|
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv, VecNormalize
|
||||||
|
|
||||||
from herding_env import HerdingEnv
|
from herding_env import HerdingEnv
|
||||||
|
|
||||||
|
|
||||||
|
class ProgressCallback(BaseCallback):
    """Print a one-line trial-progress summary every `freq` env steps.

    The summary shows the trial number (1-based), the stage label, the
    current cumulative step count, and the mean return over the most recent
    completed episodes (at most 50 are retained).

    Args:
        trial_id: Zero-based trial index; printed as ``trial_id + 1``.
        stage_label: Free-form label for the current training stage.
        freq: Minimum number of env steps between two printed summaries.
    """

    def __init__(self, trial_id: int, stage_label: str, freq: int = 50_000):
        super().__init__()
        self.trial_id = trial_id
        self.stage_label = stage_label
        self.freq = freq
        self._last = 0
        self._ep_returns = []  # rolling list of completed-episode returns

    def _on_training_start(self) -> None:
        """Start the print interval at the model's current step count.

        When ``learn`` is called with ``reset_num_timesteps=False`` the step
        counter continues from earlier training.  Without this baseline a
        fresh callback would see ``num_timesteps - 0 >= freq`` on its very
        first step and print a summary with no episodes (``nan``).
        """
        self._last = self.model.num_timesteps

    def _on_step(self) -> bool:
        """Record finished episodes and print a summary when one is due.

        Returns:
            Always ``True``, so training is never stopped by this callback.
        """
        infos = self.locals.get("infos", [])
        dones = self.locals.get("dones", [])
        for info, done in zip(infos, dones):
            # NOTE(review): the "episode" entry is presumably added by a
            # Monitor-style wrapper around the env; verify against the env
            # construction code.
            if done and "episode" in info:
                self._ep_returns.append(info["episode"]["r"])
                # Keep only the 50 most recent episode returns.
                if len(self._ep_returns) > 50:
                    self._ep_returns.pop(0)

        if self.num_timesteps - self._last >= self.freq:
            self._last = self.num_timesteps
            # No completed episode yet -> report nan rather than fail.
            mean_r = (float(np.mean(self._ep_returns))
                      if self._ep_returns else float("nan"))
            n_eps = len(self._ep_returns)
            print(f" ... [trial {self.trial_id+1} | {self.stage_label} | "
                  f"{self.num_timesteps:>7,} steps | "
                  f"ep_return(last {n_eps})={mean_r:+.2f}]",
                  flush=True)
        return True
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Search space — reward weights + a couple of hyperparams
|
# Search space — reward weights + a couple of hyperparams
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -128,12 +158,17 @@ def run_trial(trial_id: int, cfg: dict, log_path: str) -> dict:
|
|||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
model.learn(total_timesteps=TRAIN_STAGE1_STEPS, reset_num_timesteps=True)
|
model.learn(total_timesteps=TRAIN_STAGE1_STEPS,
|
||||||
|
reset_num_timesteps=True,
|
||||||
|
callback=ProgressCallback(trial_id, "1 sheep"))
|
||||||
vn.env_method("set_n_sheep", 2)
|
vn.env_method("set_n_sheep", 2)
|
||||||
model.learn(total_timesteps=TRAIN_STAGE2_STEPS, reset_num_timesteps=False)
|
model.learn(total_timesteps=TRAIN_STAGE2_STEPS,
|
||||||
|
reset_num_timesteps=False,
|
||||||
|
callback=ProgressCallback(trial_id, "2 sheep"))
|
||||||
|
|
||||||
per_sheep = {}
|
per_sheep = {}
|
||||||
for n in EVAL_NSHEEP:
|
for n in EVAL_NSHEEP:
|
||||||
|
print(f" ... [trial {trial_id+1} | eval n={n}]", flush=True)
|
||||||
per_sheep[n] = evaluate(model, vn, n, EVAL_EPISODES, MAX_STEPS, rcfg)
|
per_sheep[n] = evaluate(model, vn, n, EVAL_EPISODES, MAX_STEPS, rcfg)
|
||||||
finally:
|
finally:
|
||||||
try: vn.close()
|
try: vn.close()
|
||||||
|
|||||||
Reference in New Issue
Block a user