Sheep training flock of 10 fix?
This commit is contained in:
@@ -160,7 +160,7 @@ SHEEP_COLORS = ["#e41a1c","#377eb8","#4daf4a","#984ea3","#ff7f00",
|
|||||||
def _save_smoke_vis(model, vn, n_sheep, save_dir, seed=42, max_steps=2000):
|
def _save_smoke_vis(model, vn, n_sheep, save_dir, seed=42, max_steps=2000):
|
||||||
"""Run one episode and save trajectory + timeseries PNGs."""
|
"""Run one episode and save trajectory + timeseries PNGs."""
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
raw = DummyVecEnv([make_env(n_sheep, max_steps, seed)])
|
raw = DummyVecEnv([make_env(n_sheep, seed=seed, max_steps=max_steps)])
|
||||||
env = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
|
env = VecNormalize(raw, norm_obs=True, norm_reward=False, training=False)
|
||||||
env.obs_rms = deepcopy(vn.obs_rms)
|
env.obs_rms = deepcopy(vn.obs_rms)
|
||||||
env.ret_rms = deepcopy(vn.ret_rms)
|
env.ret_rms = deepcopy(vn.ret_rms)
|
||||||
@@ -241,10 +241,10 @@ def main():
|
|||||||
p.add_argument("--render", action="store_true")
|
p.add_argument("--render", action="store_true")
|
||||||
args = p.parse_args()
|
args = p.parse_args()
|
||||||
|
|
||||||
# 1 sheep (500k): sanity check — obs/reward structurally correct?
|
# 1 sheep (500k): hard check — obs/reward structurally correct?
|
||||||
# 2 sheep (1M): first multi-agent step — gradual transfer
|
# 2 sheep (1M): soft check — proves multi-sheep learning has started
|
||||||
# 3 sheep (1.5M): real multi-sheep test at curriculum pace
|
# 3 sheep (1.5M): directional check — not expected to fully converge here
|
||||||
stages = [(1, args.steps, 0.60), (2, args.steps * 2, 0.40), (3, args.steps * 3, 0.35)]
|
stages = [(1, args.steps, 0.60), (2, args.steps * 2, 0.20), (3, args.steps * 3, 0.10)]
|
||||||
|
|
||||||
model, vn = None, None
|
model, vn = None, None
|
||||||
all_passed = True
|
all_passed = True
|
||||||
|
|||||||
Reference in New Issue
Block a user