Trying attention method
This commit is contained in:
+32
-8
@@ -286,15 +286,39 @@ def main():
|
||||
|
||||
try:
|
||||
for n in range(1, args.max_sheep + 1):
|
||||
if n > 1:
|
||||
if n == 1:
|
||||
print(f"\n[Stage n_sheep=1] training {args.steps_per_stage:,} steps")
|
||||
model.learn(
|
||||
total_timesteps=args.steps_per_stage,
|
||||
reset_num_timesteps=True,
|
||||
callback=ProgressCallback("1 sheep", freq=100_000),
|
||||
)
|
||||
else:
|
||||
# Mixed transition: half envs stay at n-1, half advance to n,
|
||||
# for the first half of the stage budget. This prevents the
|
||||
# n-sheep task's noisy early gradients from destroying the (n-1)-sheep policy
|
||||
# (catastrophic forgetting) before it has a chance to adapt.
|
||||
half = max(1, args.n_envs // 2)
|
||||
for i in range(half):
|
||||
vn.env_method("set_n_sheep", n - 1, indices=[i])
|
||||
for i in range(half, args.n_envs):
|
||||
vn.env_method("set_n_sheep", n, indices=[i])
|
||||
mix_steps = args.steps_per_stage // 2
|
||||
full_steps = args.steps_per_stage - mix_steps
|
||||
print(f"\n[Stage n_sheep={n}] mixed ({n-1}/{n} sheep) "
|
||||
f"{mix_steps:,} steps")
|
||||
model.learn(
|
||||
total_timesteps=mix_steps,
|
||||
reset_num_timesteps=False,
|
||||
callback=ProgressCallback(f"{n-1}→{n} mix", freq=100_000),
|
||||
)
|
||||
vn.env_method("set_n_sheep", n)
|
||||
|
||||
print(f"\n[Stage n_sheep={n}] training {args.steps_per_stage:,} steps")
|
||||
model.learn(
|
||||
total_timesteps=args.steps_per_stage,
|
||||
reset_num_timesteps=(n == 1),
|
||||
callback=ProgressCallback(f"{n} sheep", freq=100_000),
|
||||
)
|
||||
print(f"[Stage n_sheep={n}] full ({n} sheep) {full_steps:,} steps")
|
||||
model.learn(
|
||||
total_timesteps=full_steps,
|
||||
reset_num_timesteps=False,
|
||||
callback=ProgressCallback(f"{n} sheep", freq=100_000),
|
||||
)
|
||||
|
||||
# Evaluate
|
||||
print(f"[Stage n_sheep={n}] evaluating {args.eval_episodes} eps")
|
||||
|
||||
Reference in New Issue
Block a user