Trying attention method

This commit is contained in:
Johnny Fernandes
2026-04-26 22:28:43 +01:00
parent 57b1735e1a
commit a2363d882f
4 changed files with 448 additions and 12 deletions
+32 -8
View File
@@ -286,15 +286,39 @@ def main():
try:
for n in range(1, args.max_sheep + 1):
if n > 1:
if n == 1:
print(f"\n[Stage n_sheep=1] training {args.steps_per_stage:,} steps")
model.learn(
total_timesteps=args.steps_per_stage,
reset_num_timesteps=True,
callback=ProgressCallback("1 sheep", freq=100_000),
)
else:
# Mixed transition: half envs stay at n-1, half advance to n,
# for the first half of the stage budget. This prevents the
# n+1 task's noisy early gradients from destroying the n policy
# (catastrophic forgetting) before it has a chance to adapt.
half = max(1, args.n_envs // 2)
for i in range(half):
vn.env_method("set_n_sheep", n - 1, indices=[i])
for i in range(half, args.n_envs):
vn.env_method("set_n_sheep", n, indices=[i])
mix_steps = args.steps_per_stage // 2
full_steps = args.steps_per_stage - mix_steps
print(f"\n[Stage n_sheep={n}] mixed ({n-1}/{n} sheep) "
f"{mix_steps:,} steps")
model.learn(
total_timesteps=mix_steps,
reset_num_timesteps=False,
callback=ProgressCallback(f"{n-1}{n} mix", freq=100_000),
)
vn.env_method("set_n_sheep", n)
print(f"\n[Stage n_sheep={n}] training {args.steps_per_stage:,} steps")
model.learn(
total_timesteps=args.steps_per_stage,
reset_num_timesteps=(n == 1),
callback=ProgressCallback(f"{n} sheep", freq=100_000),
)
print(f"[Stage n_sheep={n}] full ({n} sheep) {full_steps:,} steps")
model.learn(
total_timesteps=full_steps,
reset_num_timesteps=False,
callback=ProgressCallback(f"{n} sheep", freq=100_000),
)
# Evaluate
print(f"[Stage n_sheep={n}] evaluating {args.eval_episodes} eps")