Generator runner improvement

This commit is contained in:
Johnny Fernandes
2026-04-30 03:21:49 +01:00
parent bb3dfb92d5
commit d75272cf84
4 changed files with 16 additions and 22 deletions
+14 -13
View File
@@ -429,21 +429,22 @@ class EphemeralVastRunner:
seen.remove(resolved)
return self._deep_merge_dicts(base_cfg, cfg)
# Load a config the way the runners do: follow the extends chain first, then
# place shared.json underneath the result. Mirrors load_config() in
# classifier/src/utils/config.py and generator/src/utils/config.py — keep
# in sync if merge semantics ever change.
def _load_config_merged(self, config_path: Path) -> dict[str, Any]:
    resolved = self._load_config_with_extends(config_path)
    # shared.json is looked up in the parent of the config file's directory.
    shared_file = config_path.parent.parent / "shared.json"
    if not shared_file.exists():
        # No shared layer available: the extends-resolved config is final.
        return resolved
    with open(shared_file, encoding="utf-8") as handle:
        shared_defaults = json.load(handle)
    # shared.json is passed as the base and the resolved config is merged onto it.
    return self._deep_merge_dicts(shared_defaults, resolved)
# Return a stable signature used to detect duplicate training configs
def _normalized_config_signature(self, config_path: Path) -> str:
    # Every config type is resolved through the shared helper (extends chain
    # plus shared.json); the former per-runner branch was superseded by this
    # call and its result was overwritten, so it has been dropped.
    cfg = self._load_config_merged(config_path)
    # run_name should not influence whether two configs are equivalent to train.
    cfg.pop("run_name", None)
    # Sorted keys and fixed separators keep the dump independent of key order
    # and whitespace in the source files.
    return json.dumps(cfg, sort_keys=True, separators=(",", ":"))