Generator runner improvement
@@ -123,10 +123,3 @@ The default policy in `pipeline/defaults/vast.json` now targets:
 - `<= $0.20/hour`
 - sorted by `dlperf` descending
 - uses `vastai/pytorch:latest` as the default image
-
-## Diagnostics
-
-```bash
-python3 classifier/tools/analyze.py classifier/configs/phase2/p2_resnet18_facecrop.json
-python3 classifier/tools/ensemble.py classifier/configs/phase4/p4_ensemble.json
-```
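For context, the retained policy caps offers at $0.20/hour, ranks by DLPerf, and falls back to the stock PyTorch image. Below is a minimal sketch of how a runner might apply such a policy; the field names (`max_price_per_hour`, `sort_by`, `image`) and the offer keys are illustrative assumptions, not the actual schema of `pipeline/defaults/vast.json`.

```python
# Hypothetical offer selection under the vast.json defaults described above.
# Field and key names are assumptions for illustration only.
POLICY = {
    "max_price_per_hour": 0.20,        # <= $0.20/hour
    "sort_by": "dlperf",               # best DLPerf first
    "image": "vastai/pytorch:latest",  # default container image
}

def pick_offer(offers: list[dict]) -> dict:
    # Keep offers under the price cap, then take the highest-ranked one.
    affordable = [o for o in offers
                  if o["price_per_hour"] <= POLICY["max_price_per_hour"]]
    if not affordable:
        raise RuntimeError("no offers under the price cap")
    return max(affordable, key=lambda o: o[POLICY["sort_by"]])
```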
@@ -37,7 +37,7 @@ def main(config_path, *, data_dir_override=None, output_root="generator/outputs"
 
     cfg = load_config(config_path)
 
-    run_name = cfg["run_name"]
+    run_name = cfg.get("run_name", Path(config_path).stem)
     device = "cuda" if torch.cuda.is_available() else "cpu"
     data_dir = data_dir_override or cfg.get("data_dir", "data")
     output_root = Path(output_root)
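With the `.get` fallback, a generator config no longer needs an explicit `run_name`; the config file's stem is used instead. A quick illustration (the path below is hypothetical):

```python
from pathlib import Path

# A config with no "run_name" key now falls back to the file's stem.
cfg: dict = {}  # e.g. parsed from a JSON config that omits "run_name"
config_path = "generator/configs/g1_baseline.json"  # hypothetical path
run_name = cfg.get("run_name", Path(config_path).stem)
print(run_name)  # -> "g1_baseline"
```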
@@ -7,7 +7,7 @@ from pipeline.orchestrator import EphemeralVastRunner, RunOptions
 # Accept one or more config files, or a single directory (all *.json inside, sorted)
 def _resolve_configs(raw: list[str]) -> list[Path]:
     if len(raw) == 1 and Path(raw[0]).is_dir():
-        configs = sorted(Path(raw[0]).glob("*.json"))
+        configs = sorted(p for p in Path(raw[0]).glob("*.json") if not p.name.startswith("_"))
         if not configs:
             raise ValueError(f"No JSON configs found in directory: {raw[0]}")
         return configs
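The added filter means a config directory can keep non-runnable fragments next to runnable configs, as long as they are underscore-prefixed. A sketch with a hypothetical layout:

```python
from pathlib import Path

# Hypothetical directory layout:
#   configs/phase2/_base.json        <- shared fragment, should not be scheduled
#   configs/phase2/p2_resnet18.json
#   configs/phase2/p2_vit_small.json
configs = sorted(
    p for p in Path("configs/phase2").glob("*.json")
    if not p.name.startswith("_")  # skip underscore-prefixed fragments
)
print([p.name for p in configs])  # -> ['p2_resnet18.json', 'p2_vit_small.json']
```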
@@ -429,21 +429,22 @@ class EphemeralVastRunner:
             seen.remove(resolved)
         return self._deep_merge_dicts(base_cfg, cfg)
 
-    # Return a stable signature used to detect duplicate training configs
-    def _normalized_config_signature(self, config_path: Path) -> str:
-        run_script, _ = self._module_for_config(config_path)
-        # Generator configs do not currently use shared/extends inheritance.
-        if run_script.startswith("generator/"):
-            with open(config_path, encoding="utf-8") as fh:
-                cfg = json.load(fh)
-        else:
-            cfg = self._load_config_with_extends(config_path)
-            shared_path = config_path.parent.parent / "shared.json"
-            if shared_path.exists():
-                with open(shared_path, encoding="utf-8") as fh:
-                    shared_cfg = json.load(fh)
-                cfg = self._deep_merge_dicts(shared_cfg, cfg)
+    # Resolve a config the same way the runners do: extends chain first,
+    # then shared.json overlaid underneath. Mirrors load_config() in
+    # classifier/src/utils/config.py and generator/src/utils/config.py — keep
+    # in sync if merge semantics ever change.
+    def _load_config_merged(self, config_path: Path) -> dict[str, Any]:
+        cfg = self._load_config_with_extends(config_path)
+        shared_path = config_path.parent.parent / "shared.json"
+        if shared_path.exists():
+            with open(shared_path, encoding="utf-8") as fh:
+                shared_cfg = json.load(fh)
+            cfg = self._deep_merge_dicts(shared_cfg, cfg)
+        return cfg
 
+    # Return a stable signature used to detect duplicate training configs
+    def _normalized_config_signature(self, config_path: Path) -> str:
+        cfg = self._load_config_merged(config_path)
         # run_name should not influence whether two configs are equivalent to train.
         cfg.pop("run_name", None)
         return json.dumps(cfg, sort_keys=True, separators=(",", ":"))
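Both the extends chain and the shared.json overlay funnel through `self._deep_merge_dicts`, whose implementation isn't shown in this diff. A plausible sketch, assuming the usual recursive dict-over-dict semantics where the second argument wins on conflicts:

```python
from typing import Any

def deep_merge_dicts(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    # Recursively overlay `override` on `base`; non-dict values in
    # `override` replace those in `base` wholesale.
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge_dicts(merged[key], value)
        else:
            merged[key] = value
    return merged

# shared.json supplies defaults; the per-run config overrides them.
shared = {"optimizer": {"name": "adamw", "lr": 1e-3}, "epochs": 20}
cfg = {"optimizer": {"lr": 3e-4}}
assert deep_merge_dicts(shared, cfg) == {
    "optimizer": {"name": "adamw", "lr": 3e-4},
    "epochs": 20,
}
```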
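With generator configs now resolved through the same merge path, two configs that flatten to the same dictionary, apart from their `run_name`, are treated as duplicates. A small demonstration of the signature logic (the config contents are hypothetical):

```python
import json

def normalized_signature(cfg: dict) -> str:
    cfg = dict(cfg)
    cfg.pop("run_name", None)  # naming alone doesn't change what gets trained
    return json.dumps(cfg, sort_keys=True, separators=(",", ":"))

a = {"run_name": "p2_resnet18_a", "model": "resnet18", "lr": 3e-4}
b = {"run_name": "p2_resnet18_b", "lr": 3e-4, "model": "resnet18"}
assert normalized_signature(a) == normalized_signature(b)  # flagged as duplicates
```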