Generator runner improvement

This commit is contained in:
Johnny Fernandes
2026-04-30 03:21:49 +01:00
parent bb3dfb92d5
commit d75272cf84
4 changed files with 11 additions and 17 deletions
-7
View File
@@ -123,10 +123,3 @@ The default policy in `pipeline/defaults/vast.json` now targets:
- `<= $0.20/hour` - `<= $0.20/hour`
- sorted by `dlperf` descending - sorted by `dlperf` descending
- uses `vastai/pytorch:latest` as the default image - uses `vastai/pytorch:latest` as the default image
## Diagnostics
```bash
python3 classifier/tools/analyze.py classifier/configs/phase2/p2_resnet18_facecrop.json
python3 classifier/tools/ensemble.py classifier/configs/phase4/p4_ensemble.json
```
+1 -1
View File
@@ -37,7 +37,7 @@ def main(config_path, *, data_dir_override=None, output_root="generator/outputs"
cfg = load_config(config_path) cfg = load_config(config_path)
run_name = cfg["run_name"] run_name = cfg.get("run_name", Path(config_path).stem)
device = "cuda" if torch.cuda.is_available() else "cpu" device = "cuda" if torch.cuda.is_available() else "cpu"
data_dir = data_dir_override or cfg.get("data_dir", "data") data_dir = data_dir_override or cfg.get("data_dir", "data")
output_root = Path(output_root) output_root = Path(output_root)
+1 -1
View File
@@ -7,7 +7,7 @@ from pipeline.orchestrator import EphemeralVastRunner, RunOptions
# Accept one or more config files, or a single directory (all *.json inside, sorted) # Accept one or more config files, or a single directory (all *.json inside, sorted)
def _resolve_configs(raw: list[str]) -> list[Path]: def _resolve_configs(raw: list[str]) -> list[Path]:
if len(raw) == 1 and Path(raw[0]).is_dir(): if len(raw) == 1 and Path(raw[0]).is_dir():
configs = sorted(Path(raw[0]).glob("*.json")) configs = sorted(p for p in Path(raw[0]).glob("*.json") if not p.name.startswith("_"))
if not configs: if not configs:
raise ValueError(f"No JSON configs found in directory: {raw[0]}") raise ValueError(f"No JSON configs found in directory: {raw[0]}")
return configs return configs
+9 -8
View File
@@ -429,21 +429,22 @@ class EphemeralVastRunner:
seen.remove(resolved) seen.remove(resolved)
return self._deep_merge_dicts(base_cfg, cfg) return self._deep_merge_dicts(base_cfg, cfg)
# Return a stable signature used to detect duplicate training configs # Resolve a config the same way the runners do: extends chain first,
def _normalized_config_signature(self, config_path: Path) -> str: # then shared.json overlaid underneath. Mirrors load_config() in
run_script, _ = self._module_for_config(config_path) # classifier/src/utils/config.py and generator/src/utils/config.py — keep
# Generator configs do not currently use shared/extends inheritance. # in sync if merge semantics ever change.
if run_script.startswith("generator/"): def _load_config_merged(self, config_path: Path) -> dict[str, Any]:
with open(config_path, encoding="utf-8") as fh:
cfg = json.load(fh)
else:
cfg = self._load_config_with_extends(config_path) cfg = self._load_config_with_extends(config_path)
shared_path = config_path.parent.parent / "shared.json" shared_path = config_path.parent.parent / "shared.json"
if shared_path.exists(): if shared_path.exists():
with open(shared_path, encoding="utf-8") as fh: with open(shared_path, encoding="utf-8") as fh:
shared_cfg = json.load(fh) shared_cfg = json.load(fh)
cfg = self._deep_merge_dicts(shared_cfg, cfg) cfg = self._deep_merge_dicts(shared_cfg, cfg)
return cfg
# Return a stable signature used to detect duplicate training configs
def _normalized_config_signature(self, config_path: Path) -> str:
cfg = self._load_config_merged(config_path)
# run_name should not influence whether two configs are equivalent to train. # run_name should not influence whether two configs are equivalent to train.
cfg.pop("run_name", None) cfg.pop("run_name", None)
return json.dumps(cfg, sort_keys=True, separators=(",", ":")) return json.dumps(cfg, sort_keys=True, separators=(",", ":"))