Generator runner improvement
This commit is contained in:
@@ -123,10 +123,3 @@ The default policy in `pipeline/defaults/vast.json` now targets:
- `<= $0.20/hour`
- sorted by `dlperf` descending
- uses `vastai/pytorch:latest` as the default image

## Diagnostics

```bash
python3 classifier/tools/analyze.py classifier/configs/phase2/p2_resnet18_facecrop.json
python3 classifier/tools/ensemble.py classifier/configs/phase4/p4_ensemble.json
```
+1
-1
@@ -37,7 +37,7 @@ def main(config_path, *, data_dir_override=None, output_root="generator/outputs"
|
||||
|
||||
cfg = load_config(config_path)
|
||||
|
||||
run_name = cfg["run_name"]
|
||||
run_name = cfg.get("run_name", Path(config_path).stem)
|
||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
data_dir = data_dir_override or cfg.get("data_dir", "data")
|
||||
output_root = Path(output_root)
|
||||
|
||||
+1
-1
@@ -7,7 +7,7 @@ from pipeline.orchestrator import EphemeralVastRunner, RunOptions
|
||||
# Accept one or more config files, or a single directory (all *.json inside, sorted)
|
||||
def _resolve_configs(raw: list[str]) -> list[Path]:
|
||||
if len(raw) == 1 and Path(raw[0]).is_dir():
|
||||
configs = sorted(Path(raw[0]).glob("*.json"))
|
||||
configs = sorted(p for p in Path(raw[0]).glob("*.json") if not p.name.startswith("_"))
|
||||
if not configs:
|
||||
raise ValueError(f"No JSON configs found in directory: {raw[0]}")
|
||||
return configs
|
||||
|
||||
@@ -429,21 +429,22 @@ class EphemeralVastRunner:
|
||||
seen.remove(resolved)
|
||||
return self._deep_merge_dicts(base_cfg, cfg)
|
||||
|
||||
# Return a stable signature used to detect duplicate training configs
|
||||
def _normalized_config_signature(self, config_path: Path) -> str:
|
||||
run_script, _ = self._module_for_config(config_path)
|
||||
# Generator configs do not currently use shared/extends inheritance.
|
||||
if run_script.startswith("generator/"):
|
||||
with open(config_path, encoding="utf-8") as fh:
|
||||
cfg = json.load(fh)
|
||||
else:
|
||||
# Resolve a config the same way the runners do: extends chain first,
# then shared.json overlaid underneath. Mirrors load_config() in
# classifier/src/utils/config.py and generator/src/utils/config.py — keep
# in sync if merge semantics ever change.
def _load_config_merged(self, config_path: Path) -> dict[str, Any]:
    """Return *config_path* fully resolved: extends chain, then shared.json as base."""
    merged = self._load_config_with_extends(config_path)
    # shared.json lives two levels up (e.g. configs/phaseN/x.json -> configs/shared.json).
    shared_path = config_path.parent.parent / "shared.json"
    if shared_path.exists():
        shared_cfg = json.loads(shared_path.read_text(encoding="utf-8"))
        # shared values form the base; the specific config wins on conflicts.
        merged = self._deep_merge_dicts(shared_cfg, merged)
    return merged
|
||||
|
||||
# Return a stable signature used to detect duplicate training configs
def _normalized_config_signature(self, config_path: Path) -> str:
    """Canonical JSON string for the fully-merged config, excluding run_name."""
    merged = self._load_config_merged(config_path)
    # Two configs that differ only in run_name are equivalent to train,
    # so run_name must not influence the signature.
    merged.pop("run_name", None)
    return json.dumps(merged, sort_keys=True, separators=(",", ":"))
|
||||
|
||||
Reference in New Issue
Block a user