Files
DRL_PROJ/classifier/tools/fetch_ds.py
T
Johnny Fernandes bb3dfb92d5 Clean state
2026-04-30 01:25:39 +01:00

57 lines
1.5 KiB
Python

"""
Download the DeepFakeFace dataset from HuggingFace and extract it.
Usage:
python tools/download_data.py
python tools/download_data.py --data-dir /mnt/data/DFF
"""
import argparse
import zipfile
from pathlib import Path
from huggingface_hub import snapshot_download
SOURCES = ["wiki", "inpainting", "text2img", "insight"]
def download(data_dir: Path) -> None:
    """Fetch the DeepFakeFace dataset into *data_dir*, extract it, and verify.

    Downloads the HuggingFace snapshot of ``OpenRL/DeepFakeFace``, extracts
    each per-source zip archive listed in ``SOURCES``, then prints a per-source
    count of extracted ``.jpg`` images.

    Args:
        data_dir: Directory the snapshot is downloaded into and extracted under.
    """
    print(f"Downloading dataset from HuggingFace into {data_dir}...")
    snapshot_download(
        repo_id="OpenRL/DeepFakeFace",
        repo_type="dataset",
        local_dir=data_dir,
    )
    _extract_sources(data_dir)
    _verify_sources(data_dir)


def _extract_sources(data_dir: Path) -> None:
    """Extract each "<source>.zip" under *data_dir*, skipping completed work."""
    for source in SOURCES:
        zip_path = data_dir / f"{source}.zip"
        target_dir = data_dir / source
        if target_dir.exists():
            # NOTE(review): a partially-extracted directory also triggers this
            # skip — delete the directory to force re-extraction.
            print(f" {source}/ already extracted, skipping")
            continue
        if not zip_path.exists():
            # Best-effort: a missing archive is reported, not fatal.
            print(f" WARNING: {zip_path} not found, skipping")
            continue
        print(f" Extracting {zip_path.name}...")
        with zipfile.ZipFile(zip_path, "r") as z:
            z.extractall(data_dir)
        print(f" Done -> {target_dir}")


def _verify_sources(data_dir: Path) -> None:
    """Print the number of .jpg images found under each source directory."""
    print("\nVerifying...")
    for source in SOURCES:
        d = data_dir / source
        count = sum(1 for _ in d.rglob("*.jpg")) if d.exists() else 0
        print(f" {source}: {count} images")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--data-dir", default="data",
help="Directory to download into. Default: data",
)
args = parser.parse_args()
download(Path(args.data_dir))