"""
|
|
Download the DeepFakeFace dataset from HuggingFace and extract it.
|
|
|
|
Usage:
|
|
python tools/download_data.py
|
|
python tools/download_data.py --data-dir /mnt/data/DFF
|
|
"""
import argparse
import zipfile
from pathlib import Path

from huggingface_hub import snapshot_download

# Per-source archive / folder names expected in the dataset snapshot:
# each entry corresponds to a "<name>.zip" that extracts into "<name>/".
SOURCES = [
    "wiki",
    "inpainting",
    "text2img",
    "insight",
]
def download(data_dir: Path) -> None:
    """Download the DeepFakeFace snapshot into *data_dir* and unpack it.

    Args:
        data_dir: Directory the HuggingFace snapshot is downloaded into and
            under which the per-source zip archives are extracted.

    Sources whose ``<source>/`` directory already exists are skipped, as are
    sources whose zip is missing from the snapshot. After extraction, a
    per-source image count is printed as a sanity check.
    """
    # Be explicit about creating the target directory so the function does
    # not depend on snapshot_download doing it.
    data_dir.mkdir(parents=True, exist_ok=True)

    print(f"Downloading dataset from HuggingFace into {data_dir}...")
    snapshot_download(
        repo_id="OpenRL/DeepFakeFace",
        repo_type="dataset",
        local_dir=data_dir,
    )

    for source in SOURCES:
        _extract_source(data_dir, source)

    print("\nVerifying...")
    for source in SOURCES:
        d = data_dir / source
        # NOTE(review): only *.jpg files are counted; other extensions, if
        # the dataset ever ships any, would not show up here.
        count = sum(1 for _ in d.rglob("*.jpg")) if d.exists() else 0
        print(f" {source}: {count} images")


def _extract_source(data_dir: Path, source: str) -> None:
    """Extract ``<source>.zip`` under *data_dir*, skipping work already done."""
    zip_path = data_dir / f"{source}.zip"
    target_dir = data_dir / source

    if target_dir.exists():
        print(f" {source}/ already extracted, skipping")
        return

    if not zip_path.exists():
        print(f" WARNING: {zip_path} not found, skipping")
        return

    print(f" Extracting {zip_path.name}...")
    # NOTE(review): extractall trusts member paths inside the archive
    # (zip-slip). Acceptable for this first-party dataset; do not reuse
    # this pattern on untrusted zips.
    with zipfile.ZipFile(zip_path, "r") as z:
        z.extractall(data_dir)
    print(f" Done -> {target_dir}")
if __name__ == "__main__":
|
|
parser = argparse.ArgumentParser(description=__doc__)
|
|
parser.add_argument(
|
|
"--data-dir", default="data",
|
|
help="Directory to download into. Default: data",
|
|
)
|
|
args = parser.parse_args()
|
|
download(Path(args.data_dir))
|