Clean state
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
"""
|
||||
Download the DeepFakeFace dataset from HuggingFace and extract it.
|
||||
|
||||
Usage:
|
||||
python tools/download_data.py
|
||||
python tools/download_data.py --data-dir /mnt/data/DFF
|
||||
"""
|
||||
import argparse
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
# Names of the dataset subsets. download() looks for `<name>.zip` in the data
# directory for each entry and expects it to unpack into a `<name>/` folder.
SOURCES = ["wiki", "inpainting", "text2img", "insight"]
|
||||
|
||||
|
||||
def download(data_dir: Path) -> None:
    """Download the DeepFakeFace dataset and unpack its per-source archives.

    Fetches a snapshot of the ``OpenRL/DeepFakeFace`` dataset repository into
    *data_dir*, extracts ``<source>.zip`` for every entry in ``SOURCES`` and
    finally prints how many ``*.jpg`` files each source directory contains.

    A source is skipped when its directory already exists, or (with a
    warning) when its archive is not present after the download.

    Args:
        data_dir: Directory that receives the snapshot and the extracted data.

    Raises:
        Any exception raised by the download or by ``zipfile`` while reading
        an archive is propagated unchanged; a partially extracted source
        directory is removed first.
    """
    # Function-scope import: only needed to clean up after a failed extraction.
    import shutil

    print(f"Downloading dataset from HuggingFace into {data_dir}...")
    snapshot_download(
        repo_id="OpenRL/DeepFakeFace",
        repo_type="dataset",
        local_dir=data_dir,
    )

    for source in SOURCES:
        zip_path = data_dir / f"{source}.zip"
        target_dir = data_dir / source

        if target_dir.exists():
            print(f" {source}/ already extracted, skipping")
            continue

        if not zip_path.exists():
            print(f" WARNING: {zip_path} not found, skipping")
            continue

        print(f" Extracting {zip_path.name}...")
        completed = False
        try:
            with zipfile.ZipFile(zip_path, "r") as archive:
                archive.extractall(data_dir)
            completed = True
        finally:
            if not completed:
                # target_dir did not exist before this extraction started, so
                # anything there now is a partial result. Leaving it behind
                # would make the next run report "already extracted" and skip
                # the source, so remove it and let the error propagate.
                shutil.rmtree(target_dir, ignore_errors=True)
        print(f" Done -> {target_dir}")

    print("\nVerifying...")
    for source in SOURCES:
        source_dir = data_dir / source
        # A source whose directory is missing is reported as 0 images.
        count = sum(1 for _ in source_dir.rglob("*.jpg")) if source_dir.exists() else 0
        print(f" {source}: {count} images")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: read the target directory and run the download.
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains multi-line usage examples; the default
        # formatter would re-wrap them into one paragraph in --help output.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--data-dir", default="data",
        help="Directory to download into. Default: data",
    )
    args = parser.parse_args()
    download(Path(args.data_dir))
|
||||
Reference in New Issue
Block a user