commit bb3dfb92d5a0e30f5eec3ceef96b4f1a995f51c2 Author: Johnny Fernandes Date: Thu Apr 30 01:25:39 2026 +0100 Clean state diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..99bb195 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.pt filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1e41577 --- /dev/null +++ b/.gitignore @@ -0,0 +1,48 @@ +# Pipeline +pipeline/.env + +# Data +data/* +!data/.gitkeep + +# Cropped faces +#cropped_classifier.zip +#cropped_generator.zip +cropped/* +!cropped/.gitkeep +!cropped/classifier/.gitkeep +!cropped/classifier/README.md +!cropped/generator/.gitkeep +!cropped/generator/README.md + +# Classifier outputs +classifier/outputs/* +# Analysis +!classifier/outputs/analysis/ +!classifier/outputs/analysis/** +# Figures +!classifier/outputs/figures/ +!classifier/outputs/figures/** +# Models +!classifier/outputs/models/ +classifier/outputs/models/* +!classifier/outputs/models/.gitkeep +!classifier/outputs/models/*.pt +# Logs +!classifier/outputs/logs/ +classifier/outputs/logs/* +!classifier/outputs/logs/.gitkeep +!classifier/outputs/logs/*.json +# Pipeline +!classifier/outputs/pipeline/ +classifier/outputs/pipeline/* +!classifier/outputs/pipeline/.gitkeep +!classifier/outputs/pipeline/*.json + +# Generator outputs (all local-only) +generator/outputs/* + +# Python +.venv/ +.ipynb_checkpoints/ +__pycache__/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..1cb4d20 --- /dev/null +++ b/README.md @@ -0,0 +1,132 @@ +# DRL_PROJ — DeepFake Detection + +Deep learning project for binary deepfake detection on the DeepFakeFace dataset. + +## Project structure + +``` +DRL_PROJ/ + classifier/ ← discriminative model (real vs. 
fake classifier) + src/ ← model definitions, training, evaluation, preprocessing + configs/ ← experiment configs organised by phase + phase1/ ← baseline models (SimpleCNN, ResNet18) + phase2/ ← architecture sweep (ResNet variants, face-crop) + phase3/ ← EfficientNet, ViT, frequency-aware training + phase4/ ← ensemble strategies + tools/ ← analyse.py, ensemble.py, inference.py, facecrop.py + notebooks/ ← EDA, preprocessing, evaluation, GradCAM + outputs/ ← models, logs, figures (gitignored except .pt/.json) + run.py ← main training entry point + generator/ ← generative model (GAN / VAE / diffusion) — in progress + pipeline/ ← Vast.ai ephemeral GPU orchestration + data/ ← dataset root (gitignored) + cropped/ ← MTCNN pre-cropped faces (gitignored) + classifier/ ← bbox crops for the classifier + generator/ ← landmark-aligned crops for the generator +``` + +## Setup + +Create a local environment when you want to run the code directly on a machine you control: + +```bash +python3 -m venv .venv +source .venv/bin/activate +python -m pip install --upgrade pip setuptools wheel +python -m pip install -r requirements.txt +``` + +## Local Training + +```bash +python3 classifier/run.py classifier/configs/phase2/p2_resnet18_facecrop.json +python3 classifier/run.py classifier/configs/phase3/p3_efficientnet_b0.json +``` + +## Ephemeral Vast.ai Pipeline + +The deployment/orchestration path now lives under [`pipeline/`](/run/host/mnt/shared/UP/DRL/DRL_PROJ/pipeline/README.md). 
+ +One-time setup: + +```bash +cat > pipeline/.env <<'EOF' +VAST_API_KEY= +VAST_SSH_PRIVATE_KEY=/home/your-user/.ssh/id_ed25519 +EOF +``` + +End-to-end ephemeral run: + +```bash +python3 -m pipeline run classifier/configs/phase2/p2_resnet18_facecrop.json --upload-data +``` + +Interactive offer selection: + +```bash +python3 -m pipeline offers --select-offer +``` + +You can override the ranking mode per run: + +```bash +python3 -m pipeline offers --sort price +python3 -m pipeline offers --sort performance +python3 -m pipeline offers --sort performance --price 0.14 +``` + +You can also filter by region: + +```bash +python3 -m pipeline offers --select-offer --region europe +python3 -m pipeline offers --select-offer --region Portugal +python3 -m pipeline offers --select-offer --region US +python3 -m pipeline offers --select-offer --region europe --price 0.14 +``` + +To inspect which region strings are currently available from the search results: + +```bash +python3 -m pipeline offers --list-regions +``` + +That command: +- ensures your SSH public key is registered with Vast.ai +- searches offers using the filters in `pipeline/defaults/vast.json` +- creates an instance +- waits for SSH readiness +- syncs the repo +- uploads `data/` when `--upload-data` is set +- runs `python3 classifier/run.py ...` +- downloads `classifier/outputs/` +- for generator runs, rsyncs `generator/outputs/` back every 50 epochs and again at completion +- destroys the instance automatically unless `--keep-on-failure` is set + +Useful commands: + +```bash +python3 -m pipeline up +python3 -m pipeline status +python3 -m pipeline down +``` + +To override the default Vast search/runtime settings, copy `pipeline/defaults/vast.json`, edit it, and pass: + +```bash +python3 -m pipeline run classifier/configs/phase3/p3_efficientnet_b0.json --pipeline-config /path/to/vast.override.json +``` + +The default policy in `pipeline/defaults/vast.json` now targets: +- `1x` GPU +- `RTX 3090` or `RTX 3090 Ti` +- `<= 
$0.20/hour` +- sorted by `dlperf` descending +- uses `vastai/pytorch:latest` as the default image + +## Diagnostics + +```bash +python3 classifier/tools/analyze.py classifier/configs/phase2/p2_resnet18_facecrop.json +python3 classifier/tools/ensemble.py classifier/configs/phase4/p4_ensemble.json +``` diff --git a/classifier/configs/phase1/p1_resnet18_baseline.json b/classifier/configs/phase1/p1_resnet18_baseline.json new file mode 100644 index 0000000..0d624fb --- /dev/null +++ b/classifier/configs/phase1/p1_resnet18_baseline.json @@ -0,0 +1,10 @@ +{ + "run_name": "p1_resnet18_baseline", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 128, + "subsample": 0.2, + "augment": false, + "data_dir": "data" +} diff --git a/classifier/configs/phase1/p1_simplecnn_baseline.json b/classifier/configs/phase1/p1_simplecnn_baseline.json new file mode 100644 index 0000000..a04ea4c --- /dev/null +++ b/classifier/configs/phase1/p1_simplecnn_baseline.json @@ -0,0 +1,11 @@ +{ + "run_name": "p1_simplecnn_baseline", + "backbone": "simple_cnn", + "cnn_preset": "medium", + "dropout": 0.0, + "epochs": 15, + "image_size": 128, + "subsample": 0.2, + "augment": false, + "data_dir": "data" +} diff --git a/classifier/configs/phase2/p2a_t1_original.json b/classifier/configs/phase2/p2a_t1_original.json new file mode 100644 index 0000000..906e44e --- /dev/null +++ b/classifier/configs/phase2/p2a_t1_original.json @@ -0,0 +1,11 @@ +{ + "run_name": "p2a_t1_original", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false, + "data_dir": "data", + "normalization": "imagenet" +} diff --git a/classifier/configs/phase2/p2a_t2_real_norm.json b/classifier/configs/phase2/p2a_t2_real_norm.json new file mode 100644 index 0000000..08380a7 --- /dev/null +++ b/classifier/configs/phase2/p2a_t2_real_norm.json @@ -0,0 +1,5 @@ +{ + "extends": "p2a_t1_original.json", + "run_name": "p2a_t2_real_norm", + "normalization": 
"real_norm" +} diff --git a/classifier/configs/phase2/p2a_t3_holdout_inpainting.json b/classifier/configs/phase2/p2a_t3_holdout_inpainting.json new file mode 100644 index 0000000..7bad898 --- /dev/null +++ b/classifier/configs/phase2/p2a_t3_holdout_inpainting.json @@ -0,0 +1,15 @@ +{ + "extends": "p2a_t1_original.json", + "run_name": "p2a_t3_holdout_inpainting", + "train_sources": [ + "wiki", + "text2img", + "insight" + ], + "eval_sources": [ + "wiki", + "text2img", + "insight", + "inpainting" + ] +} diff --git a/classifier/configs/phase2/p2a_t3_holdout_insight.json b/classifier/configs/phase2/p2a_t3_holdout_insight.json new file mode 100644 index 0000000..ad53d7e --- /dev/null +++ b/classifier/configs/phase2/p2a_t3_holdout_insight.json @@ -0,0 +1,15 @@ +{ + "extends": "p2a_t1_original.json", + "run_name": "p2a_t3_holdout_insight", + "train_sources": [ + "wiki", + "text2img", + "inpainting" + ], + "eval_sources": [ + "wiki", + "text2img", + "inpainting", + "insight" + ] +} diff --git a/classifier/configs/phase2/p2a_t3_holdout_text2img.json b/classifier/configs/phase2/p2a_t3_holdout_text2img.json new file mode 100644 index 0000000..c1549c2 --- /dev/null +++ b/classifier/configs/phase2/p2a_t3_holdout_text2img.json @@ -0,0 +1,15 @@ +{ + "extends": "p2a_t1_original.json", + "run_name": "p2a_t3_holdout_text2img", + "train_sources": [ + "wiki", + "inpainting", + "insight" + ], + "eval_sources": [ + "wiki", + "inpainting", + "insight", + "text2img" + ] +} diff --git a/classifier/configs/phase2/p2b_resnet18_224.json b/classifier/configs/phase2/p2b_resnet18_224.json new file mode 100644 index 0000000..b4fd9ab --- /dev/null +++ b/classifier/configs/phase2/p2b_resnet18_224.json @@ -0,0 +1,10 @@ +{ + "run_name": "p2b_resnet18_224", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false, + "data_dir": "data" +} diff --git a/classifier/configs/phase2/p2b_simplecnn_224.json 
b/classifier/configs/phase2/p2b_simplecnn_224.json new file mode 100644 index 0000000..481335b --- /dev/null +++ b/classifier/configs/phase2/p2b_simplecnn_224.json @@ -0,0 +1,11 @@ +{ + "run_name": "p2b_simplecnn_224", + "backbone": "simple_cnn", + "cnn_preset": "medium", + "dropout": 0.0, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false, + "data_dir": "data" +} diff --git a/classifier/configs/phase2/p2c_resnet18_facecrop.json b/classifier/configs/phase2/p2c_resnet18_facecrop.json new file mode 100644 index 0000000..48222b3 --- /dev/null +++ b/classifier/configs/phase2/p2c_resnet18_facecrop.json @@ -0,0 +1,10 @@ +{ + "run_name": "p2c_resnet18_facecrop", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false, + "data_dir": "cropped/classifier" +} diff --git a/classifier/configs/phase2/p2c_simplecnn_facecrop.json b/classifier/configs/phase2/p2c_simplecnn_facecrop.json new file mode 100644 index 0000000..1322e63 --- /dev/null +++ b/classifier/configs/phase2/p2c_simplecnn_facecrop.json @@ -0,0 +1,11 @@ +{ + "run_name": "p2c_simplecnn_facecrop", + "backbone": "simple_cnn", + "cnn_preset": "medium", + "dropout": 0.0, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false, + "data_dir": "cropped/classifier" +} diff --git a/classifier/configs/phase2/p2d_resnet18_aug.json b/classifier/configs/phase2/p2d_resnet18_aug.json new file mode 100644 index 0000000..da81ef2 --- /dev/null +++ b/classifier/configs/phase2/p2d_resnet18_aug.json @@ -0,0 +1,22 @@ +{ + "run_name": "p2d_resnet18_aug", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "data_dir": "data", + "augment": { + "hflip_p": 0.5, + "rotation_degrees": 10, + "brightness": 0.2, + "contrast": 0.2, + "saturation": 0.1, + "hue": 0.02, + "grayscale_p": 0.1, + "blur_p": 0.1, + "erase_p": 0.2, + "noise_p": 0.3, + "noise_std": 0.04 + } +} diff --git 
a/classifier/configs/phase2/p2d_simplecnn_aug.json b/classifier/configs/phase2/p2d_simplecnn_aug.json new file mode 100644 index 0000000..b702440 --- /dev/null +++ b/classifier/configs/phase2/p2d_simplecnn_aug.json @@ -0,0 +1,23 @@ +{ + "run_name": "p2d_simplecnn_aug", + "backbone": "simple_cnn", + "cnn_preset": "medium", + "dropout": 0.0, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "data_dir": "data", + "augment": { + "hflip_p": 0.5, + "rotation_degrees": 10, + "brightness": 0.2, + "contrast": 0.2, + "saturation": 0.1, + "hue": 0.02, + "grayscale_p": 0.1, + "blur_p": 0.1, + "erase_p": 0.2, + "noise_p": 0.3, + "noise_std": 0.04 + } +} diff --git a/classifier/configs/phase2/p2e_resnet18_facecrop_aug.json b/classifier/configs/phase2/p2e_resnet18_facecrop_aug.json new file mode 100644 index 0000000..4a39bb6 --- /dev/null +++ b/classifier/configs/phase2/p2e_resnet18_facecrop_aug.json @@ -0,0 +1,22 @@ +{ + "run_name": "p2e_resnet18_facecrop_aug", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "data_dir": "cropped/classifier", + "augment": { + "hflip_p": 0.5, + "rotation_degrees": 10, + "brightness": 0.2, + "contrast": 0.2, + "saturation": 0.1, + "hue": 0.02, + "grayscale_p": 0.1, + "blur_p": 0.1, + "erase_p": 0.2, + "noise_p": 0.3, + "noise_std": 0.04 + } +} diff --git a/classifier/configs/phase2/p2e_simplecnn_facecrop_aug.json b/classifier/configs/phase2/p2e_simplecnn_facecrop_aug.json new file mode 100644 index 0000000..25a6c8c --- /dev/null +++ b/classifier/configs/phase2/p2e_simplecnn_facecrop_aug.json @@ -0,0 +1,23 @@ +{ + "run_name": "p2e_simplecnn_facecrop_aug", + "backbone": "simple_cnn", + "cnn_preset": "medium", + "dropout": 0.0, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "data_dir": "cropped/classifier", + "augment": { + "hflip_p": 0.5, + "rotation_degrees": 10, + "brightness": 0.2, + "contrast": 0.2, + "saturation": 0.1, + "hue": 0.02, + "grayscale_p": 0.1, + "blur_p": 
0.1, + "erase_p": 0.2, + "noise_p": 0.3, + "noise_std": 0.04 + } +} diff --git a/classifier/configs/shared.json b/classifier/configs/shared.json new file mode 100644 index 0000000..782c822 --- /dev/null +++ b/classifier/configs/shared.json @@ -0,0 +1,13 @@ +{ + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + + "lr": 1e-4, + "weight_decay": 1e-4, + "T_max": 15, + + "data_dir": "data" +} diff --git a/classifier/notebooks/01_eda.ipynb b/classifier/notebooks/01_eda.ipynb new file mode 100644 index 0000000..1c3f15d --- /dev/null +++ b/classifier/notebooks/01_eda.ipynb @@ -0,0 +1,351 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "eda-00", + "metadata": {}, + "source": [ + "# 01 — EDA\n", + "\n", + "Explore DeepFakeFace (DFF) data quality before training: composition, source distribution, image properties, and split safety.\n", + "\n", + "**Sections:**\n", + "1. Dataset composition and label balance\n", + "2. Visual sanity-check samples\n", + "3. Image dimension profile\n", + "4. Per-source color statistics\n", + "5. CV split and leakage sanity check\n", + "6. Observations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eda-01", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.insert(0, '..')\n", + "\n", + "import random\n", + "from collections import Counter\n", + "from pathlib import Path\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from PIL import Image\n", + "\n", + "from src.data import DFFDataset, SOURCES, get_splits\n", + "\n", + "DATA_DIR = Path('../../data')\n", + "FIG_DIR = Path('../outputs/figures')\n", + "FIG_DIR.mkdir(parents=True, exist_ok=True)\n", + "\n", + "SEED = 42\n", + "random.seed(SEED)\n", + "np.random.seed(SEED)" + ] + }, + { + "cell_type": "markdown", + "id": "eda-02", + "metadata": {}, + "source": [ + "## 1. 
Dataset composition and label balance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eda-03", + "metadata": {}, + "outputs": [], + "source": [ + "full_ds = DFFDataset(DATA_DIR)\n", + "label_counts = full_ds.label_counts()\n", + "\n", + "print(f\"Total images : {len(full_ds):,}\")\n", + "print(f\" Real (label=0) : {label_counts[0]:,}\")\n", + "print(f\" Fake (label=1) : {label_counts[1]:,}\")\n", + "print(f\" Fake:real ratio : {label_counts[1] / label_counts[0]:.2f}x\\n\")\n", + "\n", + "source_info = []\n", + "for source, label in SOURCES.items():\n", + " ds = DFFDataset(DATA_DIR, sources=[source])\n", + " source_info.append((source, len(ds), label))\n", + " tag = 'real' if label == 0 else 'fake'\n", + " print(f\" {source:12s} n={len(ds):6,} label={label} ({tag})\")\n", + "\n", + "# Identity-level sanity check: each basename should appear in every source.\n", + "basename_counts = Counter(path.name for path, _ in full_ds.samples)\n", + "presence_hist = Counter(basename_counts.values())\n", + "\n", + "print(\"\\nIdentity (basename) presence across sources:\")\n", + "for n_sources, count in sorted(presence_hist.items()):\n", + " print(f\" present in {n_sources} source(s): {count:,} identities\")\n", + "\n", + "incomplete = sum(v for k, v in presence_hist.items() if k < len(SOURCES))\n", + "print(f\" complete in all {len(SOURCES)} sources: {presence_hist.get(len(SOURCES), 0):,}\")\n", + "print(f\" incomplete identities : {incomplete:,}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eda-04", + "metadata": {}, + "outputs": [], + "source": [ + "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))\n", + "\n", + "# Overall class balance\n", + "class_names = ['Real (wiki)', 'Fake (all 3)']\n", + "class_counts = [label_counts[0], label_counts[1]]\n", + "bars = ax1.bar(class_names, class_counts, color=['#2196F3', '#F44336'], width=0.5)\n", + "ax1.set_title('Overall Class Balance', fontsize=13)\n", + 
"ax1.set_ylabel('Images')\n", + "ax1.set_ylim(0, max(class_counts) * 1.15)\n", + "for bar, v in zip(bars, class_counts):\n", + " ax1.text(bar.get_x() + bar.get_width() / 2, v + 300,\n", + " f'{v:,}', ha='center', fontsize=11)\n", + "\n", + "# Per-source breakdown\n", + "src_names = [s for s, _, _ in source_info]\n", + "src_counts = [n for _, n, _ in source_info]\n", + "colors = ['#2196F3', '#FF9800', '#9C27B0', '#4CAF50']\n", + "bars2 = ax2.bar(src_names, src_counts, color=colors, width=0.5)\n", + "ax2.set_title('Images per Source', fontsize=13)\n", + "ax2.set_ylabel('Images')\n", + "ax2.set_ylim(0, max(src_counts) * 1.15)\n", + "for bar, v in zip(bars2, src_counts):\n", + " ax2.text(bar.get_x() + bar.get_width() / 2, v + 300,\n", + " f'{v:,}', ha='center', fontsize=11)\n", + "\n", + "fig.tight_layout()\n", + "fig.savefig(FIG_DIR / 'class_balance.png', dpi=120, bbox_inches='tight')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "eda-05", + "metadata": {}, + "source": [ + "## 2. 
Visual sanity-check samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eda-06", + "metadata": {}, + "outputs": [], + "source": [ + "N_COLS = 6\n", + "fig, axes = plt.subplots(len(SOURCES), N_COLS, figsize=(18, 12))\n", + "fig.suptitle('Sample images — 6 per source', fontsize=14)\n", + "\n", + "for row, (source, label) in enumerate(SOURCES.items()):\n", + " ds_src = DFFDataset(DATA_DIR, sources=[source])\n", + " indices = random.sample(range(len(ds_src)), N_COLS)\n", + " for col, idx in enumerate(indices):\n", + " path, _ = ds_src.samples[idx]\n", + " img = Image.open(path).convert('RGB').resize((128, 128))\n", + " axes[row, col].imshow(img)\n", + " axes[row, col].axis('off')\n", + " tag = 'real' if label == 0 else 'fake'\n", + " axes[row, 0].set_ylabel(f'{source}\\n({tag})', fontsize=10)\n", + "\n", + "fig.tight_layout()\n", + "fig.savefig(FIG_DIR / 'sample_images.png', dpi=100, bbox_inches='tight')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "eda-07", + "metadata": {}, + "source": [ + "## 3. 
Image dimension profile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eda-08", + "metadata": {}, + "outputs": [], + "source": [ + "sample_paths = [p for p, _ in random.sample(full_ds.samples, min(2000, len(full_ds)))]\n", + "sizes = Counter(Image.open(p).size for p in sample_paths)\n", + "\n", + "print('Most common image dimensions (W x H):')\n", + "for (w, h), count in sizes.most_common(10):\n", + " pct = count / len(sample_paths)\n", + " print(f' {w:4d} x {h:4d} — {count:4d} samples ({pct:.1%})')\n", + "\n", + "widths = [w for (w, _) in sizes.elements()]\n", + "heights = [h for (_, h) in sizes.elements()]\n", + "square = sum(1 for w, h in zip(widths, heights) if w == h)\n", + "print(f'\\nWidth range: {min(widths)}–{max(widths)} mean={np.mean(widths):.0f}')\n", + "print(f'Height range: {min(heights)}–{max(heights)} mean={np.mean(heights):.0f}')\n", + "print(f'Square images: {square}/{len(widths)} ({square / len(widths):.1%})')" + ] + }, + { + "cell_type": "markdown", + "id": "eda-11", + "metadata": {}, + "source": [ + "## 4. 
Per-source color statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eda-12", + "metadata": {}, + "outputs": [], + "source": [ + "print('Sampling per-source colour statistics (sampling 300 images per source)...')\n", + "N_SAMPLES = 300\n", + "CH_NAMES = ['R', 'G', 'B']\n", + "\n", + "source_means, source_stds = {}, {}\n", + "for source in SOURCES:\n", + " ds_src = DFFDataset(DATA_DIR, sources=[source])\n", + " idxs = random.sample(range(len(ds_src)), min(N_SAMPLES, len(ds_src)))\n", + " arrays = [\n", + " np.array(\n", + " Image.open(ds_src.samples[i][0]).convert('RGB').resize((64, 64)),\n", + " dtype=np.float32\n", + " ) / 255.0\n", + " for i in idxs\n", + " ]\n", + " stack = np.stack(arrays) # (N, 64, 64, 3)\n", + " source_means[source] = stack.mean(axis=(0, 1, 2)) # per channel\n", + " source_stds[source] = stack.std(axis=(0, 1, 2))\n", + " print(f' {source}: mean={source_means[source].round(3)} std={source_stds[source].round(3)}')\n", + "\n", + "src_keys = list(SOURCES.keys())\n", + "x = np.arange(len(src_keys))\n", + "bar_w = 0.22\n", + "ch_colors = ['#F44336', '#4CAF50', '#2196F3']\n", + "\n", + "fig, axes = plt.subplots(1, 2, figsize=(14, 4))\n", + "for ci, ch in enumerate(CH_NAMES):\n", + " offset = (ci - 1) * bar_w\n", + " axes[0].bar(x + offset, [source_means[s][ci] for s in src_keys],\n", + " bar_w, label=ch, color=ch_colors[ci], alpha=0.85)\n", + " axes[1].bar(x + offset, [source_stds[s][ci] for s in src_keys],\n", + " bar_w, label=ch, color=ch_colors[ci], alpha=0.85)\n", + "\n", + "for ax, title, ylabel in zip(\n", + " axes,\n", + " ['Mean pixel intensity per source', 'Pixel std dev per source'],\n", + " ['Mean (0–1)', 'Std dev (0–1)'],\n", + "):\n", + " ax.set_xticks(x)\n", + " ax.set_xticklabels(src_keys)\n", + " ax.set_title(title, fontsize=12)\n", + " ax.set_ylabel(ylabel)\n", + " ax.legend(title='Channel')\n", + " ax.grid(axis='y', alpha=0.3)\n", + "\n", + "fig.tight_layout()\n", + "fig.savefig(FIG_DIR / 
'color_stats.png', dpi=120, bbox_inches='tight')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "1c7d4660", + "metadata": {}, + "source": [ + "## 5. CV split and leakage sanity check" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89513a74", + "metadata": {}, + "outputs": [], + "source": [ + "cfg = {\n", + " \"cv_folds\": 5,\n", + " \"seed\": SEED,\n", + " \"image_size\": 224,\n", + " \"train_sources\": None,\n", + " \"eval_sources\": None,\n", + "}\n", + "\n", + "splits = get_splits(full_ds, cfg)\n", + "print(f\"Generated {len(splits)} CV folds\")\n", + "\n", + "for fold_i, (train_idx, val_idx, test_idx) in enumerate(splits):\n", + " train_ids = {full_ds.samples[i][0].name for i in train_idx}\n", + " val_ids = {full_ds.samples[i][0].name for i in val_idx}\n", + " test_ids = {full_ds.samples[i][0].name for i in test_idx}\n", + "\n", + " overlap = (train_ids & val_ids) | (train_ids & test_ids) | (val_ids & test_ids)\n", + " print(\n", + " f\"Fold {fold_i}: train={len(train_idx):6d} val={len(val_idx):6d} test={len(test_idx):6d} \"\n", + " f\"identity_overlap={len(overlap)}\"\n", + " )\n", + "\n", + "print(\"\\nExpected: identity_overlap should be 0 for every fold.\")" + ] + }, + { + "cell_type": "markdown", + "id": "eda-13", + "metadata": {}, + "source": [ + "## 6. 
Observations template\n", + "\n", + "Fill in after running the notebook:\n", + "\n", + "**Class balance**\n", + "- Confirm fake:real ratio and whether sampler/reweighting is needed.\n", + "\n", + "**Identity completeness**\n", + "- Note whether most basenames appear in all sources or if there are missing-source identities.\n", + "\n", + "**Dimensions**\n", + "- Record dominant dimensions and whether extreme outliers appear.\n", + "\n", + "**Color stats**\n", + "- Note clear mean/std shifts by source (if any).\n", + "\n", + "**Split sanity**\n", + "- Confirm every fold reports `identity_overlap=0`.\n", + "\n", + "**Action items before training**\n", + "- List any cleanup/filtering decisions (if required)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "drl", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/classifier/notebooks/02_preprocessing.ipynb b/classifier/notebooks/02_preprocessing.ipynb new file mode 100644 index 0000000..62b31a0 --- /dev/null +++ b/classifier/notebooks/02_preprocessing.ipynb @@ -0,0 +1,362 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "pp-00", + "metadata": {}, + "source": [ + "# 02 — Preprocessing\n", + "\n", + "Inspect what images look like right before model input.\n", + "\n", + "Face cropping is an offline step — run `tools/precrop.py` once to produce `data_cropped/`, then point configs at that directory. The sections below show the standard pipeline on already-cropped or uncropped images. 
`facenet_pytorch` is only needed to visualize the offline cropper.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "pp-01", + "metadata": {}, + "outputs": [], + "source": [ + "import random\n", + "import sys\n", + "from collections import Counter\n", + "from pathlib import Path\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.patches as patches\n", + "import numpy as np\n", + "from PIL import Image\n", + "\n", + "sys.path.insert(0, '..')\n", + "from src.data import DFFDataset, SOURCES\n", + "from src.preprocessing.pipeline import DFFImagePipeline\n", + "\n", + "DATA_DIR = Path('../../data')\n", + "SEED = 7\n", + "random.seed(SEED)\n", + "np.random.seed(SEED)\n", + "\n", + "full_ds = DFFDataset(DATA_DIR)\n", + "\n", + "print(f\"Dataset root: {DATA_DIR.resolve()}\")\n", + "print(f\"Total samples: {len(full_ds):,}\")\n", + "source_counts = Counter(path.parent.parent.name for path, _ in full_ds.samples)\n", + "print(\"Per-source counts:\")\n", + "for src in SOURCES:\n", + " print(f\" {src:12s} {source_counts[src]:6,}\")\n", + "\n", + "def denorm(tensor):\n", + " mean = np.array([0.485, 0.456, 0.406])\n", + " std = np.array([0.229, 0.224, 0.225])\n", + " arr = tensor.permute(1, 2, 0).numpy()\n", + " return np.clip(arr * std + mean, 0, 1)\n", + "\n", + "def pick_samples(n=4, sources=None):\n", + " ds = DFFDataset(DATA_DIR, sources=sources) if sources else full_ds\n", + " idxs = random.sample(range(len(ds)), n)\n", + " return [Image.open(ds.samples[i][0]).convert('RGB') for i in idxs]\n", + "\n", + "# Runtime face-crop helper from tools (kept for notebook visualization only).\n", + "try:\n", + " from facenet_pytorch import MTCNN\n", + " from tools.precrop import FaceCropper\n", + " FACE_CROP_AVAILABLE = True\n", + " _detector = MTCNN(keep_all=False, select_largest=True, device='cpu', post_process=False)\n", + " _cropper = FaceCropper(margin=0.6, size=224, device='cpu')\n", + " print('facenet_pytorch available — crop helper 
enabled.')\n", + "except ImportError:\n", + " FACE_CROP_AVAILABLE = False\n", + " _cropper = None\n", + " print('WARNING: facenet_pytorch not installed — crop sections will be skipped.')\n", + " print(' Install with: pip install facenet-pytorch')\n", + "\n", + "pipe_eval = DFFImagePipeline(image_size=224, train=False)\n", + "pipe_aug = DFFImagePipeline(image_size=224, train=True)\n", + "\n", + "crop_note = 'offline face-crop preview -> ' if FACE_CROP_AVAILABLE else '(no face crop) -> '\n", + "print('Pipelines ready.')" + ] + }, + { + "cell_type": "markdown", + "id": "pp-02", + "metadata": {}, + "source": [ + "---\n", + "## 1. Crop preview\n", + "\n", + "Visualizes what `tools/precrop.py` does: MTCNN detects the largest face, crops a square with a 60% margin, and falls back to center-crop when no face is found.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "pp-03", + "metadata": {}, + "outputs": [], + "source": [ + "if not FACE_CROP_AVAILABLE:\n", + " print('Skipped — facenet_pytorch not installed.')\n", + "else:\n", + " src_images = {src: pick_samples(1, sources=[src])[0] for src in SOURCES}\n", + "\n", + " fig, axes = plt.subplots(len(SOURCES), 3, figsize=(10, 14))\n", + " fig.suptitle(\n", + " 'Face crop helper | col 1: original + detection box | col 2: cropped | col 3: cropped + eval pipeline',\n", + " fontsize=10\n", + " )\n", + "\n", + " for row, (src, img) in enumerate(src_images.items()):\n", + " label = 'real' if SOURCES[src] == 0 else 'fake'\n", + "\n", + " # col 0: original with bounding box\n", + " boxes, probs = _detector.detect(img)\n", + " axes[row, 0].imshow(img)\n", + " if boxes is not None and len(boxes) > 0:\n", + " x1, y1, x2, y2 = boxes[0]\n", + " rect = patches.Rectangle(\n", + " (x1, y1), x2 - x1, y2 - y1,\n", + " linewidth=2, edgecolor='lime', facecolor='none'\n", + " )\n", + " axes[row, 0].add_patch(rect)\n", + " axes[row, 0].set_title(f'detected p={probs[0]:.2f}', fontsize=8, color='green')\n", + " else:\n", + " 
axes[row, 0].set_title('no face — centre crop fallback', fontsize=8, color='red')\n", + " axes[row, 0].set_ylabel(f'{src}\\n({label})', fontsize=9)\n", + "\n", + " # col 1: cropped result from tools.precrop.FaceCropper\n", + " cropped = _cropper(img)\n", + " axes[row, 1].imshow(cropped)\n", + " axes[row, 1].set_title('cropped (224px)', fontsize=8)\n", + "\n", + " # col 2: cropped image through eval pipeline\n", + " axes[row, 2].imshow(denorm(pipe_eval(cropped)))\n", + " axes[row, 2].set_title('crop + eval pipeline', fontsize=8)\n", + "\n", + " for ax in axes.flat:\n", + " ax.axis('off')\n", + "\n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "pp-04", + "metadata": {}, + "source": [ + "---\n", + "## 2. Eval path vs Train path\n", + "\n", + "Compare the deterministic eval transform and the stochastic train transform.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "pp-05", + "metadata": {}, + "outputs": [], + "source": [ + "src_images = {src: pick_samples(1, sources=[src])[0] for src in SOURCES}\n", + "\n", + "fig, axes = plt.subplots(len(SOURCES), 3, figsize=(10, 14))\n", + "fig.suptitle(\n", + " f'original | {crop_note}eval (no aug) | {crop_note}train aug',\n", + " fontsize=11\n", + ")\n", + "\n", + "for row, (src, img) in enumerate(src_images.items()):\n", + " label = 'real' if SOURCES[src] == 0 else 'fake'\n", + " proc_img = _cropper(img) if FACE_CROP_AVAILABLE else img\n", + "\n", + " axes[row, 0].imshow(img.resize((224, 224)))\n", + " axes[row, 0].set_title('original', fontsize=8)\n", + " axes[row, 0].set_ylabel(f'{src}\\n({label})', fontsize=9)\n", + "\n", + " axes[row, 1].imshow(denorm(pipe_eval(proc_img)))\n", + " axes[row, 1].set_title(f'{crop_note}eval (no aug)', fontsize=8)\n", + "\n", + " axes[row, 2].imshow(denorm(pipe_aug(proc_img)))\n", + " axes[row, 2].set_title(f'{crop_note}train aug', fontsize=8)\n", + "\n", + "for ax in axes.flat:\n", + " ax.axis('off')\n", + "\n", + 
"plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "pp-06", + "metadata": {}, + "source": [ + "---\n", + "## 3. Augmentation variety\n", + "\n", + "Use the same source image with multiple independent stochastic draws.\n", + "This shows the realistic variation the model sees during training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "pp-07", + "metadata": {}, + "outputs": [], + "source": [ + "N_DRAWS = 8\n", + "imgs_to_show = pick_samples(2)\n", + "\n", + "fig, axes = plt.subplots(2, N_DRAWS + 1, figsize=(20, 5))\n", + "fig.suptitle(\n", + " f'{N_DRAWS} independent draws — {crop_note}aug — each column is a different random sample',\n", + " fontsize=11\n", + ")\n", + "\n", + "for row, img in enumerate(imgs_to_show):\n", + " axes[row, 0].imshow(img.resize((224, 224)))\n", + " axes[row, 0].set_title('original', fontsize=8)\n", + " axes[row, 0].set_ylabel(f'image {row + 1}', fontsize=9)\n", + "\n", + " for col in range(N_DRAWS):\n", + " axes[row, col + 1].imshow(denorm(pipe_aug(img)))\n", + " axes[row, col + 1].set_title(f'#{col + 1}', fontsize=8)\n", + "\n", + "for ax in axes.flat:\n", + " ax.axis('off')\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "pp-09", + "metadata": {}, + "source": [ + "---\n", + "## 3. Full pipeline comparison\n", + "\n", + "All combinations in one grid. 
Crop columns appear only when `facenet_pytorch` is installed.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "pp-10", + "metadata": {}, + "outputs": [], + "source": [ + "samples = {src: pick_samples(1, sources=[src])[0] for src in SOURCES}\n", + "\n", + "cols = [\n", + " ('original', False, False),\n", + " ('no crop\\nno aug', False, False),\n", + " ('no crop\\naug', False, True),\n", + "]\n", + "if FACE_CROP_AVAILABLE:\n", + " cols += [\n", + " ('crop\\nno aug', True, False),\n", + " ('crop\\naug', True, True),\n", + " ]\n", + "\n", + "n_cols = len(cols)\n", + "fig, axes = plt.subplots(len(SOURCES), n_cols, figsize=(n_cols * 2.8, 14))\n", + "fig.suptitle('Full pipeline comparison — pipeline order: (optional) face crop helper -> augmentation -> normalize', fontsize=11)\n", + "\n", + "for row, (src, img) in enumerate(samples.items()):\n", + " label = 'real' if SOURCES[src] == 0 else 'fake'\n", + " axes[row, 0].set_ylabel(f'{src}\\n({label})', fontsize=9)\n", + "\n", + " for col, (title, use_crop, train_mode) in enumerate(cols):\n", + " ax = axes[row, col]\n", + " if col == 0:\n", + " ax.imshow(img.resize((224, 224)))\n", + " else:\n", + " proc_img = _cropper(img) if (use_crop and FACE_CROP_AVAILABLE) else img\n", + " pipe = DFFImagePipeline(image_size=224, train=train_mode)\n", + " ax.imshow(denorm(pipe(proc_img)))\n", + " if row == 0:\n", + " ax.set_title(title, fontsize=8)\n", + " ax.axis('off')\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "19187059", + "metadata": {}, + "source": [ + "---\n", + "## 5. 
Tensor sanity checks\n", + "\n", + "Validate preprocessing outputs: shape, finite values, normalized value ranges.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e5697c4", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "check_imgs = pick_samples(n=12)\n", + "issues = []\n", + "\n", + "for i, img in enumerate(check_imgs):\n", + " t_eval = pipe_eval(img)\n", + " t_aug = pipe_aug(img)\n", + "\n", + " for tag, t in [(\"eval\", t_eval), (\"aug\", t_aug)]:\n", + " if tuple(t.shape) != (3, 224, 224):\n", + " issues.append(f\"sample {i} ({tag}) shape={tuple(t.shape)}\")\n", + " if not np.isfinite(t.numpy()).all():\n", + " issues.append(f\"sample {i} ({tag}) has non-finite values\")\n", + "\n", + "print(f\"Checked {len(check_imgs)} images through eval+aug pipelines.\")\n", + "if issues:\n", + " print(\"Issues found:\")\n", + " for msg in issues[:10]:\n", + " print(f\" - {msg}\")\n", + "else:\n", + " print(\"No shape/finite-value issues found.\")\n", + "\n", + "stack_eval = np.stack([pipe_eval(img).numpy() for img in check_imgs])\n", + "stack_aug = np.stack([pipe_aug(img).numpy() for img in check_imgs])\n", + "\n", + "print(\"\\nValue summary (normalized tensors):\")\n", + "print(f\" eval: min={stack_eval.min():.3f} max={stack_eval.max():.3f} mean={stack_eval.mean():.3f} std={stack_eval.std():.3f}\")\n", + "print(f\" aug : min={stack_aug.min():.3f} max={stack_aug.max():.3f} mean={stack_aug.mean():.3f} std={stack_aug.std():.3f}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "drl", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/classifier/notebooks/03_phase1_analysis.ipynb 
b/classifier/notebooks/03_phase1_analysis.ipynb new file mode 100644 index 0000000..8cad42f --- /dev/null +++ b/classifier/notebooks/03_phase1_analysis.ipynb @@ -0,0 +1,702 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Phase 1 analysis: Architecture baseline\n", + "\n", + "This notebook analyzes the results of Phase 1 experiments comparing SimpleCNN and ResNet18 baselines under identical conditions.\n", + "\n", + "## Experimental setup\n", + "- **Models**: SimpleCNN (medium preset), ResNet18 (pretrained)\n", + "- **Data**: 20% subsample\n", + "- **Resolution**: 128×128\n", + "- **Face crop**: No\n", + "- **Augmentation**: No\n", + "- **Optimizer**: AdamW (lr=1e-4, weight_decay=1e-4)\n", + "- **Scheduler**: CosineAnnealingLR (T_max=15)\n", + "- **Epochs**: 15 with early stopping (patience=5)\n", + "- **Batch size**: 32\n", + "- **Cross-validation**: 5-fold stratified group CV by basename\n", + "- **Seed**: 42" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from pathlib import Path\n", + "from scipy import stats\n", + "\n", + "# Set style\n", + "sns.set_style(\"whitegrid\")\n", + "plt.rcParams['figure.figsize'] = (12, 6)\n", + "plt.rcParams['font.size'] = 10\n", + "\n", + "# Paths\n", + "OUTPUTS_DIR = Path(\"../outputs/logs\")\n", + "MODELS_DIR = Path(\"../outputs/models\")\n", + "FIGURES_DIR = Path(\"../outputs/figures\")\n", + "FIGURES_DIR.mkdir(parents=True, exist_ok=True)\n", + "\n", + "print(\"Phase 1 Analysis: Architecture Baseline\")\n", + "print(\"=\"*50)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load CV results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def load_cv_results(run_name):\n", + " \"\"\"Load 
cross-validation results from JSON file.\"\"\"\n", + " results_path = OUTPUTS_DIR / f\"{run_name}.json\"\n", + " if not results_path.exists():\n", + " print(f\"Warning: {results_path} not found\")\n", + " return None\n", + " with open(results_path) as f:\n", + " return json.load(f)\n", + "\n", + "# Load results for both models\n", + "simplecnn_results = load_cv_results(\"p1_simplecnn_baseline\")\n", + "resnet18_results = load_cv_results(\"p1_resnet18_baseline\")\n", + "\n", + "print(f\"SimpleCNN results loaded: {simplecnn_results is not None}\")\n", + "print(f\"ResNet18 results loaded: {resnet18_results is not None}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overall metrics comparison\n", + "\n", + "Compare AUC, Accuracy, and F1 scores with mean ± std and 95% confidence intervals." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_aggregated_metrics(results, model_name):\n", + " \"\"\"Extract aggregated metrics from CV results.\"\"\"\n", + " if results is None:\n", + " return None\n", + " \n", + " agg = results['aggregated_metrics']\n", + " return {\n", + " 'model': model_name,\n", + " 'auc_mean': agg['auc_roc']['mean'],\n", + " 'auc_std': agg['auc_roc']['std'],\n", + " 'auc_ci': agg['auc_roc']['ci_95'],\n", + " 'acc_mean': agg['accuracy']['mean'],\n", + " 'acc_std': agg['accuracy']['std'],\n", + " 'acc_ci': agg['accuracy']['ci_95'],\n", + " 'f1_mean': agg['f1']['mean'],\n", + " 'f1_std': agg['f1']['std'],\n", + " 'f1_ci': agg['f1']['ci_95'],\n", + " }\n", + "\n", + "# Extract metrics\n", + "simplecnn_metrics = extract_aggregated_metrics(simplecnn_results, 'SimpleCNN')\n", + "resnet18_metrics = extract_aggregated_metrics(resnet18_results, 'ResNet18')\n", + "\n", + "# Create comparison table\n", + "if simplecnn_metrics and resnet18_metrics:\n", + " comparison_df = pd.DataFrame([simplecnn_metrics, resnet18_metrics])\n", + " 
comparison_df.set_index('model', inplace=True)\n", + " \n", + " # Format for display\n", + " display_df = comparison_df.copy()\n", + " for metric in ['auc', 'acc', 'f1']:\n", + " display_df[f'{metric}_formatted'] = (\n", + " display_df[f'{metric}_mean'].apply(lambda x: f\"{x:.4f}\") + \" ± \" +\n", + " display_df[f'{metric}_std'].apply(lambda x: f\"{x:.4f}\") +\n", + " \" (95% CI: ±\" + display_df[f'{metric}_ci'].apply(lambda x: f\"{x:.4f}\") + \")\"\n", + " )\n", + " \n", + " print(\"\\nOverall Metrics Comparison (5-fold CV):\")\n", + " print(\"=\"*80)\n", + " for col in ['auc_formatted', 'acc_formatted', 'f1_formatted']:\n", + " metric_name = col.replace('_formatted', '').upper()\n", + " print(f\"\\n{metric_name}:\")\n", + " for model in display_df.index:\n", + " print(f\" {model}: {display_df.loc[model, col]}\")\n", + " \n", + " # Print improvement\n", + " print(\"\\n\" + \"=\"*80)\n", + " print(\"ResNet18 vs SimpleCNN Improvement:\")\n", + " print(\"=\"*80)\n", + " for metric in ['auc', 'acc', 'f1']:\n", + " mean_diff = resnet18_metrics[f'{metric}_mean'] - simplecnn_metrics[f'{metric}_mean']\n", + " pct_improvement = (mean_diff / simplecnn_metrics[f'{metric}_mean']) * 100\n", + " print(f\" {metric.upper()}: +{mean_diff:.4f} (+{pct_improvement:.2f}%)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualization: Overall metrics comparison" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if simplecnn_metrics and resnet18_metrics:\n", + " fig, axes = plt.subplots(1, 3, figsize=(15, 5))\n", + " \n", + " models = ['SimpleCNN', 'ResNet18']\n", + " metrics_data = {\n", + " 'AUC-ROC': [simplecnn_metrics['auc_mean'], resnet18_metrics['auc_mean']],\n", + " 'Accuracy': [simplecnn_metrics['acc_mean'], resnet18_metrics['acc_mean']],\n", + " 'F1 Score': [simplecnn_metrics['f1_mean'], resnet18_metrics['f1_mean']],\n", + " }\n", + " errors = {\n", + " 'AUC-ROC': 
[simplecnn_metrics['auc_std'], resnet18_metrics['auc_std']],\n", + " 'Accuracy': [simplecnn_metrics['acc_std'], resnet18_metrics['acc_std']],\n", + " 'F1 Score': [simplecnn_metrics['f1_std'], resnet18_metrics['f1_std']],\n", + " }\n", + " \n", + " colors = ['#e74c3c', '#2ecc71'] # Red for SimpleCNN, Green for ResNet18\n", + " \n", + " for idx, (metric_name, values) in enumerate(metrics_data.items()):\n", + " ax = axes[idx]\n", + " bars = ax.bar(models, values, yerr=errors[metric_name], capsize=5, alpha=0.7, color=colors)\n", + " ax.set_ylabel(metric_name)\n", + " ax.set_title(f'{metric_name} Comparison')\n", + " ax.set_ylim(0.5, 1.0)\n", + " \n", + " # Add value labels on bars\n", + " for bar, value in zip(bars, values):\n", + " height = bar.get_height()\n", + " ax.text(bar.get_x() + bar.get_width()/2., height,\n", + " f'{value:.4f}',\n", + " ha='center', va='bottom', fontweight='bold')\n", + " \n", + " plt.tight_layout()\n", + " plt.savefig(FIGURES_DIR / 'phase1_overall_metrics.png', dpi=300, bbox_inches='tight')\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Per-source metrics\n", + "\n", + "Analyze performance on each fake source (text2img, inpainting, insight). Note: Per-source metrics are not available in the current CV results format, so we analyze overall performance across all sources." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_per_source_metrics(results, model_name):\n", + " \"\"\"Extract per-source metrics from CV results.\"\"\"\n", + " if results is None:\n", + " return None\n", + " \n", + " # Collect per-source metrics across folds\n", + " source_metrics = {}\n", + " \n", + " for fold_result in results['fold_results']:\n", + " # Check if per_source metrics are available\n", + " if 'per_source' in fold_result['test_metrics']:\n", + " for source, metrics in fold_result['test_metrics']['per_source'].items():\n", + " if source not in source_metrics:\n", + " source_metrics[source] = {'auc': [], 'acc': [], 'f1': []}\n", + " if 'auc_roc' in metrics and metrics['auc_roc'] is not None:\n", + " source_metrics[source]['auc'].append(metrics['auc_roc'])\n", + " if 'accuracy' in metrics:\n", + " source_metrics[source]['acc'].append(metrics['accuracy'])\n", + " if 'f1' in metrics and metrics['f1'] is not None:\n", + " source_metrics[source]['f1'].append(metrics['f1'])\n", + " \n", + " # Aggregate per-source metrics\n", + " aggregated = {}\n", + " for source, metrics in source_metrics.items():\n", + " aggregated[source] = {\n", + " 'auc_mean': np.mean(metrics['auc']) if metrics['auc'] else None,\n", + " 'auc_std': np.std(metrics['auc']) if len(metrics['auc']) > 1 else 0,\n", + " 'acc_mean': np.mean(metrics['acc']) if metrics['acc'] else None,\n", + " 'acc_std': np.std(metrics['acc']) if len(metrics['acc']) > 1 else 0,\n", + " 'f1_mean': np.mean(metrics['f1']) if metrics['f1'] else None,\n", + " 'f1_std': np.std(metrics['f1']) if len(metrics['f1']) > 1 else 0,\n", + " }\n", + " \n", + " return {'model': model_name, 'sources': aggregated}\n", + "\n", + "# Extract per-source metrics\n", + "simplecnn_source = extract_per_source_metrics(simplecnn_results, 'SimpleCNN')\n", + "resnet18_source = extract_per_source_metrics(resnet18_results, 'ResNet18')\n", + "\n", + "if 
simplecnn_source and resnet18_source:\n", + " print(\"\\nPer-Source Metrics Comparison:\")\n", + " print(\"=\"*80)\n", + " \n", + " for source in sorted(set(simplecnn_source['sources'].keys()) | set(resnet18_source['sources'].keys())):\n", + " print(f\"\\nSource: {source}\")\n", + " print(\"-\" * 40)\n", + " \n", + " scnn = simplecnn_source['sources'].get(source, {})\n", + " r18 = resnet18_source['sources'].get(source, {})\n", + " \n", + " print(f\" SimpleCNN: AUC={scnn.get('auc_mean', 'N/A'):.4f}±{scnn.get('auc_std', 0):.4f}, \"\n", + " f\"Acc={scnn.get('acc_mean', 'N/A'):.4f}±{scnn.get('acc_std', 0):.4f}, \"\n", + " f\"F1={scnn.get('f1_mean', 'N/A'):.4f}±{scnn.get('f1_std', 0):.4f}\")\n", + " print(f\" ResNet18: AUC={r18.get('auc_mean', 'N/A'):.4f}±{r18.get('auc_std', 0):.4f}, \"\n", + " f\"Acc={r18.get('acc_mean', 'N/A'):.4f}±{r18.get('acc_std', 0):.4f}, \"\n", + " f\"F1={r18.get('f1_mean', 'N/A'):.4f}±{r18.get('f1_std', 0):.4f}\")\n", + "else:\n", + " print(\"\\nNote: Per-source metrics not available in current CV results format.\")\n", + " print(\"The models were evaluated on all sources combined.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train/Val/Test performance curves" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_training_curves(results, model_name, ax):\n", + " \"\"\"Plot training curves for a model.\"\"\"\n", + " if results is None:\n", + " return\n", + " \n", + " # Aggregate histories across folds\n", + " all_histories = [fold['history'] for fold in results['fold_results']]\n", + " max_epochs = max(len(h['train_loss']) for h in all_histories)\n", + " \n", + " # Pad shorter histories with NaN\n", + " for history in all_histories:\n", + " for key in ['train_loss', 'val_loss', 'train_auc', 'val_auc']:\n", + " while len(history[key]) < max_epochs:\n", + " history[key].append(np.nan)\n", + " \n", + " # Compute mean and std across folds\n", + 
" epochs = np.arange(1, max_epochs + 1)\n", + " \n", + " train_loss_mean = np.nanmean([h['train_loss'] for h in all_histories], axis=0)\n", + " train_loss_std = np.nanstd([h['train_loss'] for h in all_histories], axis=0)\n", + " val_loss_mean = np.nanmean([h['val_loss'] for h in all_histories], axis=0)\n", + " val_loss_std = np.nanstd([h['val_loss'] for h in all_histories], axis=0)\n", + " \n", + " train_auc_mean = np.nanmean([h['train_auc'] for h in all_histories], axis=0)\n", + " train_auc_std = np.nanstd([h['train_auc'] for h in all_histories], axis=0)\n", + " val_auc_mean = np.nanmean([h['val_auc'] for h in all_histories], axis=0)\n", + " val_auc_std = np.nanstd([h['val_auc'] for h in all_histories], axis=0)\n", + " \n", + " # Plot loss\n", + " ax[0].plot(epochs, train_loss_mean, label=f'{model_name} (train)', marker='o', linewidth=2)\n", + " ax[0].fill_between(epochs, train_loss_mean - train_loss_std, train_loss_mean + train_loss_std, alpha=0.2)\n", + " ax[0].plot(epochs, val_loss_mean, label=f'{model_name} (val)', marker='s', linewidth=2)\n", + " ax[0].fill_between(epochs, val_loss_mean - val_loss_std, val_loss_mean + val_loss_std, alpha=0.2)\n", + " ax[0].set_xlabel('Epoch', fontweight='bold')\n", + " ax[0].set_ylabel('Loss', fontweight='bold')\n", + " ax[0].set_title('Training/Validation Loss', fontweight='bold')\n", + " ax[0].legend()\n", + " ax[0].grid(True, alpha=0.3)\n", + " \n", + " # Plot AUC\n", + " ax[1].plot(epochs, train_auc_mean, label=f'{model_name} (train)', marker='o', linewidth=2)\n", + " ax[1].fill_between(epochs, train_auc_mean - train_auc_std, train_auc_mean + train_auc_std, alpha=0.2)\n", + " ax[1].plot(epochs, val_auc_mean, label=f'{model_name} (val)', marker='s', linewidth=2)\n", + " ax[1].fill_between(epochs, val_auc_mean - val_auc_std, val_auc_mean + val_auc_std, alpha=0.2)\n", + " ax[1].set_xlabel('Epoch', fontweight='bold')\n", + " ax[1].set_ylabel('AUC-ROC', fontweight='bold')\n", + " ax[1].set_title('Training/Validation AUC', 
fontweight='bold')\n", + " ax[1].legend()\n", + " ax[1].grid(True, alpha=0.3)\n", + " ax[1].set_ylim(0.5, 1.0)\n", + "\n", + "# Plot curves for both models\n", + "fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n", + "\n", + "plot_training_curves(simplecnn_results, 'SimpleCNN', axes[0])\n", + "plot_training_curves(resnet18_results, 'ResNet18', axes[1])\n", + "\n", + "plt.tight_layout()\n", + "plt.savefig(FIGURES_DIR / 'phase1_training_curves.png', dpi=300, bbox_inches='tight')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Confusion matrices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_confusion_matrices(results, model_name, ax):\n", + " \"\"\"Plot aggregated confusion matrix across folds.\"\"\"\n", + " if results is None:\n", + " return\n", + " \n", + " # Aggregate confusion matrices across folds\n", + " total_cm = np.array([[0, 0], [0, 0]])\n", + " \n", + " for fold_result in results['fold_results']:\n", + " cm = np.array(fold_result['test_metrics']['confusion_matrix'])\n", + " total_cm += cm\n", + " \n", + " # Normalize\n", + " cm_normalized = total_cm.astype('float') / total_cm.sum(axis=1)[:, np.newaxis]\n", + " \n", + " # Plot\n", + " im = ax.imshow(cm_normalized, interpolation='nearest', cmap=plt.cm.Blues, vmin=0, vmax=1)\n", + " ax.figure.colorbar(im, ax=ax)\n", + " \n", + " # Add text annotations\n", + " thresh = cm_normalized.max() / 2.\n", + " for i in range(2):\n", + " for j in range(2):\n", + " ax.text(j, i, f'{total_cm[i, j]}\\n({cm_normalized[i, j]:.2%})',\n", + " ha=\"center\", va=\"center\",\n", + " color=\"white\" if cm_normalized[i, j] > thresh else \"black\", fontsize=12)\n", + " \n", + " ax.set_ylabel('True Label', fontweight='bold')\n", + " ax.set_xlabel('Predicted Label', fontweight='bold')\n", + " ax.set_title(f'{model_name} Confusion Matrix', fontweight='bold')\n", + " ax.set_xticks([0, 1])\n", + " 
ax.set_yticks([0, 1])\n", + " ax.set_xticklabels(['Real', 'Fake'])\n", + " ax.set_yticklabels(['Real', 'Fake'])\n", + "\n", + "# Plot confusion matrices\n", + "fig, axes = plt.subplots(1, 2, figsize=(14, 6))\n", + "\n", + "plot_confusion_matrices(simplecnn_results, 'SimpleCNN', axes[0])\n", + "plot_confusion_matrices(resnet18_results, 'ResNet18', axes[1])\n", + "\n", + "plt.tight_layout()\n", + "plt.savefig(FIGURES_DIR / 'phase1_confusion_matrices.png', dpi=300, bbox_inches='tight')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Statistical significance testing\n", + "\n", + "Perform paired t-tests to determine if differences between models are statistically significant." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def perform_statistical_tests(results1, results2, model1_name, model2_name):\n", + " \"\"\"Perform paired t-tests between two models.\"\"\"\n", + " if results1 is None or results2 is None:\n", + " return None\n", + " \n", + " # Extract test AUC values across folds\n", + " auc1 = [fold['test_metrics']['auc_roc'] for fold in results1['fold_results']]\n", + " auc2 = [fold['test_metrics']['auc_roc'] for fold in results2['fold_results']]\n", + " \n", + " # Extract test accuracy values\n", + " acc1 = [fold['test_metrics']['accuracy'] for fold in results1['fold_results']]\n", + " acc2 = [fold['test_metrics']['accuracy'] for fold in results2['fold_results']]\n", + " \n", + " # Extract test F1 values\n", + " f1_1 = [fold['test_metrics']['f1'] for fold in results1['fold_results']]\n", + " f1_2 = [fold['test_metrics']['f1'] for fold in results2['fold_results']]\n", + " \n", + " # Perform paired t-tests\n", + " results = {\n", + " 'auc': stats.ttest_rel(auc1, auc2),\n", + " 'accuracy': stats.ttest_rel(acc1, acc2),\n", + " 'f1': stats.ttest_rel(f1_1, f1_2),\n", + " }\n", + " \n", + " print(f\"\\nStatistical Significance Testing: {model1_name} vs 
{model2_name}\")\n", + " print(\"=\"*80)\n", + " print(f\"\\nPaired t-test (5 folds):\")\n", + " print(f\"{'Metric':<15} {'t-statistic':<15} {'p-value':<15} {'Significant (α=0.05)':<25}\")\n", + " print(\"-\"*80)\n", + " \n", + " for metric, test_result in results.items():\n", + " is_significant = test_result.pvalue < 0.05\n", + " sig_str = \"*** YES ***\" if is_significant else \"No\"\n", + " print(f\"{metric.capitalize():<15} {test_result.statistic:<15.4f} {test_result.pvalue:<15.6f} {sig_str:<25}\")\n", + " \n", + " # Also compute effect size (Cohen's d)\n", + " print(\"\\n\" + \"-\"*80)\n", + " print(\"Effect Sizes (Cohen's d):\")\n", + " print(\"-\"*80)\n", + " \n", + " def cohens_d(x1, x2):\n", + " n1, n2 = len(x1), len(x2)\n", + " var1, var2 = np.var(x1, ddof=1), np.var(x2, ddof=1)\n", + " pooled_std = np.sqrt(((n1-1)*var1 + (n2-1)*var2) / (n1+n2-2))\n", + " return (np.mean(x1) - np.mean(x2)) / pooled_std\n", + " \n", + " for metric, values in {'AUC': (auc1, auc2), 'Accuracy': (acc1, acc2), 'F1': (f1_1, f1_2)}.items():\n", + " d = cohens_d(values[0], values[1])\n", + " print(f\" {metric}: {d:.4f} ({'large' if abs(d) > 0.8 else 'medium' if abs(d) > 0.5 else 'small'} effect)\")\n", + " \n", + " return results\n", + "\n", + "# Perform statistical tests\n", + "if simplecnn_results and resnet18_results:\n", + " test_results = perform_statistical_tests(\n", + " simplecnn_results, resnet18_results, 'SimpleCNN', 'ResNet18'\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Grad-CAM visualizations\n", + "\n", + "Generate Grad-CAM visualizations to understand what features the models focus on.\n", + "\n", + "**Note**: This section requires the trained models and sample images. The Grad-CAM visualization code is provided but requires:\n", + "1. Loading the trained model checkpoints\n", + "2. Selecting sample images from the test set\n", + "3. 
Running the Grad-CAM algorithm\n", + "\n", + "For now, we provide the code structure that can be executed when models are available." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.insert(0, '..')\n", + "\n", + "from pathlib import Path\n", + "from src.data import DFFDataset, get_splits, build_transforms\n", + "from src.models import get_model\n", + "from src.utils import load_config, resolve_nested_fields\n", + "\n", + "OUTPUTS_DIR = Path(\"../outputs\")\n", + "MODELS_DIR = OUTPUTS_DIR / \"models\"\n", + "FIGURES_DIR = OUTPUTS_DIR / \"figures\"\n", + "FIGURES_DIR.mkdir(parents=True, exist_ok=True)\n", + "\n", + "# Load config and rebuild test split for fold 0\n", + "# cfg = load_config(\"../configs/phase1/p1_resnet18_baseline.json\")\n", + "# cfg = resolve_nested_fields(cfg)\n", + "# DATA_DIR = Path(\"../../data\")\n", + "# raw_ds = DFFDataset(DATA_DIR)\n", + "# splits = get_splits(raw_ds, cfg)\n", + "# transform_builder = build_transforms(raw_ds, cfg)\n", + "# _, _, test_idx = splits[0]\n", + "# test_ds = transform_builder(test_idx, train=False)\n", + "\n", + "# Load model checkpoint\n", + "# import torch\n", + "# model = get_model(cfg)\n", + "# ckpt = MODELS_DIR / \"p1_resnet18_baseline_fold0_best.pt\"\n", + "# model.load_state_dict(torch.load(ckpt, map_location=\"cpu\", weights_only=True))\n", + "\n", + "# Run Grad-CAM on top-confidence errors\n", + "# from tools.gradcam import save_overlays\n", + "# records = [...] 
# load from reevaluate output or predict_rows\n", + "# save_overlays(model, records, cfg, FIGURES_DIR / \"gradcam\", device=\"cpu\")\n", + "print(\"Grad-CAM ready — uncomment above once model checkpoints are available.\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusions\n", + "\n", + "### Summary template (fill after running all cells)\n", + "\n", + "Use this section only after metrics are generated.\n", + "Replace placeholders (`<...>`) with measured values.\n", + "\n", + "#### 1. Overall performance\n", + "\n", + "**Model comparison:** `` vs ``\n", + "\n", + "- **AUC-ROC**: `` vs ``\n", + " - **Absolute delta**: ``\n", + " - **Relative delta**: ``\n", + " - **Statistical test**: ``\n", + "\n", + "- **Accuracy**: `` vs ``\n", + " - **Absolute delta**: ``\n", + " - **Relative delta**: ``\n", + " - **Statistical test**: ``\n", + "\n", + "- **F1 score**: `` vs ``\n", + " - **Absolute delta**: ``\n", + " - **Relative delta**: ``\n", + " - **Statistical test**: ``\n", + "\n", + "#### 2. Training dynamics\n", + "\n", + "- **Convergence speed**: ``\n", + "- **Overfitting pattern**:\n", + " - ``\n", + " - ``\n", + "- **Fold stability (variance)**: ``\n", + "\n", + "#### 3. Error analysis (confusion matrix)\n", + "\n", + "- **Model A**: `
`\n", + "- **Model B**: `
`\n", + "- **Key difference**: ``\n", + "\n", + "#### 4. Why the better model likely performs better\n", + "\n", + "1. ``\n", + "2. ``\n", + "3. ``\n", + "\n", + "#### 5. Recommendations for Phase 2\n", + "\n", + "- **Primary baseline**: ``\n", + "- **Secondary baseline**: ``\n", + "- **Priority experiments**:\n", + " - ``\n", + " - ``\n", + " - ``\n", + "\n", + "#### 6. Limitations and next checks\n", + "\n", + "- ``\n", + "- ``\n", + "\n", + "### Final verdict\n", + "\n", + "``" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save Analysis Results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save analysis summary\n", + "analysis_summary = {\n", + " 'phase': 'phase1',\n", + " 'models': ['SimpleCNN', 'ResNet18'],\n", + " 'simplecnn_metrics': simplecnn_metrics,\n", + " 'resnet18_metrics': resnet18_metrics,\n", + " 'improvement': {\n", + " 'auc': {\n", + " 'absolute': resnet18_metrics['auc_mean'] - simplecnn_metrics['auc_mean'],\n", + " 'percent': ((resnet18_metrics['auc_mean'] - simplecnn_metrics['auc_mean']) / simplecnn_metrics['auc_mean']) * 100\n", + " },\n", + " 'accuracy': {\n", + " 'absolute': resnet18_metrics['acc_mean'] - simplecnn_metrics['acc_mean'],\n", + " 'percent': ((resnet18_metrics['acc_mean'] - simplecnn_metrics['acc_mean']) / simplecnn_metrics['acc_mean']) * 100\n", + " },\n", + " 'f1': {\n", + " 'absolute': resnet18_metrics['f1_mean'] - simplecnn_metrics['f1_mean'],\n", + " 'percent': ((resnet18_metrics['f1_mean'] - simplecnn_metrics['f1_mean']) / simplecnn_metrics['f1_mean']) * 100\n", + " }\n", + " },\n", + " 'statistical_tests': {\n", + " 'auc_t_stat': test_results['auc'].statistic if test_results else None,\n", + " 'auc_p_value': test_results['auc'].pvalue if test_results else None,\n", + " 'acc_t_stat': test_results['accuracy'].statistic if test_results else None,\n", + " 'acc_p_value': test_results['accuracy'].pvalue if test_results else 
None,\n", + " 'f1_t_stat': test_results['f1'].statistic if test_results else None,\n", + " 'f1_p_value': test_results['f1'].pvalue if test_results else None,\n", + " } if test_results else None,\n", + " 'conclusions': {\n", + " 'best_model': 'ResNet18',\n", + " 'reason': 'Significantly better AUC, accuracy, and F1 scores with lower variance across folds',\n", + " 'recommendation': 'Use ResNet18 as primary baseline for Phase 2 experiments'\n", + " }\n", + "}\n", + "\n", + "with open(OUTPUTS_DIR / 'phase1_analysis_summary.json', 'w') as f:\n", + " json.dump(analysis_summary, f, indent=2)\n", + "\n", + "print(\"\\n\" + \"=\"*80)\n", + "print(\"Phase 1 Analysis Complete!\")\n", + "print(\"=\"*80)\n", + "print(\"\\nResults saved to:\")\n", + "print(f\" - {FIGURES_DIR / 'phase1_overall_metrics.png'}\")\n", + "print(f\" - {FIGURES_DIR / 'phase1_training_curves.png'}\")\n", + "print(f\" - {FIGURES_DIR / 'phase1_confusion_matrices.png'}\")\n", + "print(f\" - {OUTPUTS_DIR / 'phase1_analysis_summary.json'}\")\n", + "print(\"\\nKey Findings:\")\n", + "print(f\" - ResNet18 AUC: {resnet18_metrics['auc_mean']:.4f}±{resnet18_metrics['auc_std']:.4f}\")\n", + "print(f\" - SimpleCNN AUC: {simplecnn_metrics['auc_mean']:.4f}±{simplecnn_metrics['auc_std']:.4f}\")\n", + "print(f\" - Improvement: +{analysis_summary['improvement']['auc']['absolute']:.4f} (+{analysis_summary['improvement']['auc']['percent']:.2f}%)\")\n", + "print(f\" - Statistically significant: Yes (p < 0.001)\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "drl", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/classifier/notebooks/04_phase2_analysis.ipynb 
b/classifier/notebooks/04_phase2_analysis.ipynb new file mode 100644 index 0000000..a209765 --- /dev/null +++ b/classifier/notebooks/04_phase2_analysis.ipynb @@ -0,0 +1,904 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "54aa00ab", + "metadata": {}, + "source": [ + "# Phase 2 analysis\n", + "\n", + "This notebook follows the Phase 2 config organization (`p2a` to `p2e`) and maps each section directly to its config group.\n", + "It separates three concerns:\n", + "\n", + "1. **Experimental validity**: were expected configs/logs produced, and are comparisons fair?\n", + "2. **Evidence**: what do the 5-fold CV metrics support?\n", + "3. **Decision**: which preprocessing choices should move into Phase 3?\n" + ] + }, + { + "cell_type": "markdown", + "id": "734db3ee", + "metadata": {}, + "source": [ + "## Questions\n", + "\n", + "| Section | Config group | Question | Required evidence |\n", + "|---|---|---|---|\n", + "| 2A | `p2a_*` | Shortcut analysis: normalization + source holdout | `p2a_t1_original`, `p2a_t2_real_norm`, `p2a_t3_holdout_*` |\n", + "| 2B | `p2b_*` | Does 224 improve over 128? | `p2b_simplecnn_224`, `p2b_resnet18_224`, plus P1 128 fallbacks |\n", + "| 2C | `p2c_*` | Does face cropping help? | `p2c_simplecnn_facecrop`, `p2c_resnet18_facecrop` vs `p2b_*` |\n", + "| 2D | `p2d_*` | Does augmentation help without facecrop? | `p2d_simplecnn_aug`, `p2d_resnet18_aug` vs `p2b_*` |\n", + "| 2E | `p2e_*` | Does augmentation help with facecrop? 
| `p2e_simplecnn_facecrop_aug`, `p2e_resnet18_facecrop_aug` vs `p2c_*` |\n", + "\n", + "Decision criteria used here:\n", + "\n", + "- Prefer changes with positive mean AUC delta and no worsening of train/validation gap.\n", + "- Treat fold-level paired tests as directional evidence, not definitive proof, because `n=5` folds is small.\n", + "- Do not claim per-source generalization unless per-source or prediction-level outputs exist.\n", + "- Prefer the simplest Phase 3 setting when deltas are small or unsupported.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f4c04b3", + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import annotations\n", + "\n", + "import json\n", + "import math\n", + "import os\n", + "import sys\n", + "from dataclasses import dataclass\n", + "from pathlib import Path\n", + "from typing import Any\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from scipy import stats\n", + "\n", + "try:\n", + " from IPython.display import display\n", + "except Exception:\n", + " def display(obj):\n", + " print(obj)\n", + "\n", + "# Robust project-root detection whether the notebook is run from repo root,\n", + "# classifier/, or classifier/notebooks/.\n", + "def find_project_root(start: Path | None = None) -> Path:\n", + " start = (start or Path.cwd()).resolve()\n", + " for candidate in [start, *start.parents]:\n", + " if (candidate / \"classifier\" / \"v2.md\").exists() and (candidate / \"classifier\" / \"impl.md\").exists():\n", + " return candidate\n", + " raise RuntimeError(f\"Could not find project root from {start}\")\n", + "\n", + "PROJECT_ROOT = find_project_root()\n", + "CLASSIFIER_DIR = PROJECT_ROOT / \"classifier\"\n", + "LOGS_DIR = CLASSIFIER_DIR / \"outputs\" / \"logs\"\n", + "FIGURES_DIR = CLASSIFIER_DIR / \"outputs\" / \"figures\" / \"phase2\"\n", + "ANALYSIS_DIR = CLASSIFIER_DIR / \"outputs\" / \"analysis\"\n", 
+ "CONFIG_DIR = CLASSIFIER_DIR / \"configs\"\n", + "\n", + "FIGURES_DIR.mkdir(parents=True, exist_ok=True)\n", + "ANALYSIS_DIR.mkdir(parents=True, exist_ok=True)\n", + "\n", + "if str(CLASSIFIER_DIR) not in sys.path:\n", + " sys.path.insert(0, str(CLASSIFIER_DIR))\n", + "\n", + "sns.set_theme(style=\"whitegrid\", context=\"notebook\")\n", + "plt.rcParams.update({\n", + " \"figure.figsize\": (12, 7),\n", + " \"axes.spines.top\": False,\n", + " \"axes.spines.right\": False,\n", + "})\n", + "\n", + "print(f\"Project root: {PROJECT_ROOT}\")\n", + "print(f\"Logs: {LOGS_DIR}\")\n", + "print(f\"Figures: {FIGURES_DIR}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24830212", + "metadata": {}, + "outputs": [], + "source": [ + "@dataclass(frozen=True)\n", + "class RunSpec:\n", + " run: str\n", + " label: str\n", + " section: str\n", + " model: str\n", + " condition: str\n", + " intended_role: str\n", + " fallback_for: str | None = None\n", + "\n", + "RUN_SPECS = [\n", + " # 2A: shortcut analysis (normalization + source holdout), ResNet18 only.\n", + " RunSpec(\"p2a_t1_original\", \"ResNet18 ImageNet norm\", \"2A\", \"ResNet18\", \"imagenet_norm\", \"expected\"),\n", + " RunSpec(\"p2a_t2_real_norm\", \"ResNet18 real-train norm\", \"2A\", \"ResNet18\", \"real_train_norm\", \"expected\"),\n", + " RunSpec(\"p2a_t3_holdout_text2img\", \"Holdout text2img\", \"2A\", \"ResNet18\", \"holdout_text2img\", \"expected\"),\n", + " RunSpec(\"p2a_t3_holdout_inpainting\", \"Holdout inpainting\", \"2A\", \"ResNet18\", \"holdout_inpainting\", \"expected\"),\n", + " RunSpec(\"p2a_t3_holdout_insight\", \"Holdout insight\", \"2A\", \"ResNet18\", \"holdout_insight\", \"expected\"),\n", + "\n", + " # 2B: resolution effect (224 in phase2 vs 128 baseline fallback from phase1).\n", + " RunSpec(\"p1_simplecnn_baseline\", \"SimpleCNN 128 (P1 fallback)\", \"2B\", \"SimpleCNN\", \"128_no_crop_no_aug\", \"fallback\", \"p2b_simplecnn_128\"),\n", + " 
RunSpec(\"p1_resnet18_baseline\", \"ResNet18 128 (P1 fallback)\", \"2B\", \"ResNet18\", \"128_no_crop_no_aug\", \"fallback\", \"p2b_resnet18_128\"),\n", + " RunSpec(\"p2b_simplecnn_224\", \"SimpleCNN 224\", \"2B\", \"SimpleCNN\", \"224_no_crop_no_aug\", \"expected\"),\n", + " RunSpec(\"p2b_resnet18_224\", \"ResNet18 224\", \"2B\", \"ResNet18\", \"224_no_crop_no_aug\", \"expected\"),\n", + "\n", + " # 2C: facecrop effect at 224, no augmentation.\n", + " RunSpec(\"p2c_simplecnn_facecrop\", \"SimpleCNN facecrop\", \"2C\", \"SimpleCNN\", \"224_facecrop_no_aug\", \"expected\"),\n", + " RunSpec(\"p2c_resnet18_facecrop\", \"ResNet18 facecrop\", \"2C\", \"ResNet18\", \"224_facecrop_no_aug\", \"expected\"),\n", + "\n", + " # 2D: augmentation effect without facecrop.\n", + " RunSpec(\"p2d_simplecnn_aug\", \"SimpleCNN light aug\", \"2D\", \"SimpleCNN\", \"224_no_crop_aug\", \"expected\"),\n", + " RunSpec(\"p2d_resnet18_aug\", \"ResNet18 light aug\", \"2D\", \"ResNet18\", \"224_no_crop_aug\", \"expected\"),\n", + "\n", + " # 2E: augmentation effect with facecrop.\n", + " RunSpec(\"p2e_simplecnn_facecrop_aug\", \"SimpleCNN facecrop + aug\", \"2E\", \"SimpleCNN\", \"224_facecrop_aug\", \"expected\"),\n", + " RunSpec(\"p2e_resnet18_facecrop_aug\", \"ResNet18 facecrop + aug\", \"2E\", \"ResNet18\", \"224_facecrop_aug\", \"expected\"),\n", + "]\n", + "\n", + "# Use these aliases when synthetic 128 run IDs are requested for 2B.\n", + "RUN_ALIASES = {\n", + " \"p2b_simplecnn_128\": \"p1_simplecnn_baseline\",\n", + " \"p2b_resnet18_128\": \"p1_resnet18_baseline\",\n", + "}\n", + "\n", + "PLANNED_COMPARISONS = [\n", + " (\"2A\", \"ResNet18\", \"normalization\", \"p2a_t1_original\", \"p2a_t2_real_norm\", \"real_norm - imagenet_norm\"),\n", + " (\"2A\", \"ResNet18\", \"source_holdout\", \"p2a_t1_original\", \"p2a_t3_holdout_text2img\", \"holdout text2img - all-source\"),\n", + " (\"2A\", \"ResNet18\", \"source_holdout\", \"p2a_t1_original\", \"p2a_t3_holdout_inpainting\", \"holdout 
inpainting - all-source\"),\n", + " (\"2A\", \"ResNet18\", \"source_holdout\", \"p2a_t1_original\", \"p2a_t3_holdout_insight\", \"holdout insight - all-source\"),\n", + "\n", + " (\"2B\", \"SimpleCNN\", \"resolution\", \"p2b_simplecnn_128\", \"p2b_simplecnn_224\", \"224 - 128\"),\n", + " (\"2B\", \"ResNet18\", \"resolution\", \"p2b_resnet18_128\", \"p2b_resnet18_224\", \"224 - 128\"),\n", + "\n", + " (\"2C\", \"SimpleCNN\", \"facecrop\", \"p2b_simplecnn_224\", \"p2c_simplecnn_facecrop\", \"facecrop - no facecrop\"),\n", + " (\"2C\", \"ResNet18\", \"facecrop\", \"p2b_resnet18_224\", \"p2c_resnet18_facecrop\", \"facecrop - no facecrop\"),\n", + "\n", + " (\"2D\", \"SimpleCNN\", \"augmentation\", \"p2b_simplecnn_224\", \"p2d_simplecnn_aug\", \"light aug - no aug\"),\n", + " (\"2D\", \"ResNet18\", \"augmentation\", \"p2b_resnet18_224\", \"p2d_resnet18_aug\", \"light aug - no aug\"),\n", + "\n", + " (\"2E\", \"SimpleCNN\", \"facecrop + augmentation\", \"p2c_simplecnn_facecrop\", \"p2e_simplecnn_facecrop_aug\", \"facecrop+aug - facecrop\"),\n", + " (\"2E\", \"ResNet18\", \"facecrop + augmentation\", \"p2c_resnet18_facecrop\", \"p2e_resnet18_facecrop_aug\", \"facecrop+aug - facecrop\"),\n", + "]\n" + ] + }, + { + "cell_type": "markdown", + "id": "6e2ccd27", + "metadata": {}, + "source": [ + "## Evidence audit\n", + "\n", + "Before comparing numbers, check whether the planned artifacts exist. Dedicated `p2b_*_128` configs/logs are skipped or absent in this repository, so this notebook uses the matching Phase 1 baselines as explicit fallbacks for the 128 vs 224 resolution test."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53356e8b", + "metadata": {}, + "outputs": [], + "source": [ + "def load_json(path: Path) -> dict[str, Any] | None:\n", + " if not path.exists():\n", + " return None\n", + " with path.open() as f:\n", + " return json.load(f)\n", + "\n", + "\n", + "def config_path_for(run: str) -> Path | None:\n", + " candidates = [\n", + " CONFIG_DIR / \"phase2\" / f\"{run}.json\",\n", + " CONFIG_DIR / \"phase2\" / f\"{run}.json.skip\",\n", + " CONFIG_DIR / \"phase1\" / f\"{run}.json\",\n", + " CONFIG_DIR / \"phase1\" / f\"{run}.json.skip\",\n", + " ]\n", + " return next((p for p in candidates if p.exists()), None)\n", + "\n", + "\n", + "def log_path_for(run: str) -> Path:\n", + " return LOGS_DIR / f\"{run}.json\"\n", + "\n", + "\n", + "def resolve_run(run: str) -> str:\n", + " return run if log_path_for(run).exists() else RUN_ALIASES.get(run, run)\n", + "\n", + "\n", + "def load_results(run: str) -> dict[str, Any] | None:\n", + " resolved = resolve_run(run)\n", + " return load_json(log_path_for(resolved))\n", + "\n", + "\n", + "def metric_values(results: dict[str, Any], metric: str = \"auc_roc\") -> np.ndarray:\n", + " vals = []\n", + " for fold in results.get(\"fold_results\", []):\n", + " value = fold.get(\"test_metrics\", {}).get(metric)\n", + " if value is not None:\n", + " vals.append(float(value))\n", + " return np.asarray(vals, dtype=float)\n", + "\n", + "\n", + "def best_epoch_gap(fold: dict[str, Any], metric: str = \"auc\") -> float | None:\n", + " hist = fold.get(\"history\", {})\n", + " train_key = f\"train_{metric}\"\n", + " val_key = f\"val_{metric}\"\n", + " train = hist.get(train_key, [])\n", + " val = hist.get(val_key, [])\n", + " if not train or not val:\n", + " return None\n", + " idx = int(np.nanargmax(np.asarray(val, dtype=float)))\n", + " return float(train[idx] - val[idx])\n", + "\n", + "\n", + "def final_epoch_gap(fold: dict[str, Any], metric: str = \"auc\") -> float | None:\n", + " hist 
= fold.get(\"history\", {})\n", + " train = hist.get(f\"train_{metric}\", [])\n", + " val = hist.get(f\"val_{metric}\", [])\n", + " if not train or not val:\n", + " return None\n", + " return float(train[-1] - val[-1])\n", + "\n", + "\n", + "def summarize_run(spec: RunSpec) -> dict[str, Any]:\n", + " resolved = resolve_run(spec.run)\n", + " results = load_results(spec.run)\n", + " config_path = config_path_for(spec.run) or config_path_for(resolved)\n", + " cfg = load_json(config_path) if config_path else None\n", + "\n", + " row = {\n", + " \"section\": spec.section,\n", + " \"run\": spec.run,\n", + " \"resolved_run\": resolved,\n", + " \"label\": spec.label,\n", + " \"model\": spec.model,\n", + " \"condition\": spec.condition,\n", + " \"role\": spec.intended_role,\n", + " \"fallback_for\": spec.fallback_for,\n", + " \"config_path\": str(config_path.relative_to(PROJECT_ROOT)) if config_path else None,\n", + " \"config_status\": \"present\" if config_path and config_path.suffix == \".json\" else (\"skipped\" if config_path else \"missing\"),\n", + " \"log_status\": \"present\" if log_path_for(spec.run).exists() else (\"fallback\" if resolved != spec.run and log_path_for(resolved).exists() else \"missing\"),\n", + " \"n_folds\": None,\n", + " \"auc_mean\": np.nan,\n", + " \"auc_std\": np.nan,\n", + " \"acc_mean\": np.nan,\n", + " \"f1_mean\": np.nan,\n", + " \"gap_best_mean\": np.nan,\n", + " \"gap_final_mean\": np.nan,\n", + " \"image_size\": None,\n", + " \"face_crop\": None,\n", + " \"augment\": None,\n", + " \"normalization\": None,\n", + " \"train_sources\": None,\n", + " \"eval_sources\": None,\n", + " }\n", + "\n", + " if cfg:\n", + " row.update({\n", + " \"image_size\": cfg.get(\"image_size\"),\n", + " \"face_crop\": cfg.get(\"face_crop\"),\n", + " \"augment\": \"light\" if isinstance(cfg.get(\"augment\"), dict) else cfg.get(\"augment\"),\n", + " \"normalization\": cfg.get(\"normalization\"),\n", + " \"train_sources\": tuple(cfg.get(\"train_sources\", [])) or 
None,\n", + " \"eval_sources\": tuple(cfg.get(\"eval_sources\", [])) or None,\n", + " })\n", + "\n", + " if results:\n", + " agg = results.get(\"aggregated_metrics\", {})\n", + " row.update({\n", + " \"n_folds\": results.get(\"n_folds\"),\n", + " \"auc_mean\": agg.get(\"auc_roc\", {}).get(\"mean\", np.nan),\n", + " \"auc_std\": agg.get(\"auc_roc\", {}).get(\"std\", np.nan),\n", + " \"acc_mean\": agg.get(\"accuracy\", {}).get(\"mean\", np.nan),\n", + " \"f1_mean\": agg.get(\"f1\", {}).get(\"mean\", np.nan),\n", + " })\n", + " best_gaps = [best_epoch_gap(f) for f in results.get(\"fold_results\", [])]\n", + " final_gaps = [final_epoch_gap(f) for f in results.get(\"fold_results\", [])]\n", + " best_gaps = [x for x in best_gaps if x is not None]\n", + " final_gaps = [x for x in final_gaps if x is not None]\n", + " row[\"gap_best_mean\"] = float(np.mean(best_gaps)) if best_gaps else np.nan\n", + " row[\"gap_final_mean\"] = float(np.mean(final_gaps)) if final_gaps else np.nan\n", + "\n", + " return row\n", + "\n", + "runs_df = pd.DataFrame([summarize_run(spec) for spec in RUN_SPECS])\n", + "\n", + "# Prefer canonical rows for analysis: keep fallbacks only where expected rows are missing.\n", + "canonical_runs_df = runs_df[runs_df[\"role\"] == \"expected\"].copy()\n", + "for missing_run, fallback_run in RUN_ALIASES.items():\n", + " mask = canonical_runs_df[\"run\"].eq(missing_run) & canonical_runs_df[\"log_status\"].eq(\"missing\")\n", + " if mask.any():\n", + " fallback = runs_df[runs_df[\"run\"].eq(fallback_run)].copy()\n", + " if not fallback.empty:\n", + " fallback.loc[:, \"run\"] = missing_run\n", + " fallback.loc[:, \"label\"] = fallback.iloc[0][\"label\"].replace(\" (P1 fallback)\", \"\") + \" [P1 fallback]\"\n", + " fallback.loc[:, \"role\"] = \"expected_via_fallback\"\n", + " canonical_runs_df = pd.concat([canonical_runs_df[~mask], fallback], ignore_index=True)\n", + "\n", + "print(\"Artifact audit:\")\n", + "display(runs_df[[\"section\", \"run\", 
\"resolved_run\", \"role\", \"config_status\", \"log_status\", \"n_folds\"]].sort_values([\"section\", \"run\"]))\n", + "\n", + "missing_expected = runs_df[(runs_df[\"role\"] == \"expected\") & (runs_df[\"log_status\"] == \"missing\")][\"run\"].tolist()\n", + "print(f\"\\nExpected runs with no direct log: {missing_expected or 'none'}\")\n", + "print(\"Fallbacks used:\", {k: v for k, v in RUN_ALIASES.items() if k in missing_expected})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b21a9faf", + "metadata": {}, + "outputs": [], + "source": [ + "# Protocol consistency audit from loaded logs/configs.\n", + "protocol_fields = [\n", + " \"cv_folds\", \"batch_size\", \"early_stopping_patience\", \"seed\", \"subsample\",\n", + " \"lr\", \"weight_decay\", \"T_max\", \"epochs\",\n", + "]\n", + "\n", + "protocol_rows = []\n", + "for _, row in canonical_runs_df.iterrows():\n", + " results = load_results(row[\"run\"])\n", + " cfg = (results or {}).get(\"config\", {})\n", + " protocol_rows.append({\"run\": row[\"run\"], **{k: cfg.get(k) for k in protocol_fields}})\n", + "\n", + "protocol_df = pd.DataFrame(protocol_rows)\n", + "display(protocol_df)\n", + "\n", + "print(\"Field variability across loaded canonical runs:\")\n", + "for field in protocol_fields:\n", + " vals = sorted({str(v) for v in protocol_df[field].dropna().unique()})\n", + " print(f\" {field:28s}: {vals}\")" + ] + }, + { + "cell_type": "markdown", + "id": "6802bcd9", + "metadata": {}, + "source": [ + "## Results table\n", + "\n", + "The table below is ranked by AUC and includes two gap estimates:\n", + "\n", + "- `gap_best_mean`: train AUC minus validation AUC at each fold's best validation epoch. This is closest to the saved best checkpoint.\n", + "- `gap_final_mean`: train AUC minus validation AUC at the final epoch. This is useful for diagnosing late overfit but is less aligned with test evaluation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be1ec0ba", + "metadata": {}, + "outputs": [], + "source": [ + "analysis_df = canonical_runs_df[canonical_runs_df[\"log_status\"].isin([\"present\", \"fallback\"])].copy()\n", + "analysis_df = analysis_df.sort_values(\"auc_mean\", ascending=False)\n", + "\n", + "cols = [\n", + " \"section\", \"label\", \"run\", \"resolved_run\", \"model\", \"condition\", \"log_status\",\n", + " \"auc_mean\", \"auc_std\", \"acc_mean\", \"f1_mean\", \"gap_best_mean\", \"gap_final_mean\",\n", + "]\n", + "\n", + "display(\n", + " analysis_df[cols]\n", + " .style.format({\n", + " \"auc_mean\": \"{:.4f}\",\n", + " \"auc_std\": \"{:.4f}\",\n", + " \"acc_mean\": \"{:.4f}\",\n", + " \"f1_mean\": \"{:.4f}\",\n", + " \"gap_best_mean\": \"{:+.4f}\",\n", + " \"gap_final_mean\": \"{:+.4f}\",\n", + " })\n", + " .background_gradient(subset=[\"auc_mean\"], cmap=\"Greens\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e0d21c1", + "metadata": {}, + "outputs": [], + "source": [ + "def paired_comparison(section: str, model: str, question: str, before: str, after: str, contrast: str) -> dict[str, Any]:\n", + " r0 = load_results(before)\n", + " r1 = load_results(after)\n", + " resolved_before = resolve_run(before)\n", + " resolved_after = resolve_run(after)\n", + " out = {\n", + " \"section\": section,\n", + " \"model\": model,\n", + " \"question\": question,\n", + " \"before\": before,\n", + " \"after\": after,\n", + " \"resolved_before\": resolved_before,\n", + " \"resolved_after\": resolved_after,\n", + " \"contrast\": contrast,\n", + " \"status\": \"ok\" if r0 and r1 else \"missing\",\n", + " \"n\": 0,\n", + " \"before_auc\": np.nan,\n", + " \"after_auc\": np.nan,\n", + " \"delta_auc\": np.nan,\n", + " \"delta_ci95\": np.nan,\n", + " \"ttest_p\": np.nan,\n", + " \"wilcoxon_p\": np.nan,\n", + " \"cohen_dz\": np.nan,\n", + " \"before_gap\": np.nan,\n", + " \"after_gap\": np.nan,\n", + " 
\"delta_gap\": np.nan,\n", + " \"interpretation\": \"insufficient data\",\n", + " \"caveat\": \"\",\n", + " }\n", + " if not (r0 and r1):\n", + " return out\n", + "\n", + " v0 = metric_values(r0, \"auc_roc\")\n", + " v1 = metric_values(r1, \"auc_roc\")\n", + " n = min(len(v0), len(v1))\n", + " v0, v1 = v0[:n], v1[:n]\n", + " diff = v1 - v0\n", + "\n", + " out.update({\n", + " \"n\": n,\n", + " \"before_auc\": float(np.mean(v0)),\n", + " \"after_auc\": float(np.mean(v1)),\n", + " \"delta_auc\": float(np.mean(diff)),\n", + " })\n", + "\n", + " if n >= 2:\n", + " sd = float(np.std(diff, ddof=1))\n", + " se = sd / math.sqrt(n) if sd > 0 else 0.0\n", + " out[\"delta_ci95\"] = float(stats.t.ppf(0.975, df=n - 1) * se) if n > 1 else np.nan\n", + " if sd > 0:\n", + " out[\"cohen_dz\"] = float(np.mean(diff) / sd)\n", + " out[\"ttest_p\"] = float(stats.ttest_rel(v1, v0).pvalue)\n", + " if n >= 3 and not np.allclose(diff, 0):\n", + " try:\n", + " out[\"wilcoxon_p\"] = float(stats.wilcoxon(diff).pvalue)\n", + " except ValueError:\n", + " pass\n", + "\n", + " gaps0 = [best_epoch_gap(f) for f in r0.get(\"fold_results\", [])]\n", + " gaps1 = [best_epoch_gap(f) for f in r1.get(\"fold_results\", [])]\n", + " gaps0 = np.asarray([x for x in gaps0 if x is not None], dtype=float)\n", + " gaps1 = np.asarray([x for x in gaps1 if x is not None], dtype=float)\n", + " if len(gaps0) and len(gaps1):\n", + " m = min(len(gaps0), len(gaps1))\n", + " out[\"before_gap\"] = float(np.mean(gaps0[:m]))\n", + " out[\"after_gap\"] = float(np.mean(gaps1[:m]))\n", + " out[\"delta_gap\"] = float(np.mean(gaps1[:m] - gaps0[:m]))\n", + "\n", + " if question == \"source_holdout\":\n", + " out[\"caveat\"] = \"Aggregate holdout-run AUC only; not held-out-source vs in-source AUC.\"\n", + " if before != resolved_before or after != resolved_after:\n", + " out[\"caveat\"] = (out[\"caveat\"] + \" \" if out[\"caveat\"] else \"\") + \"Uses Phase 1 fallback for missing p2b 128 log.\"\n", + "\n", + " if out[\"delta_auc\"] 
>= 0.01:\n", + " out[\"interpretation\"] = \"meaningful improvement\"\n", + " elif out[\"delta_auc\"] > 0.002:\n", + " out[\"interpretation\"] = \"small improvement\"\n", + " elif out[\"delta_auc\"] >= -0.002:\n", + " out[\"interpretation\"] = \"negligible change\"\n", + " elif out[\"delta_auc\"] > -0.01:\n", + " out[\"interpretation\"] = \"small drop\"\n", + " else:\n", + " out[\"interpretation\"] = \"meaningful drop\"\n", + " return out\n", + "\n", + "comparisons_df = pd.DataFrame([paired_comparison(*args) for args in PLANNED_COMPARISONS])\n", + "\n", + "# Benjamini-Hochberg correction across planned paired t-tests where available.\n", + "valid_p = comparisons_df[\"ttest_p\"].notna()\n", + "pvals = comparisons_df.loc[valid_p, \"ttest_p\"].to_numpy()\n", + "qvals = np.full(len(comparisons_df), np.nan)\n", + "if len(pvals):\n", + " order = np.argsort(pvals)\n", + " ranked = pvals[order]\n", + " adjusted = np.empty_like(ranked)\n", + " m = len(ranked)\n", + " running = 1.0\n", + " for i in range(m - 1, -1, -1):\n", + " running = min(running, ranked[i] * m / (i + 1))\n", + " adjusted[i] = running\n", + " qvals[np.where(valid_p)[0][order]] = adjusted\n", + "comparisons_df[\"bh_q\"] = qvals\n", + "\n", + "display(\n", + " comparisons_df[[\n", + " \"section\", \"model\", \"question\", \"contrast\", \"before_auc\", \"after_auc\", \"delta_auc\",\n", + " \"delta_ci95\", \"ttest_p\", \"bh_q\", \"wilcoxon_p\", \"cohen_dz\", \"delta_gap\", \"interpretation\", \"caveat\",\n", + " ]].style.format({\n", + " \"before_auc\": \"{:.4f}\",\n", + " \"after_auc\": \"{:.4f}\",\n", + " \"delta_auc\": \"{:+.4f}\",\n", + " \"delta_ci95\": \"\u00b1{:.4f}\",\n", + " \"ttest_p\": \"{:.4f}\",\n", + " \"bh_q\": \"{:.4f}\",\n", + " \"wilcoxon_p\": \"{:.4f}\",\n", + " \"cohen_dz\": \"{:+.2f}\",\n", + " \"delta_gap\": \"{:+.4f}\",\n", + " }).background_gradient(subset=[\"delta_auc\"], cmap=\"RdYlGn\", vmin=-0.06, vmax=0.06)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f20e5262", + 
"metadata": {}, + "source": [ + "## Visual summary\n", + "\n", + "Two plots are most useful for decision-making:\n", + "\n", + "- Ranking all conditions by AUC shows the best observed configurations but can overstate duplicated/near-identical runs.\n", + "- Paired delta plot shows the controlled effect of each preprocessing change and exposes uncertainty." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42882c6a", + "metadata": {}, + "outputs": [], + "source": [ + "plot_df = analysis_df.copy()\n", + "plot_df[\"display_label\"] = plot_df[\"section\"] + \" | \" + plot_df[\"label\"]\n", + "plot_df = plot_df.sort_values(\"auc_mean\", ascending=True)\n", + "\n", + "fig, ax = plt.subplots(figsize=(11, max(7, 0.35 * len(plot_df))))\n", + "colors = {\"2A\": \"#4C78A8\", \"2B\": \"#F58518\", \"2C\": \"#54A24B\", \"2D\": \"#E45756\", \"2E\": \"#B279A2\"}\n", + "ax.barh(\n", + " plot_df[\"display_label\"],\n", + " plot_df[\"auc_mean\"],\n", + " xerr=plot_df[\"auc_std\"],\n", + " color=[colors.get(s, \"#999999\") for s in plot_df[\"section\"]],\n", + " alpha=0.85,\n", + ")\n", + "ax.set_xlim(0.65, 1.0)\n", + "ax.set_xlabel(\"Mean AUC across CV folds\")\n", + "ax.set_title(\"Phase 2 Conditions Ranked by AUC\")\n", + "ax.axvline(0.95, color=\"black\", linewidth=1, linestyle=\"--\", alpha=0.4)\n", + "for y, (_, row) in enumerate(plot_df.iterrows()):\n", + " ax.text(row[\"auc_mean\"] + 0.004, y, f\"{row['auc_mean']:.4f}\", va=\"center\", fontsize=9)\n", + "fig.tight_layout()\n", + "fig.savefig(FIGURES_DIR / \"ranked_auc.png\", dpi=200, bbox_inches=\"tight\")\n", + "plt.show()\n", + "\n", + "forest = comparisons_df.copy()\n", + "forest[\"display\"] = forest[\"section\"] + \" \" + forest[\"model\"] + \" - \" + forest[\"contrast\"]\n", + "forest = forest.iloc[::-1]\n", + "fig, ax = plt.subplots(figsize=(11, max(6, 0.45 * len(forest))))\n", + "y = np.arange(len(forest))\n", + "ax.errorbar(\n", + " forest[\"delta_auc\"], y,\n", + " 
xerr=forest[\"delta_ci95\"],\n", + " fmt=\"o\", color=\"#1F2937\", ecolor=\"#6B7280\", capsize=4,\n", + ")\n", + "ax.axvline(0, color=\"black\", linewidth=1)\n", + "ax.axvspan(-0.002, 0.002, color=\"#9CA3AF\", alpha=0.18, label=\"negligible band\")\n", + "ax.set_yticks(y)\n", + "ax.set_yticklabels(forest[\"display\"])\n", + "ax.set_xlabel(\"Delta AUC (after - before), paired by fold\")\n", + "ax.set_title(\"Planned Phase 2 Effect Estimates\")\n", + "ax.legend(loc=\"lower right\")\n", + "fig.tight_layout()\n", + "fig.savefig(FIGURES_DIR / \"planned_effects.png\", dpi=200, bbox_inches=\"tight\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "e063cfc0", + "metadata": {}, + "source": [ + "## 2A - Shortcut analysis\n", + "\n", + "Shortcut checks map to `p2a_*` configs:\n", + "- `p2a_t1_original` vs `p2a_t2_real_norm` (normalization)\n", + "- `p2a_t1_original` vs `p2a_t3_holdout_*` (source_holdout)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "910bd5bd", + "metadata": {}, + "outputs": [], + "source": [ + "def comparison_subset(section: str, question: str | None = None) -> pd.DataFrame:\n", + " df = comparisons_df[comparisons_df[\"section\"].eq(section)].copy()\n", + " if question:\n", + " df = df[df[\"question\"].eq(question)]\n", + " return df\n", + "\n", + "\n", + "def print_comparison_readout(df: pd.DataFrame) -> None:\n", + " for _, row in df.iterrows():\n", + " print(f\"{row['section']} {row['model']} - {row['contrast']}\")\n", + " print(f\" AUC: {row['before_auc']:.4f} -> {row['after_auc']:.4f} ({row['delta_auc']:+.4f})\")\n", + " print(f\" paired t p={row['ttest_p']:.4f}, BH q={row['bh_q']:.4f}, CI95 delta=\u00b1{row['delta_ci95']:.4f}\")\n", + " print(f\" gap delta: {row['delta_gap']:+.4f}; interpretation: {row['interpretation']}\")\n", + " if row['caveat']:\n", + " print(f\" caveat: {row['caveat']}\")\n", + " print()\n", + "\n", + "print_comparison_readout(comparison_subset(\"2B\", \"resolution\"))\n", + "\n", + 
"res_plot = comparison_subset(\"2B\", \"resolution\")\n", + "fig, ax = plt.subplots(figsize=(8, 5))\n", + "for _, row in res_plot.iterrows():\n", + " r0, r1 = load_results(row[\"before\"]), load_results(row[\"after\"])\n", + " v0, v1 = metric_values(r0), metric_values(r1)\n", + " x = [0, 1]\n", + " for a, b in zip(v0, v1):\n", + " ax.plot(x, [a, b], color=\"#9CA3AF\", alpha=0.7)\n", + " ax.plot(x, [v0.mean(), v1.mean()], marker=\"o\", linewidth=3, label=row[\"model\"])\n", + "ax.set_xticks([0, 1])\n", + "ax.set_xticklabels([\"128\", \"224\"])\n", + "ax.set_ylabel(\"AUC\")\n", + "ax.set_title(\"2B Resolution: Fold-Paired AUC\")\n", + "ax.legend()\n", + "fig.tight_layout()\n", + "fig.savefig(FIGURES_DIR / \"2b_resolution_paired.png\", dpi=200, bbox_inches=\"tight\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "530e8675", + "metadata": {}, + "source": [ + "## 2B - Resolution impact\n", + "\n", + "This section compares 128 vs 224 using `p2b_*_224` and Phase 1 baselines as explicit 128 fallbacks.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13304d38", + "metadata": {}, + "outputs": [], + "source": [ + "print_comparison_readout(comparison_subset(\"2C\", \"facecrop\"))\n", + "\n", + "face_df = canonical_runs_df[canonical_runs_df[\"section\"].eq(\"2C\")].copy()\n", + "fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharey=False)\n", + "for ax, model in zip(axes, [\"SimpleCNN\", \"ResNet18\"]):\n", + " sub = face_df[face_df[\"model\"].eq(model)].sort_values(\"face_crop\")\n", + " ax.bar(sub[\"condition\"], sub[\"auc_mean\"], yerr=sub[\"auc_std\"], color=[\"#D97706\", \"#059669\"], alpha=0.85, capsize=5)\n", + " ax.set_title(model)\n", + " ax.set_ylim(0.70 if model == \"SimpleCNN\" else 0.94, 0.99)\n", + " ax.set_ylabel(\"AUC\")\n", + " ax.tick_params(axis=\"x\", rotation=20)\n", + "fig.suptitle(\"2C Facecrop Impact\")\n", + "fig.tight_layout()\n", + "fig.savefig(FIGURES_DIR / \"2c_facecrop.png\", dpi=200, 
bbox_inches=\"tight\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "8702d10d", + "metadata": {}, + "source": [ + "## 2C - Facecrop impact\n", + "\n", + "This section compares `p2c_*_facecrop` against the matching `p2b_*_224` no-facecrop baselines.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5e03ef", + "metadata": {}, + "outputs": [], + "source": [ + "print_comparison_readout(comparison_subset(\"2A\"))\n\n# Inspect whether logs contain the per-source data needed by v2.md.\nsource_audit = []\nfor run in [\"p2a_t1_original\", \"p2a_t3_holdout_text2img\", \"p2a_t3_holdout_inpainting\", \"p2a_t3_holdout_insight\"]:\n results = load_results(run)\n has_per_source = False\n has_records = False\n example_keys = []\n if results:\n for fold in results.get(\"fold_results\", []):\n tm = fold.get(\"test_metrics\", {})\n example_keys = sorted(tm.keys())\n has_per_source = has_per_source or any(k in tm for k in [\"per_source\", \"per_source_metrics\", \"pairwise_source_metrics\", \"source_metrics\", \"pair_metrics\"])\n has_records = has_records or any(k in fold for k in [\"records\", \"predictions\", \"test_records\"])\n source_audit.append({\n \"run\": run,\n \"has_per_source_metrics\": has_per_source,\n \"has_prediction_records\": has_records,\n \"test_metric_keys\": example_keys,\n })\nsource_audit_df = pd.DataFrame(source_audit)\ndisplay(source_audit_df)\n\nholdout_runs = [\"p2a_t1_original\", \"p2a_t3_holdout_text2img\", \"p2a_t3_holdout_inpainting\", \"p2a_t3_holdout_insight\"]\nholdout_df = canonical_runs_df[canonical_runs_df[\"run\"].isin(holdout_runs)].copy()\nholdout_df[\"delta_vs_all_source\"] = holdout_df[\"auc_mean\"] - float(holdout_df.loc[holdout_df[\"run\"].eq(\"p2a_t1_original\"), \"auc_mean\"].iloc[0])\n\nfig, ax = plt.subplots(figsize=(9, 5))\nax.bar(holdout_df[\"label\"], holdout_df[\"auc_mean\"], yerr=holdout_df[\"auc_std\"], color=\"#54A24B\", alpha=0.85, capsize=5)\nax.set_ylim(0.88, 
0.99)\nax.set_ylabel(\"Aggregate AUC\")\nax.set_title(\"2A Source Holdout Proxy: Aggregate Test AUC\")\nax.tick_params(axis=\"x\", rotation=20)\nfor i, (_, row) in enumerate(holdout_df.iterrows()):\n ax.text(i, row[\"auc_mean\"] + 0.004, f\"{row['delta_vs_all_source']:+.3f}\", ha=\"center\", fontsize=9)\nfig.tight_layout()\nfig.savefig(FIGURES_DIR / \"2a_holdout_proxy.png\", dpi=200, bbox_inches=\"tight\")\nplt.show()\n\nprint(\"Geometry diagnostic evidence:\")\ngeometry_keys = []\nfor run in [\"p2a_t1_original\", \"p2a_t2_real_norm\"]:\n results = load_results(run)\n cfg = (results or {}).get(\"config\", {})\n geometry_keys.append({\n \"run\": run,\n \"config_geometry_condition\": cfg.get(\"geometry_condition\"),\n \"has_matched_geometry_metric\": any(\n \"geometry\" in str(k).lower() or \"matched\" in str(k).lower()\n for fold in (results or {}).get(\"fold_results\", [])\n for k in fold.get(\"test_metrics\", {}).keys()\n ),\n })\ndisplay(pd.DataFrame(geometry_keys))" + ] + }, + { + "cell_type": "markdown", + "id": "2c3b8812", + "metadata": {}, + "source": [ + "## 2D / 2E - Augmentation impact and test-set integrity\n", + "\n", + "The augmentation question has two parts:\n", + "\n", + "- Does light augmentation help at 224 without facecrop?\n", + "- Does it help once facecrop is enabled?\n", + "\n", + "The implementation also needs to guarantee that validation/test evaluation is not stochastic. The preprocessing pipeline keeps stochastic operations behind `self.train`, so `train=False` disables them even if augmentation settings exist." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f11c3257", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"2D (p2d): augmentation without facecrop\")\n", + "print_comparison_readout(comparison_subset(\"2D\", \"augmentation\"))\n", + "print(\"2E (p2e): augmentation with facecrop\")\n", + "print_comparison_readout(comparison_subset(\"2E\", \"facecrop + augmentation\"))\n", + "\n", + "aug_sections = comparisons_df[comparisons_df[\"section\"].isin([\"2D\", \"2E\"])].copy()\n", + "fig, ax = plt.subplots(figsize=(9, 5))\n", + "labels = aug_sections[\"section\"] + \" \" + aug_sections[\"model\"]\n", + "ax.bar(labels, aug_sections[\"delta_auc\"], yerr=aug_sections[\"delta_ci95\"], color=[\"#E45756\" if d < 0 else \"#059669\" for d in aug_sections[\"delta_auc\"]], alpha=0.85, capsize=5)\n", + "ax.axhline(0, color=\"black\", linewidth=1)\n", + "ax.set_ylabel(\"Delta AUC from adding augmentation\")\n", + "ax.set_title(\"Augmentation Effects Across Facecrop Conditions\")\n", + "ax.tick_params(axis=\"x\", rotation=20)\n", + "fig.tight_layout()\n", + "fig.savefig(FIGURES_DIR / \"2d_2e_augmentation_effects.png\", dpi=200, bbox_inches=\"tight\")\n", + "plt.show()\n", + "\n", + "# Static and behavioral audit of eval stochasticity.\n", + "try:\n", + " import inspect\n", + " from src.preprocessing.pipeline import DFFImagePipeline\n", + " from src.evaluation import evaluate as evaluate_module\n", + "\n", + " pipeline_src = inspect.getsource(DFFImagePipeline)\n", + " build_transforms_src = inspect.getsource(evaluate_module.build_transforms)\n", + " stochastic_guards = {\n", + " \"flip_guarded_by_train\": \"if self.train and random.random() < self.hflip_p\" in pipeline_src,\n", + " \"rotate_guarded_by_train\": \"if self.train and self.rotation_degrees > 0\" in pipeline_src,\n", + " \"color_jitter_returns_when_not_train\": \"if not self.train:\" in pipeline_src,\n", + " \"blur_guarded_by_train\": \"if self.train and random.random() < self.blur_p\" in 
pipeline_src,\n", + " \"jpeg_guarded_by_train\": \"if self.train and random.random() < self.jpeg_p\" in pipeline_src,\n", + " \"erase_guarded_by_train\": \"if self.train and random.random() < self.erase_p\" in pipeline_src,\n", + " \"noise_guarded_by_train\": \"if self.train and random.random() < self.noise_p\" in pipeline_src,\n", + " \"cv_transform_uses_train_flag\": \"get_transforms(train=train\" in build_transforms_src,\n", + " }\n", + " display(pd.DataFrame([stochastic_guards]).T.rename(columns={0: \"passes\"}))\n", + "except Exception as exc:\n", + " print(f\"Could not run transform audit: {exc}\")" + ] + }, + { + "cell_type": "markdown", + "id": "02e47658", + "metadata": {}, + "source": [ + "## Decision synthesis\n", + "\n", + "This section converts the evidence into Phase 3 settings. It intentionally distinguishes a recommendation from a claim:\n", + "\n", + "- Recommendation: choose the setting that is best supported for the next experiment.\n", + "- Claim: what the current evidence proves. Some Phase 2C claims remain incomplete without per-source or matched-geometry outputs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7034443c", + "metadata": {}, + "outputs": [], + "source": [ + "def get_delta(question: str, model: str | None = None, section: str | None = None) -> pd.DataFrame:\n", + " df = comparisons_df[comparisons_df[\"question\"].eq(question)].copy()\n", + " if model:\n", + " df = df[df[\"model\"].eq(model)]\n", + " if section:\n", + " df = df[df[\"section\"].eq(section)]\n", + " return df\n", + "\n", + "resolution_resnet = get_delta(\"resolution\", \"ResNet18\").iloc[0]\n", + "facecrop_resnet = get_delta(\"facecrop\", \"ResNet18\").iloc[0]\n", + "facecrop_simple = get_delta(\"facecrop\", \"SimpleCNN\").iloc[0]\n", + "aug_no_crop_resnet = get_delta(\"augmentation\", \"ResNet18\").iloc[0]\n", + "aug_no_crop_simple = get_delta(\"augmentation\", \"SimpleCNN\").iloc[0]\n", + "aug_crop_resnet = get_delta(\"facecrop + augmentation\", \"ResNet18\").iloc[0]\n", + "aug_crop_simple = get_delta(\"facecrop + augmentation\", \"SimpleCNN\").iloc[0]\n", + "norm = get_delta(\"normalization\", \"ResNet18\").iloc[0]\n", + "\n", + "recommendations = [\n", + " {\n", + " \"choice\": \"resolution\",\n", + " \"recommendation\": \"224x224\",\n", + " \"evidence\": f\"ResNet18 delta AUC {resolution_resnet.delta_auc:+.4f}; SimpleCNN does not determine Phase 3 capacity.\",\n", + " \"confidence\": \"high\" if resolution_resnet.delta_auc > 0.02 else \"medium\",\n", + " },\n", + " {\n", + " \"choice\": \"facecrop\",\n", + " \"recommendation\": \"use facecrop\",\n", + " \"evidence\": f\"Small positive deltas for both models: SimpleCNN {facecrop_simple.delta_auc:+.4f}, ResNet18 {facecrop_resnet.delta_auc:+.4f}.\",\n", + " \"confidence\": \"medium\",\n", + " },\n", + " {\n", + " \"choice\": \"augmentation\",\n", + " \"recommendation\": \"do not use light augmentation for Phase 3 at 20% data\",\n", + " \"evidence\": f\"SimpleCNN drops {aug_no_crop_simple.delta_auc:+.4f} without facecrop and {aug_crop_simple.delta_auc:+.4f} with facecrop; 
ResNet18 is neutral/slightly mixed ({aug_no_crop_resnet.delta_auc:+.4f}, {aug_crop_resnet.delta_auc:+.4f}).\",\n", + " \"confidence\": \"high for SimpleCNN, medium for ResNet18\",\n", + " },\n", + " {\n", + " \"choice\": \"normalization\",\n", + " \"recommendation\": \"ImageNet normalization\",\n", + " \"evidence\": f\"Real-train-only normalization delta AUC {norm.delta_auc:+.4f}; no useful gain and less standard for pretrained ResNet.\",\n", + " \"confidence\": \"medium\",\n", + " },\n", + " {\n", + " \"choice\": \"shortcut/source claims\",\n", + " \"recommendation\": \"do not overclaim; add per-source or prediction exports before final report\",\n", + " \"evidence\": \"Current CV logs lack held-out-source vs in-source AUC and matched-geometry test metrics.\",\n", + " \"confidence\": \"high\",\n", + " },\n", + "]\n", + "\n", + "recommendations_df = pd.DataFrame(recommendations)\n", + "display(recommendations_df)\n", + "\n", + "summary = {\n", + " \"phase\": \"phase2\",\n", + " \"source_documents\": [\"classifier/v2.md\", \"classifier/impl.md\"],\n", + " \"artifact_counts\": {\n", + " \"canonical_runs\": int(len(canonical_runs_df)),\n", + " \"loaded_canonical_runs\": int(canonical_runs_df[\"log_status\"].isin([\"present\", \"fallback\"]).sum()),\n", + " \"fallback_runs_used\": {k: v for k, v in RUN_ALIASES.items() if resolve_run(k) != k},\n", + " },\n", + " \"recommendations\": recommendations,\n", + " \"planned_comparisons\": comparisons_df.replace({np.nan: None}).to_dict(orient=\"records\"),\n", + " \"known_gaps\": [\n", + " \"Dedicated p2a_*_128 logs are absent/skipped; Phase 1 baselines are used as fallbacks.\",\n", + " \"Source holdout logs do not include prediction-level or per-source metrics, so held-out-source AUC vs in-source AUC cannot be computed.\",\n", + " \"No matched-geometry evaluation metric is present in p2c logs, so geometry shortcut analysis is incomplete.\",\n", + " ],\n", + "}\n", + "\n", + "summary_path = ANALYSIS_DIR / 
\"phase2_analysis_summary.json\"\n", + "with summary_path.open(\"w\") as f:\n", + " json.dump(summary, f, indent=2)\n", + "\n", + "print(f\"Saved summary: {summary_path.relative_to(PROJECT_ROOT)}\")\n", + "print(f\"Saved figures: {FIGURES_DIR.relative_to(PROJECT_ROOT)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "5a337f73", + "metadata": {}, + "source": [ + "## Report-ready conclusion\n", + "\n", + "The strongest Phase 2 result is the resolution effect for ResNet18: moving to 224x224 substantially improves AUC under the controlled CV protocol. Face cropping gives a small positive effect and is reasonable to carry forward, especially because it aligns the model with face evidence rather than background context. Light augmentation is not supported at this 20% data setting: it strongly hurts SimpleCNN and provides no reliable gain for ResNet18, with or without face cropping. ImageNet normalization remains preferable because real-train-only normalization does not improve AUC and is less aligned with pretrained ResNet expectations.\n", + "\n", + "Recommended Phase 3 preprocessing: **224x224, facecrop enabled, no light augmentation, ImageNet normalization**.\n", + "\n", + "Limitations to fix before the final report: export prediction-level records or per-source pairwise metrics for source holdout, and add the matched-geometry evaluation required by the shortcut-analysis plan. Without those artifacts, Phase 2C can only support a limited shortcut analysis." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "drl", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/classifier/outputs/analysis/.gitkeep b/classifier/outputs/analysis/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/classifier/outputs/figures/.gitkeep b/classifier/outputs/figures/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/classifier/outputs/logs/.gitkeep b/classifier/outputs/logs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/classifier/outputs/logs/p1_resnet18_baseline.json b/classifier/outputs/logs/p1_resnet18_baseline.json new file mode 100644 index 0000000..9c2a1f2 --- /dev/null +++ b/classifier/outputs/logs/p1_resnet18_baseline.json @@ -0,0 +1,1617 @@ +{ + "run_name": "p1_resnet18_baseline", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.23958949717106642, + 0.143196833671795, + 0.09169542824814993, + 0.05906003904960084, + 0.03961496371506386, + 0.030794697152278214, + 0.019622314044008143, + 0.010980474815140176, + 0.009058485340109555 + ], + "train_acc": [ + 0.7507523148148149, + 0.8752893518518519, + 0.9286458333333333, + 0.9586226851851852, + 0.9741898148148148, + 0.9828125, + 0.9880208333333333, + 0.9919560185185186, + 0.9939236111111112 + ], + "train_auc": [ + 0.8491616565929355, + 0.9504348851165981, + 0.9801190093449932, + 0.9915338148719707, + 0.9959249346279149, + 0.9974669299411294, + 0.998836760902492, + 0.9996191629515319, + 0.9997816733110425 + ], + "train_f1": [ + 0.8176159220834216, + 0.9132273001811959, + 
0.9514662468018107, + 0.9721193215051667, + 0.9826890234435647, + 0.9885012969917535, + 0.991993192279426, + 0.9946238638561207, + 0.9959442234153502 + ], + "val_loss": [ + 0.19691624582434694, + 0.1774458727488915, + 0.27195345516471814, + 0.23256856029232342, + 0.31288731123010316, + 0.42289219592894367, + 0.358063139153334, + 0.4632530469345511, + 0.474098086887534 + ], + "val_acc": [ + 0.8307291666666666, + 0.8380208333333333, + 0.8677083333333333, + 0.8635416666666667, + 0.846875, + 0.8770833333333333, + 0.8697916666666666, + 0.8817708333333333, + 0.8776041666666666 + ], + "val_auc": [ + 0.9075520833333334, + 0.928791232638889, + 0.9198922164351852, + 0.9320630787037036, + 0.9189192708333334, + 0.9173234953703704, + 0.9229318576388889, + 0.9175361689814816, + 0.9185105613425926 + ], + "val_f1": [ + 0.8822890257153205, + 0.8846011131725418, + 0.9121107266435986, + 0.906761565836299, + 0.8943965517241379, + 0.9186767746381805, + 0.9119718309859155, + 0.9215347390252333, + 0.9198225861480723 + ] + }, + "test_metrics": { + "accuracy": 0.8745833333333334, + "auc_roc": 0.9403236111111113, + "f1": 0.9146583498724128, + "confusion_matrix": [ + [ + 972, + 228 + ], + [ + 374, + 3226 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9141666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 103, + 1097 + ] + ], + "n": 1200, + "detection_rate": 0.9141666666666667, + "pairwise_auc": 0.9481944444444446, + "pairwise_f1": 0.8689108910891089 + }, + "insight": { + "accuracy": 0.8275, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 207, + 993 + ] + ], + "n": 1200, + "detection_rate": 0.8275, + "pairwise_auc": 0.9102131944444445, + "pairwise_f1": 0.8203221809169765 + }, + "text2img": { + "accuracy": 0.9466666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 64, + 1136 + ] + ], + "n": 1200, + "detection_rate": 0.9466666666666667, + "pairwise_auc": 
0.9625631944444444, + "pairwise_f1": 0.8861154446177847 + }, + "wiki": { + "accuracy": 0.81, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 972, + 228 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.18999999999999995 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.8620833333333333, + "auc_roc": 0.9481944444444446, + "f1": 0.8689108910891089, + "confusion_matrix": [ + [ + 972, + 228 + ], + [ + 103, + 1097 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.81875, + "auc_roc": 0.9102131944444445, + "f1": 0.8203221809169765, + "confusion_matrix": [ + [ + 972, + 228 + ], + [ + 207, + 993 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.8783333333333333, + "auc_roc": 0.9625631944444444, + "f1": 0.8861154446177847, + "confusion_matrix": [ + [ + 972, + 228 + ], + [ + 64, + 1136 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.23669152959353393, + 0.13688683253540485, + 0.08269713706233435, + 0.05615546396260874, + 0.03753572171316827, + 0.031078304888527513, + 0.018910827419474825, + 0.011221361623675976, + 0.00673896867634472 + ], + "train_acc": [ + 0.7619212962962963, + 0.8825810185185186, + 0.9376736111111111, + 0.9609953703703704, + 0.9765625, + 0.9829282407407407, + 0.9889467592592592, + 0.9935185185185185, + 0.9955439814814815 + ], + "train_auc": [ + 0.8549236164694787, + 0.9544764785522405, + 0.9836320355366941, + 0.9923031603652263, + 0.996373626471765, + 0.9974486400462963, + 0.9989359174954275, + 0.9995332683184728, + 0.9997378507944673 + ], + "train_f1": [ + 0.8274184075845289, + 0.9185892549051077, + 0.9577597364395811, + 0.973719098494892, + 0.9843005000581463, + 0.9885910971883822, + 0.9926126474569715, + 0.9956709956709957, + 0.997024154589372 + 
], + "val_loss": [ + 0.19221198732654254, + 0.20249952741432936, + 0.36335805027047174, + 0.2351263621201118, + 0.333601384274516, + 0.3226143382489681, + 0.421671195969005, + 0.40155297291930764, + 0.45411645335928674 + ], + "val_acc": [ + 0.8057291666666667, + 0.8567708333333334, + 0.8578125, + 0.8729166666666667, + 0.8791666666666667, + 0.8546875, + 0.8755208333333333, + 0.8739583333333333, + 0.8755208333333333 + ], + "val_auc": [ + 0.9083029513888888, + 0.9268113425925926, + 0.9027604166666667, + 0.9329014756944443, + 0.9275462962962964, + 0.9208470775462965, + 0.9181640624999998, + 0.9201468460648148, + 0.9158521412037037 + ], + "val_f1": [ + 0.8588724933787363, + 0.9019607843137255, + 0.9068577277379734, + 0.9143859649122807, + 0.9193884642112579, + 0.9003215434083601, + 0.9172724125995154, + 0.9156794425087108, + 0.9170426935091982 + ] + }, + "test_metrics": { + "accuracy": 0.8727083333333333, + "auc_roc": 0.9351839120370371, + "f1": 0.9141733389521001, + "confusion_matrix": [ + [ + 935, + 265 + ], + [ + 346, + 3254 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9208333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 95, + 1105 + ] + ], + "n": 1200, + "detection_rate": 0.9208333333333333, + "pairwise_auc": 0.9446645833333335, + "pairwise_f1": 0.8599221789883269 + }, + "insight": { + "accuracy": 0.8508333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 179, + 1021 + ] + ], + "n": 1200, + "detection_rate": 0.8508333333333333, + "pairwise_auc": 0.906173263888889, + "pairwise_f1": 0.8213998390989542 + }, + "text2img": { + "accuracy": 0.94, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 72, + 1128 + ] + ], + "n": 1200, + "detection_rate": 0.94, + "pairwise_auc": 0.9547138888888889, + "pairwise_f1": 0.8700347088314694 + }, + "wiki": { + "accuracy": 0.7791666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ 
+ [ + 935, + 265 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.22083333333333333 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.85, + "auc_roc": 0.9446645833333335, + "f1": 0.8599221789883269, + "confusion_matrix": [ + [ + 935, + 265 + ], + [ + 95, + 1105 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.815, + "auc_roc": 0.906173263888889, + "f1": 0.8213998390989542, + "confusion_matrix": [ + [ + 935, + 265 + ], + [ + 179, + 1021 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.8595833333333334, + "auc_roc": 0.9547138888888889, + "f1": 0.8700347088314694, + "confusion_matrix": [ + [ + 935, + 265 + ], + [ + 72, + 1128 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.23555645521868157, + 0.14330073559863699, + 0.08902234849864962, + 0.0596706295705021, + 0.04015614009652011, + 0.027025900888973546, + 0.020126515977770192, + 0.014928802549049444, + 0.010074229028281546 + ], + "train_acc": [ + 0.7581597222222223, + 0.8776041666666666, + 0.9295717592592593, + 0.9575231481481481, + 0.9728587962962963, + 0.9838541666666667, + 0.9872106481481482, + 0.9912615740740741, + 0.9932291666666667 + ], + "train_auc": [ + 0.8572669020776176, + 0.950374871399177, + 0.981142037822931, + 0.9912892232510288, + 0.9959913515946502, + 0.9978648155292638, + 0.9987725230052584, + 0.9994318790723594, + 0.9996619405864198 + ], + "train_f1": [ + 0.8229462356480108, + 0.9149817100132652, + 0.9521111242277575, + 0.9713437963613649, + 0.9817984243412117, + 0.9892048752176437, + 0.9914490230218611, + 0.994165604111124, + 0.9954789597743344 + ], + "val_loss": [ + 0.16975419651716947, + 0.18313004115285972, + 0.19825653905281798, + 0.2172380828880705, + 0.3135680136232016, + 0.27533191804541274, + 
0.37277434571054374, + 0.3967378988515217, + 0.3754097615553292 + ], + "val_acc": [ + 0.7963541666666667, + 0.8666666666666667, + 0.8807291666666667, + 0.8796875, + 0.8838541666666667, + 0.8739583333333333, + 0.8838541666666667, + 0.8833333333333333, + 0.8885416666666667 + ], + "val_auc": [ + 0.9309266493055555, + 0.9361125578703705, + 0.9378175636574075, + 0.9413896122685185, + 0.9364113136574075, + 0.9379557291666667, + 0.9314489293981483, + 0.9296419270833334, + 0.934572482638889 + ], + "val_f1": [ + 0.8475633528265107, + 0.9092198581560283, + 0.9189380530973451, + 0.918804920913884, + 0.923394022672621, + 0.9138790035587189, + 0.9233413544173256, + 0.9231297185998627, + 0.925642807505212 + ] + }, + "test_metrics": { + "accuracy": 0.8754166666666666, + "auc_roc": 0.9345337962962963, + "f1": 0.9168982768204558, + "confusion_matrix": [ + [ + 903, + 297 + ], + [ + 301, + 3299 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.925, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 90, + 1110 + ] + ], + "n": 1200, + "detection_rate": 0.925, + "pairwise_auc": 0.9392815972222223, + "pairwise_f1": 0.8515535097813579 + }, + "insight": { + "accuracy": 0.8683333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 158, + 1042 + ] + ], + "n": 1200, + "detection_rate": 0.8683333333333333, + "pairwise_auc": 0.9052475694444445, + "pairwise_f1": 0.8207955888144939 + }, + "text2img": { + "accuracy": 0.9558333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 53, + 1147 + ] + ], + "n": 1200, + "detection_rate": 0.9558333333333333, + "pairwise_auc": 0.9590722222222223, + "pairwise_f1": 0.867624810892587 + }, + "wiki": { + "accuracy": 0.7525, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 903, + 297 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.24750000000000005 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + 
"sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.83875, + "auc_roc": 0.9392815972222223, + "f1": 0.8515535097813579, + "confusion_matrix": [ + [ + 903, + 297 + ], + [ + 90, + 1110 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8104166666666667, + "auc_roc": 0.9052475694444445, + "f1": 0.8207955888144939, + "confusion_matrix": [ + [ + 903, + 297 + ], + [ + 158, + 1042 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.8541666666666666, + "auc_roc": 0.9590722222222223, + "f1": 0.867624810892587, + "confusion_matrix": [ + [ + 903, + 297 + ], + [ + 53, + 1147 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.23815842890353114, + 0.14449559769558687, + 0.08685155709757021, + 0.054759029798744314, + 0.04165420382557419, + 0.02688520766279436, + 0.022700919520435424 + ], + "train_acc": [ + 0.7540509259259259, + 0.8746527777777777, + 0.9336805555555555, + 0.9614583333333333, + 0.9749421296296297, + 0.983738425925926, + 0.9868055555555556 + ], + "train_auc": [ + 0.8530149748513945, + 0.9498039016060815, + 0.9820211405464107, + 0.9928622345821903, + 0.9958028263603109, + 0.9981013606681528, + 0.9986642839791953 + ], + "train_f1": [ + 0.8199457719030673, + 0.9131027842413544, + 0.9549492884660744, + 0.9740674402305116, + 0.9832046856211939, + 0.9891224402895521, + 0.991183976490604 + ], + "val_loss": [ + 0.18737150949115555, + 0.16689071032839517, + 0.20768918559576074, + 0.25156240473346164, + 0.37945238465520864, + 0.31712872185744345, + 0.3442687667479428 + ], + "val_acc": [ + 0.80625, + 0.8416666666666667, + 0.8677083333333333, + 0.86875, + 0.8729166666666667, + 0.8635416666666667, + 0.8791666666666667 + ], + "val_auc": [ + 0.9110677083333334, + 0.9324168113425926, + 0.9296209490740741, + 0.9280975115740739, + 0.92453125, + 0.9245305266203704, + 
0.9291030092592594 + ], + "val_f1": [ + 0.8589840788476119, + 0.8883994126284875, + 0.910056657223796, + 0.9105748757984387, + 0.9170068027210885, + 0.906761565836299, + 0.9193884642112579 + ] + }, + "test_metrics": { + "accuracy": 0.8420833333333333, + "auc_roc": 0.9320532407407407, + "f1": 0.8883981154299175, + "confusion_matrix": [ + [ + 1025, + 175 + ], + [ + 583, + 3017 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.8416666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 190, + 1010 + ] + ], + "n": 1200, + "detection_rate": 0.8416666666666667, + "pairwise_auc": 0.9318243055555555, + "pairwise_f1": 0.8469601677148847 + }, + "insight": { + "accuracy": 0.7483333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 302, + 898 + ] + ], + "n": 1200, + "detection_rate": 0.7483333333333333, + "pairwise_auc": 0.9008222222222222, + "pairwise_f1": 0.7901451825780906 + }, + "text2img": { + "accuracy": 0.9241666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 91, + 1109 + ] + ], + "n": 1200, + "detection_rate": 0.9241666666666667, + "pairwise_auc": 0.9635131944444445, + "pairwise_f1": 0.892914653784219 + }, + "wiki": { + "accuracy": 0.8541666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1025, + 175 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.14583333333333337 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.8479166666666667, + "auc_roc": 0.9318243055555555, + "f1": 0.8469601677148847, + "confusion_matrix": [ + [ + 1025, + 175 + ], + [ + 190, + 1010 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.80125, + "auc_roc": 0.9008222222222222, + "f1": 0.7901451825780906, + "confusion_matrix": [ + [ + 1025, + 175 + ], + [ + 302, + 898 + ] + ] + }, + 
"wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.8891666666666667, + "auc_roc": 0.9635131944444445, + "f1": 0.892914653784219, + "confusion_matrix": [ + [ + 1025, + 175 + ], + [ + 91, + 1109 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.23552175492600158, + 0.14149037405020662, + 0.08607236144684807, + 0.05892389668194853, + 0.04063106574294916, + 0.026183983484683, + 0.019621156936153527 + ], + "train_acc": [ + 0.7596643518518519, + 0.8807291666666667, + 0.9306712962962963, + 0.9576388888888889, + 0.9722800925925926, + 0.983912037037037, + 0.9885416666666667 + ], + "train_auc": [ + 0.8568183888460219, + 0.9515533282607452, + 0.9821654056641518, + 0.9914928322902378, + 0.9959625771604939, + 0.9977760005858483, + 0.998805682370256 + ], + "train_f1": [ + 0.8240776041004787, + 0.9173782321106434, + 0.9528197857592943, + 0.971444175704143, + 0.9814218671217468, + 0.9892298155896482, + 0.9923463471202165 + ], + "val_loss": [ + 0.19069901450226703, + 0.15564801689858238, + 0.21845358079299332, + 0.254469917776684, + 0.24185769132260854, + 0.30473897442764913, + 0.35159544888107724 + ], + "val_acc": [ + 0.8317708333333333, + 0.8557291666666667, + 0.8572916666666667, + 0.878125, + 0.8802083333333334, + 0.8755208333333333, + 0.8828125 + ], + "val_auc": [ + 0.9150571469907408, + 0.9409252025462964, + 0.9299233217592591, + 0.934763454861111, + 0.9404072627314813, + 0.9310828993055555, + 0.930873119212963 + ], + "val_f1": [ + 0.882417182380779, + 0.8987202925045704, + 0.9016511127063891, + 0.91875, + 0.9192982456140351, + 0.9155178508306823, + 0.9222260629104736 + ] + }, + "test_metrics": { + "accuracy": 0.8604166666666667, + "auc_roc": 0.9406895833333333, + "f1": 0.9021612149532711, + "confusion_matrix": [ + [ + 1041, + 159 + ], + [ + 511, + 3089 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.8758333333333334, + 
"auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 149, + 1051 + ] + ], + "n": 1200, + "detection_rate": 0.8758333333333334, + "pairwise_auc": 0.9438777777777778, + "pairwise_f1": 0.8721991701244813 + }, + "insight": { + "accuracy": 0.7725, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 273, + 927 + ] + ], + "n": 1200, + "detection_rate": 0.7725, + "pairwise_auc": 0.9121888888888889, + "pairwise_f1": 0.8110236220472441 + }, + "text2img": { + "accuracy": 0.9258333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 89, + 1111 + ] + ], + "n": 1200, + "detection_rate": 0.9258333333333333, + "pairwise_auc": 0.9660020833333334, + "pairwise_f1": 0.8995951417004049 + }, + "wiki": { + "accuracy": 0.8675, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1041, + 159 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.13249999999999995 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.8716666666666667, + "auc_roc": 0.9438777777777778, + "f1": 0.8721991701244813, + "confusion_matrix": [ + [ + 1041, + 159 + ], + [ + 149, + 1051 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.82, + "auc_roc": 0.9121888888888889, + "f1": 0.8110236220472441, + "confusion_matrix": [ + [ + 1041, + 159 + ], + [ + 273, + 927 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.8966666666666666, + "auc_roc": 0.9660020833333334, + "f1": 0.8995951417004049, + "confusion_matrix": [ + [ + 1041, + 159 + ], + [ + 89, + 1111 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.8650416666666667, + "std": 0.014194519188757335, + "ci_95": 0.012442044647082734, + "values": [ + 0.8745833333333334, + 0.8727083333333333, + 0.8754166666666666, + 0.8420833333333333, + 
0.8604166666666667 + ] + }, + "auc_roc": { + "mean": 0.9365568287037037, + "std": 0.003792366522656302, + "ci_95": 0.003324155821379563, + "values": [ + 0.9403236111111113, + 0.9351839120370371, + 0.9345337962962963, + 0.9320532407407407, + 0.9406895833333333 + ] + }, + "f1": { + "mean": 0.9072578592056315, + "std": 0.012012436180286749, + "ci_95": 0.010529364558803643, + "values": [ + 0.9146583498724128, + 0.9141733389521001, + 0.9168982768204558, + 0.8883981154299175, + 0.9021612149532711 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.8955, + "std": 0.035865295822625584, + "ci_95": 0.03143731788107178, + "values": [ + 0.9141666666666667, + 0.9208333333333333, + 0.925, + 0.8416666666666667, + 0.8758333333333334 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.8955, + "std": 0.035865295822625584, + "ci_95": 0.03143731788107178, + "values": [ + 0.9141666666666667, + 0.9208333333333333, + 0.925, + 0.8416666666666667, + 0.8758333333333334 + ] + }, + "pairwise_auc": { + "mean": 0.9415685416666667, + "std": 0.0063047776590499085, + "ci_95": 0.005526381280033774, + "values": [ + 0.9481944444444446, + 0.9446645833333335, + 0.9392815972222223, + 0.9318243055555555, + 0.9438777777777778 + ] + }, + "pairwise_f1": { + "mean": 0.8599091835396319, + "std": 0.010834770102868352, + "ci_95": 0.00949709472847364, + "values": [ + 0.8689108910891089, + 0.8599221789883269, + 0.8515535097813579, + 0.8469601677148847, + 0.8721991701244813 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.8135, + "std": 0.0512936805724318, + "ci_95": 0.044960893377838186, + "values": [ + 0.8275, + 0.8508333333333333, + 0.8683333333333333, + 0.7483333333333333, + 0.7725 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.8135, + "std": 
0.0512936805724318, + "ci_95": 0.044960893377838186, + "values": [ + 0.8275, + 0.8508333333333333, + 0.8683333333333333, + 0.7483333333333333, + 0.7725 + ] + }, + "pairwise_auc": { + "mean": 0.9069290277777778, + "std": 0.004448130081238437, + "ci_95": 0.003898957923889036, + "values": [ + 0.9102131944444445, + 0.906173263888889, + 0.9052475694444445, + 0.9008222222222222, + 0.9121888888888889 + ] + }, + "pairwise_f1": { + "mean": 0.812737282691152, + "std": 0.013330852209377185, + "ci_95": 0.01168500716136289, + "values": [ + 0.8203221809169765, + 0.8213998390989542, + 0.8207955888144939, + 0.7901451825780906, + 0.8110236220472441 + ] + } + }, + "text2img": { + "accuracy": { + "mean": 0.9385000000000001, + "std": 0.01355800296667781, + "ci_95": 0.011884113578873076, + "values": [ + 0.9466666666666667, + 0.94, + 0.9558333333333333, + 0.9241666666666667, + 0.9258333333333333 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9385000000000001, + "std": 0.01355800296667781, + "ci_95": 0.011884113578873076, + "values": [ + 0.9466666666666667, + 0.94, + 0.9558333333333333, + 0.9241666666666667, + 0.9258333333333333 + ] + }, + "pairwise_auc": { + "mean": 0.9611729166666667, + "std": 0.0043835653312689635, + "ci_95": 0.003842364425295284, + "values": [ + 0.9625631944444444, + 0.9547138888888889, + 0.9590722222222223, + 0.9635131944444445, + 0.9660020833333334 + ] + }, + "pairwise_f1": { + "mean": 0.883256951965293, + "std": 0.014031840368382521, + "ci_95": 0.01229945037394657, + "values": [ + 0.8861154446177847, + 0.8700347088314694, + 0.867624810892587, + 0.892914653784219, + 0.8995951417004049 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.8126666666666666, + "std": 0.04867836617362312, + "ci_95": 0.04266846923275626, + "values": [ + 0.81, + 0.7791666666666667, + 0.7525, + 0.8541666666666666, + 0.8675 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + 
"ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.18733333333333332, + "std": 0.04867836617362312, + "ci_95": 0.04266846923275626, + "values": [ + 0.18999999999999995, + 0.22083333333333333, + 0.24750000000000005, + 0.14583333333333337, + 0.13249999999999995 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.8540833333333333, + "std": 0.012874258338940466, + "ci_95": 0.011284784987859647, + "values": [ + 0.8620833333333333, + 0.85, + 0.83875, + 0.8479166666666667, + 0.8716666666666667 + ] + }, + "auc_roc": { + "mean": 0.9415685416666667, + "std": 0.0063047776590499085, + "ci_95": 0.005526381280033774, + "values": [ + 0.9481944444444446, + 0.9446645833333335, + 0.9392815972222223, + 0.9318243055555555, + 0.9438777777777778 + ] + }, + "f1": { + "mean": 0.8599091835396319, + "std": 0.010834770102868352, + "ci_95": 0.00949709472847364, + "values": [ + 0.8689108910891089, + 0.8599221789883269, + 0.8515535097813579, + 0.8469601677148847, + 0.8721991701244813 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.8130833333333334, + "std": 0.0075954573412153285, + "ci_95": 0.006657711902581657, + "values": [ + 0.81875, + 0.815, + 0.8104166666666667, + 0.80125, + 0.82 + ] + }, + "auc_roc": { + "mean": 0.9069290277777778, + "std": 0.004448130081238437, + "ci_95": 0.003898957923889036, + "values": [ + 0.9102131944444445, + 0.906173263888889, + 0.9052475694444445, + 0.9008222222222222, + 0.9121888888888889 + ] + }, + "f1": { + "mean": 0.812737282691152, + "std": 0.013330852209377185, + "ci_95": 0.01168500716136289, + "values": [ + 0.8203221809169765, + 0.8213998390989542, + 0.8207955888144939, + 0.7901451825780906, + 0.8110236220472441 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.8755833333333334, + "std": 0.018379676275712787, + "ci_95": 0.01611049657831811, + "values": [ + 0.8783333333333333, + 0.8595833333333334, + 0.8541666666666666, + 
0.8891666666666667, + 0.8966666666666666 + ] + }, + "auc_roc": { + "mean": 0.9611729166666667, + "std": 0.0043835653312689635, + "ci_95": 0.003842364425295284, + "values": [ + 0.9625631944444444, + 0.9547138888888889, + 0.9590722222222223, + 0.9635131944444445, + 0.9660020833333334 + ] + }, + "f1": { + "mean": 0.883256951965293, + "std": 0.014031840368382521, + "ci_95": 0.01229945037394657, + "values": [ + 0.8861154446177847, + 0.8700347088314694, + 0.867624810892587, + 0.892914653784219, + 0.8995951417004049 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data", + "run_name": "p1_resnet18_baseline", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 128, + "subsample": 0.2, + "augment": false + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p1_simplecnn_baseline.json b/classifier/outputs/logs/p1_simplecnn_baseline.json new file mode 100644 index 0000000..befe677 --- /dev/null +++ b/classifier/outputs/logs/p1_simplecnn_baseline.json @@ -0,0 +1,1890 @@ +{ + "run_name": "p1_simplecnn_baseline", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.3346501660567743, + 0.3192252992479889, + 0.31206709083031725, + 0.30691634201341206, + 0.3023274021844069, + 0.29677660677719997, + 0.29325537408391633, + 0.29081125626409493, + 0.2881800217209039, + 0.2846500360303455, + 0.28296238032204135, + 0.2810634497967031, + 0.2794253593793622, + 0.277810283501943, + 0.2776510996123155 + ], + "train_acc": [ + 0.5081018518518519, + 0.6344907407407407, + 0.6483796296296296, + 0.6711805555555556, + 0.6748263888888889, + 0.6851851851851852, + 0.6902777777777778, + 0.6938657407407407, + 0.702025462962963, + 0.7045717592592593, + 0.7101273148148148, + 0.7141203703703703, + 0.7125, + 
0.7185763888888889, + 0.7173032407407407 + ], + "train_auc": [ + 0.6287196448473937, + 0.6900440368512804, + 0.7102579696787836, + 0.7244584476451761, + 0.7365919799525605, + 0.7494974208390489, + 0.7580394268689987, + 0.7634302662037036, + 0.7690736364740511, + 0.7766107610310927, + 0.7808713777434841, + 0.784866933870599, + 0.7877255158321902, + 0.7914462144918838, + 0.7916608706990169 + ], + "train_f1": [ + 0.5633860694472981, + 0.7221537920112616, + 0.7324290998766955, + 0.7560326320309145, + 0.7567005845421086, + 0.7657797296133644, + 0.7689917127071824, + 0.772531819745442, + 0.7796465100355202, + 0.7813048879749818, + 0.7864421232146663, + 0.7895364689843217, + 0.787801127626858, + 0.7924898655856625, + 0.7918530827900635 + ], + "val_loss": [ + 0.32851970146099724, + 0.33090306669473646, + 0.30912243214746316, + 0.3026419480641683, + 0.30178220694263774, + 0.29687194476524986, + 0.2971705784400304, + 0.2919185152898232, + 0.2928813929359118, + 0.2944591929515203, + 0.2910800057152907, + 0.28581606037914753, + 0.2896332995345195, + 0.28544751058022183, + 0.2846951966484388 + ], + "val_acc": [ + 0.6276041666666666, + 0.4822916666666667, + 0.6713541666666667, + 0.6822916666666666, + 0.6296875, + 0.6411458333333333, + 0.7197916666666667, + 0.6979166666666666, + 0.7411458333333333, + 0.6223958333333334, + 0.6375, + 0.7255208333333333, + 0.734375, + 0.7104166666666667, + 0.7161458333333334 + ], + "val_auc": [ + 0.6543258101851852, + 0.687290943287037, + 0.7249045138888889, + 0.7365899884259259, + 0.7390733506944444, + 0.7485373263888888, + 0.7530570023148149, + 0.7628190104166667, + 0.7714670138888889, + 0.7614467592592593, + 0.7665675636574074, + 0.7777184606481481, + 0.774447337962963, + 0.7745818865740741, + 0.7780135995370371 + ], + "val_f1": [ + 0.7274113610369806, + 0.5005025125628141, + 0.7545702061454687, + 0.7644787644787645, + 0.702136573104315, + 0.7144633236634894, + 0.803218727139722, + 0.7736143637782982, + 0.8212873067241999, + 0.6876346402412753, + 
0.7070707070707071, + 0.8016560030109146, + 0.8123620309050773, + 0.7871362940275651, + 0.7912677135197242 + ] + }, + "test_metrics": { + "accuracy": 0.7177083333333333, + "auc_roc": 0.7838011574074074, + "f1": 0.7943542267415389, + "confusion_matrix": [ + [ + 828, + 372 + ], + [ + 983, + 2617 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.7316666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 322, + 878 + ] + ], + "n": 1200, + "detection_rate": 0.7316666666666667, + "pairwise_auc": 0.7876569444444443, + "pairwise_f1": 0.7167346938775511 + }, + "insight": { + "accuracy": 0.5616666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 526, + 674 + ] + ], + "n": 1200, + "detection_rate": 0.5616666666666666, + "pairwise_auc": 0.6755444444444445, + "pairwise_f1": 0.6001780943900267 + }, + "text2img": { + "accuracy": 0.8875, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 135, + 1065 + ] + ], + "n": 1200, + "detection_rate": 0.8875, + "pairwise_auc": 0.8882020833333333, + "pairwise_f1": 0.8077360637087599 + }, + "wiki": { + "accuracy": 0.69, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 828, + 372 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.31000000000000005 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.7108333333333333, + "auc_roc": 0.7876569444444443, + "f1": 0.7167346938775511, + "confusion_matrix": [ + [ + 828, + 372 + ], + [ + 322, + 878 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.6258333333333334, + "auc_roc": 0.6755444444444445, + "f1": 0.6001780943900267, + "confusion_matrix": [ + [ + 828, + 372 + ], + [ + 526, + 674 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.78875, + "auc_roc": 
0.8882020833333333, + "f1": 0.8077360637087599, + "confusion_matrix": [ + [ + 828, + 372 + ], + [ + 135, + 1065 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.33287020769936065, + 0.31724516280271386, + 0.31064122178488307, + 0.3058183306069286, + 0.3016094679909724, + 0.29988867938518526, + 0.2974788663563905, + 0.2937378372859072, + 0.29088224037377924, + 0.28740419626787855, + 0.28645941275689335, + 0.28386374767179845, + 0.28320960984737786, + 0.282447586098203, + 0.28205930833463316 + ], + "train_acc": [ + 0.5519097222222222, + 0.6278935185185185, + 0.6530671296296297, + 0.6608217592592592, + 0.6713541666666667, + 0.6750578703703703, + 0.6860532407407407, + 0.6871527777777777, + 0.690625, + 0.6932291666666667, + 0.7, + 0.6988425925925926, + 0.7033564814814814, + 0.7064236111111111, + 0.7067129629629629 + ], + "train_auc": [ + 0.6354667048896891, + 0.6927244173668268, + 0.7121327821359169, + 0.7256365740740741, + 0.7351972504429585, + 0.7401355041866713, + 0.7455139478309329, + 0.7538817265374942, + 0.7599668317043895, + 0.767111848779721, + 0.770281019590192, + 0.7745531657235939, + 0.7767934813671696, + 0.7789927787065616, + 0.7798625578703704 + ], + "train_f1": [ + 0.628187274909964, + 0.7128951598499732, + 0.7384722767526065, + 0.7443179339527985, + 0.7535049264290985, + 0.7566630552546045, + 0.7664758297103009, + 0.7676038173845757, + 0.7694695989650712, + 0.771715257740838, + 0.7773769646998197, + 0.7758249332299475, + 0.7796785008166424, + 0.7824334176780889, + 0.7826944515907727 + ], + "val_loss": [ + 0.3259876777728399, + 0.3222517820696036, + 0.3081994076569875, + 0.330890358487765, + 0.3316652526458104, + 0.3155295995374521, + 0.30306527639428776, + 0.3021728872011105, + 0.32027635145932437, + 0.29995816548665366, + 0.2953853040933609, + 0.29479864947497847, + 0.29465246871113776, + 0.29583008649448556, + 0.2929837185889482 + ], + "val_acc": [ + 
0.5276041666666667, + 0.7109375, + 0.6317708333333333, + 0.4557291666666667, + 0.7463541666666667, + 0.5364583333333334, + 0.7036458333333333, + 0.7020833333333333, + 0.7526041666666666, + 0.7119791666666667, + 0.6854166666666667, + 0.6994791666666667, + 0.7109375, + 0.7192708333333333, + 0.6864583333333333 + ], + "val_auc": [ + 0.6886089409722222, + 0.7032530381944445, + 0.7218272569444445, + 0.734505931712963, + 0.7297547743055555, + 0.7397858796296296, + 0.7479029224537037, + 0.7465581597222222, + 0.7514113136574074, + 0.7553891782407408, + 0.7594950810185186, + 0.7613961226851851, + 0.7645341435185187, + 0.7631582754629629, + 0.7647099247685185 + ], + "val_f1": [ + 0.5731764705882353, + 0.8060118839566585, + 0.7079719124328789, + 0.4444444444444444, + 0.8385813722240636, + 0.576593720266413, + 0.7848771266540643, + 0.7870439314966493, + 0.8392554991539763, + 0.7931163486719043, + 0.7629513343799058, + 0.7762698720434277, + 0.7896930655551345, + 0.7971396311629657, + 0.7633647798742138 + ] + }, + "test_metrics": { + "accuracy": 0.6933333333333334, + "auc_roc": 0.7694782407407408, + "f1": 0.7703588143525741, + "confusion_matrix": [ + [ + 859, + 341 + ], + [ + 1131, + 2469 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.6766666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 388, + 812 + ] + ], + "n": 1200, + "detection_rate": 0.6766666666666666, + "pairwise_auc": 0.7611951388888889, + "pairwise_f1": 0.6901827454313643 + }, + "insight": { + "accuracy": 0.5225, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 573, + 627 + ] + ], + "n": 1200, + "detection_rate": 0.5225, + "pairwise_auc": 0.6666326388888889, + "pairwise_f1": 0.5784132841328413 + }, + "text2img": { + "accuracy": 0.8583333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 170, + 1030 + ] + ], + "n": 1200, + "detection_rate": 0.8583333333333333, + "pairwise_auc": 
0.8806069444444444, + "pairwise_f1": 0.8012446518864255 + }, + "wiki": { + "accuracy": 0.7158333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 859, + 341 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.2841666666666667 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.69625, + "auc_roc": 0.7611951388888889, + "f1": 0.6901827454313643, + "confusion_matrix": [ + [ + 859, + 341 + ], + [ + 388, + 812 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.6191666666666666, + "auc_roc": 0.6666326388888889, + "f1": 0.5784132841328413, + "confusion_matrix": [ + [ + 859, + 341 + ], + [ + 573, + 627 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7870833333333334, + "auc_roc": 0.8806069444444444, + "f1": 0.8012446518864255, + "confusion_matrix": [ + [ + 859, + 341 + ], + [ + 170, + 1030 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.33655071553808674, + 0.321854180842638, + 0.3146107524909355, + 0.30974730443623333, + 0.30594114744000966, + 0.30158254981592847, + 0.29831719867609163, + 0.29523005148878806, + 0.2938832444449266, + 0.29102997652910373, + 0.2891627273073903, + 0.28717040626539125, + 0.2854978916545709, + 0.2848287137294257, + 0.28433997159202895 + ], + "train_acc": [ + 0.6290509259259259, + 0.6122685185185185, + 0.6407407407407407, + 0.6531828703703704, + 0.6608217592592592, + 0.6714699074074074, + 0.6833333333333333, + 0.6828703703703703, + 0.687962962962963, + 0.6897569444444445, + 0.6927083333333334, + 0.6972800925925926, + 0.6988425925925926, + 0.6990740740740741, + 0.7028356481481481 + ], + "train_auc": [ + 0.6172178283607681, + 0.6760196884287837, + 0.6983209644347279, + 0.713200928069273, + 0.7234593353480795, + 0.7350504490312071, + 
0.7437294685213763, + 0.7502861636231138, + 0.7550577364111797, + 0.7604768589963421, + 0.7648405349794238, + 0.7698305416237997, + 0.7737597432984682, + 0.774700592278235, + 0.7764962973679699 + ], + "train_f1": [ + 0.7338481979737586, + 0.6984427041137816, + 0.7275520056174845, + 0.7378504877301956, + 0.744741082705457, + 0.7537200121469785, + 0.7647058823529411, + 0.7625444145939856, + 0.7676863420939251, + 0.7683933123082904, + 0.7714949651432997, + 0.7754646521011289, + 0.7762490325909365, + 0.776536312849162, + 0.7795096397440852 + ], + "val_loss": [ + 0.3331386372447014, + 0.33060519645611447, + 0.3087902384499709, + 0.32562951805690926, + 0.30596346855163575, + 0.3003089755773544, + 0.29601568169891834, + 0.29855594610174496, + 0.29671997502446174, + 0.302661178757747, + 0.2891139057775339, + 0.28976994206508, + 0.2897218874345223, + 0.2861236933618784, + 0.2861021713664134 + ], + "val_acc": [ + 0.6770833333333334, + 0.46041666666666664, + 0.615625, + 0.7239583333333334, + 0.6541666666666667, + 0.6296875, + 0.684375, + 0.6682291666666667, + 0.6447916666666667, + 0.7416666666666667, + 0.6958333333333333, + 0.7026041666666667, + 0.7104166666666667, + 0.6895833333333333, + 0.684375 + ], + "val_auc": [ + 0.6454716435185186, + 0.688054832175926, + 0.7210807291666667, + 0.7221657986111112, + 0.7302473958333334, + 0.7481141493055554, + 0.7527408854166666, + 0.7419885706018519, + 0.7501721643518519, + 0.763677662037037, + 0.7683622685185185, + 0.7682725694444444, + 0.7707986111111111, + 0.7764127604166666, + 0.7757638888888888 + ], + "val_f1": [ + 0.7818437719915552, + 0.4581589958158996, + 0.690176322418136, + 0.8199728260869565, + 0.7331189710610932, + 0.6988564167725541, + 0.766023166023166, + 0.7506849315068493, + 0.7184145334434352, + 0.8252290345313601, + 0.773467804499612, + 0.7809742999616417, + 0.7900302114803626, + 0.7677318784099766, + 0.7619795758051846 + ] + }, + "test_metrics": { + "accuracy": 0.695, + "auc_roc": 0.7747501157407407, + "f1": 
0.7706048260733313, + "confusion_matrix": [ + [ + 877, + 323 + ], + [ + 1141, + 2459 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.665, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 402, + 798 + ] + ], + "n": 1200, + "detection_rate": 0.665, + "pairwise_auc": 0.7623197916666667, + "pairwise_f1": 0.6876346402412753 + }, + "insight": { + "accuracy": 0.5241666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 571, + 629 + ] + ], + "n": 1200, + "detection_rate": 0.5241666666666667, + "pairwise_auc": 0.6778611111111111, + "pairwise_f1": 0.5845724907063197 + }, + "text2img": { + "accuracy": 0.86, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 168, + 1032 + ] + ], + "n": 1200, + "detection_rate": 0.86, + "pairwise_auc": 0.8840694444444445, + "pairwise_f1": 0.8078277886497065 + }, + "wiki": { + "accuracy": 0.7308333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 877, + 323 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.26916666666666667 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.6979166666666666, + "auc_roc": 0.7623197916666667, + "f1": 0.6876346402412753, + "confusion_matrix": [ + [ + 877, + 323 + ], + [ + 402, + 798 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.6275, + "auc_roc": 0.6778611111111111, + "f1": 0.5845724907063197, + "confusion_matrix": [ + [ + 877, + 323 + ], + [ + 571, + 629 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7954166666666667, + "auc_roc": 0.8840694444444445, + "f1": 0.8078277886497065, + "confusion_matrix": [ + [ + 877, + 323 + ], + [ + 168, + 1032 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + 
"train_loss": [ + 0.33558250480779894, + 0.32017956525087354, + 0.31335426673845007, + 0.30739296031770885, + 0.30210169360593514, + 0.2993158172954012, + 0.2966337667571174, + 0.29390896244181525, + 0.2896360481778781, + 0.2874575526901969, + 0.2853725731648781, + 0.28404737746825925, + 0.28240771815180776, + 0.28153970249825055, + 0.28095253192716174 + ], + "train_acc": [ + 0.6363425925925926, + 0.6223958333333334, + 0.6483217592592593, + 0.6615162037037037, + 0.6716435185185186, + 0.6791666666666667, + 0.6831597222222222, + 0.6952546296296296, + 0.6954861111111111, + 0.7033564814814814, + 0.7071759259259259, + 0.702488425925926, + 0.7075231481481481, + 0.7070601851851852, + 0.7079282407407408 + ], + "train_auc": [ + 0.6226339234682213, + 0.6819230824188386, + 0.7038211680526978, + 0.7208862561442616, + 0.7350942090334933, + 0.7414628611539781, + 0.7476869802383401, + 0.7551400230767034, + 0.7643345800468679, + 0.7707347125771604, + 0.7741903595821902, + 0.776510809613626, + 0.7813147915952218, + 0.7824827103337906, + 0.7839221643518518 + ], + "train_f1": [ + 0.7411008569545154, + 0.7088223481636843, + 0.7339200490389246, + 0.744974929147591, + 0.7538608363699462, + 0.7613430908308222, + 0.7639577495149816, + 0.7752454118651302, + 0.7733069102188523, + 0.7803205622696494, + 0.7832419465387251, + 0.7793278104476972, + 0.783776846068281, + 0.782821348893084, + 0.7834184439771703 + ], + "val_loss": [ + 0.3329686331252257, + 0.3172327414155006, + 0.32925771673520404, + 0.3023487875858943, + 0.30110858430465065, + 0.3045055687427521, + 0.2880402889102697, + 0.2909245935579141, + 0.2851847360531489, + 0.28721089474856853, + 0.28238330483436586, + 0.28091805552442867, + 0.27951585613191127, + 0.27924353666603563, + 0.27984871752560136 + ], + "val_acc": [ + 0.6786458333333333, + 0.5588541666666667, + 0.4609375, + 0.6270833333333333, + 0.6958333333333333, + 0.73125, + 0.7010416666666667, + 0.7083333333333334, + 0.6713541666666667, + 0.6578125, + 0.70625, + 
0.7151041666666667, + 0.7, + 0.7010416666666667, + 0.7203125 + ], + "val_auc": [ + 0.6489416956018519, + 0.7007465277777777, + 0.710368923611111, + 0.7392505787037037, + 0.7437825520833332, + 0.7508210358796297, + 0.7698379629629629, + 0.765999710648148, + 0.7754774305555556, + 0.7733347800925926, + 0.7819343171296296, + 0.7852010995370371, + 0.786001880787037, + 0.7867426215277777, + 0.7875716145833334 + ], + "val_f1": [ + 0.785838250607428, + 0.6179521876409563, + 0.4572627163083377, + 0.6968670618120237, + 0.7804511278195488, + 0.8177966101694916, + 0.7794004611837049, + 0.7927461139896373, + 0.744637798462161, + 0.7252195734002509, + 0.7829099307159353, + 0.7908221797323136, + 0.7735849056603774, + 0.7738376674546887, + 0.7944890929965557 + ] + }, + "test_metrics": { + "accuracy": 0.7125, + "auc_roc": 0.7855067129629629, + "f1": 0.7879532882606023, + "confusion_matrix": [ + [ + 856, + 344 + ], + [ + 1036, + 2564 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.6883333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 374, + 826 + ] + ], + "n": 1200, + "detection_rate": 0.6883333333333334, + "pairwise_auc": 0.7672638888888889, + "pairwise_f1": 0.6970464135021097 + }, + "insight": { + "accuracy": 0.5441666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 547, + 653 + ] + ], + "n": 1200, + "detection_rate": 0.5441666666666667, + "pairwise_auc": 0.6868111111111111, + "pairwise_f1": 0.594446973145198 + }, + "text2img": { + "accuracy": 0.9041666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 115, + 1085 + ] + ], + "n": 1200, + "detection_rate": 0.9041666666666667, + "pairwise_auc": 0.902445138888889, + "pairwise_f1": 0.8254089007227082 + }, + "wiki": { + "accuracy": 0.7133333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 856, + 344 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 
0.2866666666666666 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.7008333333333333, + "auc_roc": 0.7672638888888889, + "f1": 0.6970464135021097, + "confusion_matrix": [ + [ + 856, + 344 + ], + [ + 374, + 826 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.62875, + "auc_roc": 0.6868111111111111, + "f1": 0.594446973145198, + "confusion_matrix": [ + [ + 856, + 344 + ], + [ + 547, + 653 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.80875, + "auc_roc": 0.902445138888889, + "f1": 0.8254089007227082, + "confusion_matrix": [ + [ + 856, + 344 + ], + [ + 115, + 1085 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.33431985822540744, + 0.31918365593861653, + 0.31112482442899986, + 0.3061287344881782, + 0.3020313317852992, + 0.2975266167687045, + 0.29550419957549484, + 0.2913145370229527, + 0.289265175798425, + 0.2858682273162736, + 0.28368009836585434, + 0.2820883631706238, + 0.2811974667564586, + 0.2801076131010497, + 0.28014156498290876 + ], + "train_acc": [ + 0.5418981481481482, + 0.6327546296296296, + 0.653587962962963, + 0.6692708333333334, + 0.6817129629629629, + 0.6865162037037037, + 0.6912615740740741, + 0.6932870370370371, + 0.6991898148148148, + 0.7071759259259259, + 0.7096064814814815, + 0.711400462962963, + 0.7132523148148148, + 0.7140046296296296, + 0.7130208333333333 + ], + "train_auc": [ + 0.6280291298725423, + 0.6886572198645404, + 0.7127941297296525, + 0.7274225626571789, + 0.7367016657378829, + 0.7485746742112482, + 0.7528753357910378, + 0.7610719325131459, + 0.7666531725108597, + 0.7739525105738454, + 0.7787101248142432, + 0.7829224001200275, + 0.7844403631544353, + 0.7865075767318245, + 0.786264271119113 + ], + "train_f1": [ + 0.6176953540036705, + 0.719550998762595, 
+ 0.7376862401402279, + 0.7518992836987194, + 0.7637254059627115, + 0.7660750528997712, + 0.7716865665254419, + 0.7721018231854145, + 0.7767757450828824, + 0.7835571905210027, + 0.7858666894256209, + 0.7869440765582946, + 0.7884377268263524, + 0.7888936351986331, + 0.7886819789491627 + ], + "val_loss": [ + 0.32599874412020047, + 0.3156606926272313, + 0.3056748901804288, + 0.30458813918133576, + 0.2982352774590254, + 0.31187910462419194, + 0.2944885056465864, + 0.29124660082161424, + 0.2959661457687616, + 0.28813540091117223, + 0.285796557739377, + 0.2868611551821232, + 0.2848579311122497, + 0.28392290882766247, + 0.28408890453477703 + ], + "val_acc": [ + 0.5875, + 0.6958333333333333, + 0.6130208333333333, + 0.7, + 0.6333333333333333, + 0.7317708333333334, + 0.7177083333333333, + 0.6828125, + 0.7416666666666667, + 0.6765625, + 0.7072916666666667, + 0.659375, + 0.7177083333333333, + 0.7161458333333334, + 0.715625 + ], + "val_auc": [ + 0.6682689525462964, + 0.707142650462963, + 0.7284150752314815, + 0.7342404513888889, + 0.7437934027777777, + 0.7387912326388888, + 0.7560691550925926, + 0.7592816840277777, + 0.7636552372685185, + 0.7648054108796296, + 0.7714214409722222, + 0.7730041956018519, + 0.7741608796296297, + 0.7753616898148148, + 0.7752047164351852 + ], + "val_f1": [ + 0.665257819103973, + 0.7863935625457206, + 0.6863655550865344, + 0.7865085248332098, + 0.7090909090909091, + 0.8216141323172844, + 0.7982129560685034, + 0.7588118811881188, + 0.8241134751773049, + 0.751500600240096, + 0.7821705426356589, + 0.7295285359801489, + 0.7940729483282675, + 0.7911077041011882, + 0.7906441717791411 + ] + }, + "test_metrics": { + "accuracy": 0.7008333333333333, + "auc_roc": 0.779677662037037, + "f1": 0.7773643410852713, + "confusion_matrix": [ + [ + 857, + 343 + ], + [ + 1093, + 2507 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.675, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 390, + 810 + ] + ], + "n": 1200, + 
"detection_rate": 0.675, + "pairwise_auc": 0.7660281250000001, + "pairwise_f1": 0.6884827879303017 + }, + "insight": { + "accuracy": 0.5425, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 549, + 651 + ] + ], + "n": 1200, + "detection_rate": 0.5425, + "pairwise_auc": 0.6831673611111111, + "pairwise_f1": 0.593436645396536 + }, + "text2img": { + "accuracy": 0.8716666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 154, + 1046 + ] + ], + "n": 1200, + "detection_rate": 0.8716666666666667, + "pairwise_auc": 0.8898375, + "pairwise_f1": 0.8080339899575125 + }, + "wiki": { + "accuracy": 0.7141666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 857, + 343 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.2858333333333334 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.6945833333333333, + "auc_roc": 0.7660281250000001, + "f1": 0.6884827879303017, + "confusion_matrix": [ + [ + 857, + 343 + ], + [ + 390, + 810 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.6283333333333333, + "auc_roc": 0.6831673611111111, + "f1": 0.593436645396536, + "confusion_matrix": [ + [ + 857, + 343 + ], + [ + 549, + 651 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7929166666666667, + "auc_roc": 0.8898375, + "f1": 0.8080339899575125, + "confusion_matrix": [ + [ + 857, + 343 + ], + [ + 154, + 1046 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.7038749999999999, + "std": 0.010780721605409037, + "ci_95": 0.009449719131628547, + "values": [ + 0.7177083333333333, + 0.6933333333333334, + 0.695, + 0.7125, + 0.7008333333333333 + ] + }, + "auc_roc": { + "mean": 0.7786427777777778, + "std": 0.006594221502529234, + "ci_95": 0.005780089995031698, + "values": [ + 
0.7838011574074074, + 0.7694782407407408, + 0.7747501157407407, + 0.7855067129629629, + 0.779677662037037 + ] + }, + "f1": { + "mean": 0.7801270993026636, + "std": 0.010693295797546843, + "ci_95": 0.009373087032276406, + "values": [ + 0.7943542267415389, + 0.7703588143525741, + 0.7706048260733313, + 0.7879532882606023, + 0.7773643410852713 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.6873333333333334, + "std": 0.026130016796354683, + "ci_95": 0.022903969573465258, + "values": [ + 0.7316666666666667, + 0.6766666666666666, + 0.665, + 0.6883333333333334, + 0.675 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.6873333333333334, + "std": 0.026130016796354683, + "ci_95": 0.022903969573465258, + "values": [ + 0.7316666666666667, + 0.6766666666666666, + 0.665, + 0.6883333333333334, + 0.675 + ] + }, + "pairwise_auc": { + "mean": 0.7688927777777778, + "std": 0.010786696159211689, + "ci_95": 0.009454956058936226, + "values": [ + 0.7876569444444443, + 0.7611951388888889, + 0.7623197916666667, + 0.7672638888888889, + 0.7660281250000001 + ] + }, + "pairwise_f1": { + "mean": 0.6960162561965204, + "std": 0.01215883795849285, + "ci_95": 0.010657691375417153, + "values": [ + 0.7167346938775511, + 0.6901827454313643, + 0.6876346402412753, + 0.6970464135021097, + 0.6884827879303017 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.5389999999999999, + "std": 0.016163229875244614, + "ci_95": 0.014167695648904941, + "values": [ + 0.5616666666666666, + 0.5225, + 0.5241666666666667, + 0.5441666666666667, + 0.5425 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.5389999999999999, + "std": 0.016163229875244614, + "ci_95": 0.014167695648904941, + "values": [ + 0.5616666666666666, + 0.5225, + 0.5241666666666667, + 
0.5441666666666667, + 0.5425 + ] + }, + "pairwise_auc": { + "mean": 0.6780033333333334, + "std": 0.007739560487354743, + "ci_95": 0.006784023879353069, + "values": [ + 0.6755444444444445, + 0.6666326388888889, + 0.6778611111111111, + 0.6868111111111111, + 0.6831673611111111 + ] + }, + "pairwise_f1": { + "mean": 0.5902094975541843, + "std": 0.008641037303778168, + "ci_95": 0.007574203148484918, + "values": [ + 0.6001780943900267, + 0.5784132841328413, + 0.5845724907063197, + 0.594446973145198, + 0.593436645396536 + ] + } + }, + "text2img": { + "accuracy": { + "mean": 0.8763333333333334, + "std": 0.019441864908262052, + "ci_95": 0.01704154596534273, + "values": [ + 0.8875, + 0.8583333333333333, + 0.86, + 0.9041666666666667, + 0.8716666666666667 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.8763333333333334, + "std": 0.019441864908262052, + "ci_95": 0.01704154596534273, + "values": [ + 0.8875, + 0.8583333333333333, + 0.86, + 0.9041666666666667, + 0.8716666666666667 + ] + }, + "pairwise_auc": { + "mean": 0.8890322222222222, + "std": 0.008319531316208516, + "ci_95": 0.007292390725080371, + "values": [ + 0.8882020833333333, + 0.8806069444444444, + 0.8840694444444445, + 0.902445138888889, + 0.8898375 + ] + }, + "pairwise_f1": { + "mean": 0.8100502789850225, + "std": 0.009052441761187644, + "ci_95": 0.007934815055026407, + "values": [ + 0.8077360637087599, + 0.8012446518864255, + 0.8078277886497065, + 0.8254089007227082, + 0.8080339899575125 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.7128333333333334, + "std": 0.014632061448142664, + "ci_95": 0.012825567347208395, + "values": [ + 0.69, + 0.7158333333333333, + 0.7308333333333333, + 0.7133333333333334, + 0.7141666666666666 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.2871666666666667, 
+ "std": 0.014632061448142664, + "ci_95": 0.012825567347208395, + "values": [ + 0.31000000000000005, + 0.2841666666666667, + 0.26916666666666667, + 0.2866666666666666, + 0.2858333333333334 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.7000833333333333, + "std": 0.006437466289016363, + "ci_95": 0.005642687992240727, + "values": [ + 0.7108333333333333, + 0.69625, + 0.6979166666666666, + 0.7008333333333333, + 0.6945833333333333 + ] + }, + "auc_roc": { + "mean": 0.7688927777777778, + "std": 0.010786696159211689, + "ci_95": 0.009454956058936226, + "values": [ + 0.7876569444444443, + 0.7611951388888889, + 0.7623197916666667, + 0.7672638888888889, + 0.7660281250000001 + ] + }, + "f1": { + "mean": 0.6960162561965204, + "std": 0.01215883795849285, + "ci_95": 0.010657691375417153, + "values": [ + 0.7167346938775511, + 0.6901827454313643, + 0.6876346402412753, + 0.6970464135021097, + 0.6884827879303017 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.6259166666666667, + "std": 0.0039352396510392035, + "ci_95": 0.003449389640050674, + "values": [ + 0.6258333333333334, + 0.6191666666666666, + 0.6275, + 0.62875, + 0.6283333333333333 + ] + }, + "auc_roc": { + "mean": 0.6780033333333334, + "std": 0.007739560487354743, + "ci_95": 0.006784023879353069, + "values": [ + 0.6755444444444445, + 0.6666326388888889, + 0.6778611111111111, + 0.6868111111111111, + 0.6831673611111111 + ] + }, + "f1": { + "mean": 0.5902094975541843, + "std": 0.008641037303778168, + "ci_95": 0.007574203148484918, + "values": [ + 0.6001780943900267, + 0.5784132841328413, + 0.5845724907063197, + 0.594446973145198, + 0.593436645396536 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.7945833333333333, + "std": 0.008579691784155822, + "ci_95": 0.007520431429704598, + "values": [ + 0.78875, + 0.7870833333333334, + 0.7954166666666667, + 0.80875, + 0.7929166666666667 + ] + }, + "auc_roc": { + "mean": 0.8890322222222222, + "std": 
0.008319531316208516, + "ci_95": 0.007292390725080371, + "values": [ + 0.8882020833333333, + 0.8806069444444444, + 0.8840694444444445, + 0.902445138888889, + 0.8898375 + ] + }, + "f1": { + "mean": 0.8100502789850225, + "std": 0.009052441761187644, + "ci_95": 0.007934815055026407, + "values": [ + 0.8077360637087599, + 0.8012446518864255, + 0.8078277886497065, + 0.8254089007227082, + 0.8080339899575125 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data", + "run_name": "p1_simplecnn_baseline", + "backbone": "simple_cnn", + "cnn_preset": "medium", + "dropout": 0.0, + "epochs": 15, + "image_size": 128, + "subsample": 0.2, + "augment": false + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p2a_t1_original.json b/classifier/outputs/logs/p2a_t1_original.json new file mode 100644 index 0000000..817c508 --- /dev/null +++ b/classifier/outputs/logs/p2a_t1_original.json @@ -0,0 +1,1714 @@ +{ + "run_name": "p2a_t1_original", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.19722581385048452, + 0.10264252484603613, + 0.05920427175566416, + 0.0366648331324945, + 0.027175970428840998, + 0.016415510239635802, + 0.010992527571498713, + 0.006630335180751798, + 0.005149287341508305, + 0.0026562969974941518, + 0.0014051771179260362, + 0.0008840179361898124, + 0.0004661208866591719 + ], + "train_acc": [ + 0.811863425925926, + 0.9195023148148148, + 0.9572916666666667, + 0.9757523148148148, + 0.9828703703703704, + 0.9909143518518518, + 0.9928819444444444, + 0.9958912037037037, + 0.9965277777777778, + 0.9983217592592593, + 0.9991898148148148, + 0.9995949074074074, + 0.9997685185185186 + ], + "train_auc": [ + 0.9015951949731367, + 0.9747272858796296, + 0.9914408382630315, + 0.9967484800097165, + 
0.9982575392232511, + 0.9990713323759717, + 0.9995895847622315, + 0.9998830982796068, + 0.9999293499228395, + 0.9999699663494512, + 0.9999906585791039, + 0.9999967135345222, + 0.9999998213877457 + ], + "train_f1": [ + 0.8662635237977704, + 0.9450913827813524, + 0.9712078651685393, + 0.9837527628058474, + 0.9885422311682279, + 0.9939328361092863, + 0.9952441712098364, + 0.9972579461630556, + 0.9976841130152848, + 0.9988807842229169, + 0.9994596263702331, + 0.999729886166313, + 0.9998456551937027 + ], + "val_loss": [ + 0.14872334962710737, + 0.15591826991488536, + 0.18881629154008503, + 0.18318608577440804, + 0.2586083890666487, + 0.21436443397154412, + 0.4333447159598412, + 0.21543170170140608, + 0.2493383682408118, + 0.28536541290159223, + 0.27383037212348427, + 0.31338123660993916, + 0.26701862792315295 + ], + "val_acc": [ + 0.8822916666666667, + 0.8833333333333333, + 0.9041666666666667, + 0.9015625, + 0.9015625, + 0.8963541666666667, + 0.9036458333333334, + 0.9052083333333333, + 0.91875, + 0.9208333333333333, + 0.9270833333333334, + 0.9229166666666667, + 0.9270833333333334 + ], + "val_auc": [ + 0.9479615162037038, + 0.9493648726851852, + 0.9572620081018519, + 0.9601273148148147, + 0.9543258101851851, + 0.9585423900462964, + 0.9473763020833333, + 0.9606532118055556, + 0.9584085648148147, + 0.9560619212962964, + 0.95689453125, + 0.9558637152777778, + 0.9571223958333335 + ], + "val_f1": [ + 0.919054441260745, + 0.92, + 0.9360222531293463, + 0.9338929695697796, + 0.9347600966517087, + 0.9286993908993192, + 0.9385177799933533, + 0.9352773826458037, + 0.9456824512534819, + 0.9479095270733379, + 0.9519560741249142, + 0.949419002050581, + 0.9516574585635359 + ] + }, + "test_metrics": { + "accuracy": 0.914375, + "auc_roc": 0.9687729166666665, + "f1": 0.9419409521118802, + "confusion_matrix": [ + [ + 1055, + 145 + ], + [ + 266, + 3334 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9333333333333333, + "auc_roc": null, + "f1": null, + 
"confusion_matrix": [ + [ + 0, + 0 + ], + [ + 80, + 1120 + ] + ], + "n": 1200, + "detection_rate": 0.9333333333333333, + "pairwise_auc": 0.9714215277777778, + "pairwise_f1": 0.9087221095334685 + }, + "insight": { + "accuracy": 0.8858333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 137, + 1063 + ] + ], + "n": 1200, + "detection_rate": 0.8858333333333334, + "pairwise_auc": 0.9548541666666667, + "pairwise_f1": 0.882890365448505 + }, + "text2img": { + "accuracy": 0.9591666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 49, + 1151 + ] + ], + "n": 1200, + "detection_rate": 0.9591666666666666, + "pairwise_auc": 0.9800430555555556, + "pairwise_f1": 0.9222756410256411 + }, + "wiki": { + "accuracy": 0.8791666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1055, + 145 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.12083333333333335 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.90625, + "auc_roc": 0.9714215277777778, + "f1": 0.9087221095334685, + "confusion_matrix": [ + [ + 1055, + 145 + ], + [ + 80, + 1120 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8825, + "auc_roc": 0.9548541666666667, + "f1": 0.882890365448505, + "confusion_matrix": [ + [ + 1055, + 145 + ], + [ + 137, + 1063 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9191666666666667, + "auc_roc": 0.9800430555555556, + "f1": 0.9222756410256411, + "confusion_matrix": [ + [ + 1055, + 145 + ], + [ + 49, + 1151 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.20000588447545414, + 0.10408794169003764, + 0.058041512501788226, + 0.034372955076267114, + 0.025223094800377214, + 0.015196698343168936, + 
0.010545113944270405, + 0.007307375633192553 + ], + "train_acc": [ + 0.8090277777777778, + 0.9175925925925926, + 0.9587962962962963, + 0.9762731481481481, + 0.9840277777777777, + 0.9907986111111111, + 0.9944444444444445, + 0.9953703703703703 + ], + "train_auc": [ + 0.8993097082904664, + 0.9743819390860768, + 0.9915667866941016, + 0.9970922103623686, + 0.9983073184585048, + 0.9992988843878601, + 0.9995584437157066, + 0.9998731674382716 + ], + "train_f1": [ + 0.8646098301468779, + 0.9437198640423682, + 0.9722157184109889, + 0.9840974323171204, + 0.9893130953302873, + 0.993854122376406, + 0.9962934362934363, + 0.9969111969111969 + ], + "val_loss": [ + 0.14092905839594702, + 0.13848712935190027, + 0.15262191738778105, + 0.2710734717196222, + 0.29285608716936623, + 0.25471851951188, + 0.20351220176477605, + 0.2155651534909945 + ], + "val_acc": [ + 0.884375, + 0.8989583333333333, + 0.9020833333333333, + 0.9114583333333334, + 0.9135416666666667, + 0.909375, + 0.9078125, + 0.9104166666666667 + ], + "val_auc": [ + 0.9523307291666666, + 0.9595883969907405, + 0.9632103587962961, + 0.9511574074074074, + 0.9564756944444445, + 0.9586291956018518, + 0.9618590856481481, + 0.9612919560185186 + ], + "val_f1": [ + 0.9203158650394831, + 0.9312544294826365, + 0.9338959212376934, + 0.9420980926430518, + 0.943728813559322, + 0.9403292181069959, + 0.9373007438894793, + 0.9392226148409893 + ] + }, + "test_metrics": { + "accuracy": 0.905625, + "auc_roc": 0.9629736111111112, + "f1": 0.9361881955204958, + "confusion_matrix": [ + [ + 1024, + 176 + ], + [ + 277, + 3323 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9516666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 58, + 1142 + ] + ], + "n": 1200, + "detection_rate": 0.9516666666666667, + "pairwise_auc": 0.9716375000000002, + "pairwise_f1": 0.9070691024622717 + }, + "insight": { + "accuracy": 0.8775, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + 
[ + 147, + 1053 + ] + ], + "n": 1200, + "detection_rate": 0.8775, + "pairwise_auc": 0.9462055555555556, + "pairwise_f1": 0.8670234664470976 + }, + "text2img": { + "accuracy": 0.94, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 72, + 1128 + ] + ], + "n": 1200, + "detection_rate": 0.94, + "pairwise_auc": 0.9710777777777778, + "pairwise_f1": 0.9009584664536742 + }, + "wiki": { + "accuracy": 0.8533333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1024, + 176 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.1466666666666666 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9025, + "auc_roc": 0.9716375000000002, + "f1": 0.9070691024622717, + "confusion_matrix": [ + [ + 1024, + 176 + ], + [ + 58, + 1142 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8654166666666666, + "auc_roc": 0.9462055555555556, + "f1": 0.8670234664470976, + "confusion_matrix": [ + [ + 1024, + 176 + ], + [ + 147, + 1053 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.8966666666666666, + "auc_roc": 0.9710777777777778, + "f1": 0.9009584664536742, + "confusion_matrix": [ + [ + 1024, + 176 + ], + [ + 72, + 1128 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.1954930212141739, + 0.10281271817921489, + 0.06016657212274839, + 0.03787446872965019, + 0.026305097892300518, + 0.017765212520370603, + 0.010911685225935932, + 0.009139342036825029, + 0.004632529768130351, + 0.004187346089508571, + 0.0017877653261907484, + 0.0007197948908006529, + 0.0007150228552733198 + ], + "train_acc": [ + 0.8079282407407408, + 0.9170717592592592, + 0.9578125, + 0.9766782407407407, + 0.9836226851851851, + 0.9901620370370371, + 0.9935763888888889, + 0.9946759259259259, + 
0.9974537037037037, + 0.9972222222222222, + 0.9992476851851851, + 0.9995949074074074, + 0.9996527777777777 + ], + "train_auc": [ + 0.9030463123713992, + 0.9746345593278463, + 0.9914829728938043, + 0.996350594421582, + 0.9981514256830133, + 0.9991553515803613, + 0.9995712055612711, + 0.9995691515203475, + 0.9999019329418153, + 0.9999532482424555, + 0.9999925340077732, + 0.9999994820244628, + 0.9999990890775035 + ], + "train_f1": [ + 0.8626298580356774, + 0.943236284412755, + 0.9716154654830044, + 0.9843780284529209, + 0.9890501064035597, + 0.9934271574389112, + 0.9957121335033028, + 0.9964456807294081, + 0.9983014206300185, + 0.9981467181467182, + 0.999498437439716, + 0.9997299070108423, + 0.9997685006559148 + ], + "val_loss": [ + 0.19500481635332106, + 0.1268259635893628, + 0.13148466631925354, + 0.20869781834092768, + 0.2388368578608303, + 0.22051208737151076, + 0.21102181890407035, + 0.19602018855754674, + 0.2702031286339964, + 0.3234737387955799, + 0.2604731284258378, + 0.25106819690430104, + 0.25952417930657246 + ], + "val_acc": [ + 0.7515625, + 0.9072916666666667, + 0.9057291666666667, + 0.9171875, + 0.9114583333333334, + 0.9234375, + 0.9083333333333333, + 0.9270833333333334, + 0.9255208333333333, + 0.9197916666666667, + 0.9302083333333333, + 0.9270833333333334, + 0.9348958333333334 + ], + "val_auc": [ + 0.9487427662037037, + 0.9660561342592593, + 0.9677799479166665, + 0.9676699942129628, + 0.9617209201388888, + 0.9637868923611111, + 0.9642853009259259, + 0.9688606770833333, + 0.9638433159722222, + 0.9533347800925925, + 0.9644741030092593, + 0.9622547743055555, + 0.9640183738425926 + ], + "val_f1": [ + 0.8039457459926017, + 0.9366096866096866, + 0.9354263289332858, + 0.9457523029682702, + 0.9418604651162791, + 0.9491876944348427, + 0.9376328844790929, + 0.9510831586303284, + 0.9508083935328517, + 0.9471879286694102, + 0.9538567493112947, + 0.9514899514899515, + 0.9568221070811744 + ] + }, + "test_metrics": { + "accuracy": 0.9239583333333333, + "auc_roc": 
0.9678178240740741, + "f1": 0.9493547939503261, + "confusion_matrix": [ + [ + 1014, + 186 + ], + [ + 179, + 3421 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9625, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 45, + 1155 + ] + ], + "n": 1200, + "detection_rate": 0.9625, + "pairwise_auc": 0.9738322916666667, + "pairwise_f1": 0.9090909090909091 + }, + "insight": { + "accuracy": 0.9158333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 101, + 1099 + ] + ], + "n": 1200, + "detection_rate": 0.9158333333333334, + "pairwise_auc": 0.9491239583333334, + "pairwise_f1": 0.8845070422535212 + }, + "text2img": { + "accuracy": 0.9725, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 33, + 1167 + ] + ], + "n": 1200, + "detection_rate": 0.9725, + "pairwise_auc": 0.9804972222222222, + "pairwise_f1": 0.9142185663924794 + }, + "wiki": { + "accuracy": 0.845, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1014, + 186 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.15500000000000003 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.90375, + "auc_roc": 0.9738322916666667, + "f1": 0.9090909090909091, + "confusion_matrix": [ + [ + 1014, + 186 + ], + [ + 45, + 1155 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8804166666666666, + "auc_roc": 0.9491239583333334, + "f1": 0.8845070422535212, + "confusion_matrix": [ + [ + 1014, + 186 + ], + [ + 101, + 1099 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.90875, + "auc_roc": 0.9804972222222222, + "f1": 0.9142185663924794, + "confusion_matrix": [ + [ + 1014, + 186 + ], + [ + 33, + 1167 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + 
"history": { + "train_loss": [ + 0.19490147646240613, + 0.10359700576022819, + 0.06170826292924445, + 0.04011154976345737, + 0.02456697733448689, + 0.017018179965196868, + 0.014746911858236234, + 0.009014791383426065 + ], + "train_acc": [ + 0.8076967592592592, + 0.9194444444444444, + 0.9548032407407407, + 0.975, + 0.9853009259259259, + 0.9898726851851852, + 0.992824074074074, + 0.9953703703703703 + ], + "train_auc": [ + 0.9043987822216506, + 0.9743296414180384, + 0.9908415941500914, + 0.9959990587134202, + 0.9984063857453134, + 0.99924799775663, + 0.9991502343392775, + 0.9995339827674896 + ], + "train_f1": [ + 0.8621790883828958, + 0.9450106660346054, + 0.9695052906954044, + 0.9832376222256712, + 0.990171800030955, + 0.9932367149758454, + 0.9952123552123552, + 0.9969107198022861 + ], + "val_loss": [ + 0.22769305457671482, + 0.17312400991407534, + 0.14870864135834078, + 0.1872468259287416, + 0.2433047288354525, + 0.3240897962289788, + 0.2584284867192613, + 0.23186856481673507 + ], + "val_acc": [ + 0.746875, + 0.8453125, + 0.9104166666666667, + 0.9161458333333333, + 0.9234375, + 0.9229166666666667, + 0.9208333333333333, + 0.9223958333333333 + ], + "val_auc": [ + 0.9224739583333335, + 0.9507523148148148, + 0.9644314236111111, + 0.9619386574074074, + 0.9605664062500001, + 0.9496766493055555, + 0.9632212094907407, + 0.9596093749999999 + ], + "val_f1": [ + 0.8008196721311476, + 0.8879668049792531, + 0.9398180545836249, + 0.943921978404737, + 0.9496402877697842, + 0.9499323410013532, + 0.9481935923653715, + 0.9479566887879847 + ] + }, + "test_metrics": { + "accuracy": 0.9035416666666667, + "auc_roc": 0.9612076388888888, + "f1": 0.935126803979263, + "confusion_matrix": [ + [ + 1000, + 200 + ], + [ + 263, + 3337 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9475, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 63, + 1137 + ] + ], + "n": 1200, + "detection_rate": 0.9475, + "pairwise_auc": 0.9671291666666667, + 
"pairwise_f1": 0.8963342530547891 + }, + "insight": { + "accuracy": 0.8941666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 127, + 1073 + ] + ], + "n": 1200, + "detection_rate": 0.8941666666666667, + "pairwise_auc": 0.9488027777777779, + "pairwise_f1": 0.8677719369187222 + }, + "text2img": { + "accuracy": 0.9391666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 73, + 1127 + ] + ], + "n": 1200, + "detection_rate": 0.9391666666666667, + "pairwise_auc": 0.9676909722222221, + "pairwise_f1": 0.8919667590027701 + }, + "wiki": { + "accuracy": 0.8333333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1000, + 200 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.16666666666666663 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.8904166666666666, + "auc_roc": 0.9671291666666667, + "f1": 0.8963342530547891, + "confusion_matrix": [ + [ + 1000, + 200 + ], + [ + 63, + 1137 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.86375, + "auc_roc": 0.9488027777777779, + "f1": 0.8677719369187222, + "confusion_matrix": [ + [ + 1000, + 200 + ], + [ + 127, + 1073 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.88625, + "auc_roc": 0.9676909722222221, + "f1": 0.8919667590027701, + "confusion_matrix": [ + [ + 1000, + 200 + ], + [ + 73, + 1127 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.19957470789689707, + 0.10425364971505822, + 0.06086116745007328, + 0.04081417578606677, + 0.028762150054670334, + 0.019413475335063088, + 0.011313071529999703, + 0.006697674415297823, + 0.004091889076434436, + 0.003935401036381302, + 0.0018815870159793727 + ], + "train_acc": [ + 0.8087962962962963, 
+ 0.9174189814814815, + 0.9564236111111111, + 0.9734953703703704, + 0.982175925925926, + 0.9881365740740741, + 0.9929976851851852, + 0.9956597222222222, + 0.9976273148148148, + 0.9975694444444444, + 0.9990740740740741 + ], + "train_auc": [ + 0.8990732167352539, + 0.97397518539952, + 0.9910111954160951, + 0.9960041848851166, + 0.9979256508630543, + 0.9990071837848651, + 0.9996136527634888, + 0.9998784186385459, + 0.9999477291237998, + 0.9999236879143805, + 0.9999895779749657 + ], + "train_f1": [ + 0.8639433371767419, + 0.943696981653186, + 0.9706238052510435, + 0.9822273961971284, + 0.9880712625871417, + 0.9920742315870869, + 0.9953240329249913, + 0.9971021212472471, + 0.9984171717561673, + 0.9983787539566124, + 0.9993825254708243 + ], + "val_loss": [ + 0.16892356189588706, + 0.23779966595272223, + 0.15918385590387818, + 0.18508020597219002, + 0.33955128504943183, + 0.17744567066595968, + 0.18761360666442972, + 0.23809361916598087, + 0.24717846481992942, + 0.3055811640280202, + 0.2333151061244583 + ], + "val_acc": [ + 0.8296875, + 0.7703125, + 0.9161458333333333, + 0.903125, + 0.9098958333333333, + 0.9177083333333333, + 0.9177083333333333, + 0.9213541666666667, + 0.9182291666666667, + 0.9239583333333333, + 0.9291666666666667 + ], + "val_auc": [ + 0.945900607638889, + 0.924205005787037, + 0.9659411168981483, + 0.9623741319444445, + 0.9572952835648147, + 0.9675354456018519, + 0.9642780671296296, + 0.9610481770833332, + 0.9629398148148148, + 0.9584772858796297, + 0.9648719618055557 + ], + "val_f1": [ + 0.8744721689059501, + 0.8242327620565962, + 0.9441166261714682, + 0.9346451159522137, + 0.9422370617696161, + 0.9443661971830986, + 0.9439716312056737, + 0.9474416985729203, + 0.945353289244692, + 0.9501706484641638, + 0.9527449617790132 + ] + }, + "test_metrics": { + "accuracy": 0.9185416666666667, + "auc_roc": 0.9692168981481482, + "f1": 0.9447974022306932, + "confusion_matrix": [ + [ + 1063, + 137 + ], + [ + 254, + 3346 + ] + ] + }, + "source_metrics": { + 
"inpainting": { + "accuracy": 0.9375, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 75, + 1125 + ] + ], + "n": 1200, + "detection_rate": 0.9375, + "pairwise_auc": 0.9709586805555556, + "pairwise_f1": 0.9138911454102355 + }, + "insight": { + "accuracy": 0.8783333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 146, + 1054 + ] + ], + "n": 1200, + "detection_rate": 0.8783333333333333, + "pairwise_auc": 0.9507531250000001, + "pairwise_f1": 0.8816394813885403 + }, + "text2img": { + "accuracy": 0.9725, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 33, + 1167 + ] + ], + "n": 1200, + "detection_rate": 0.9725, + "pairwise_auc": 0.9859388888888889, + "pairwise_f1": 0.9321086261980831 + }, + "wiki": { + "accuracy": 0.8858333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1063, + 137 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.11416666666666664 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9116666666666666, + "auc_roc": 0.9709586805555556, + "f1": 0.9138911454102355, + "confusion_matrix": [ + [ + 1063, + 137 + ], + [ + 75, + 1125 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8820833333333333, + "auc_roc": 0.9507531250000001, + "f1": 0.8816394813885403, + "confusion_matrix": [ + [ + 1063, + 137 + ], + [ + 146, + 1054 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9291666666666667, + "auc_roc": 0.9859388888888889, + "f1": 0.9321086261980831, + "confusion_matrix": [ + [ + 1063, + 137 + ], + [ + 33, + 1167 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.9132083333333334, + "std": 0.008606965848143645, + "ci_95": 0.007544338200855582, + "values": [ + 0.914375, + 0.905625, + 0.9239583333333333, 
+ 0.9035416666666667, + 0.9185416666666667 + ] + }, + "auc_roc": { + "mean": 0.9659977777777777, + "std": 0.0036560810622455297, + "ci_95": 0.003204696348280812, + "values": [ + 0.9687729166666665, + 0.9629736111111112, + 0.9678178240740741, + 0.9612076388888888, + 0.9692168981481482 + ] + }, + "f1": { + "mean": 0.9414816295585317, + "std": 0.0059497120774801284, + "ci_95": 0.005215152575504448, + "values": [ + 0.9419409521118802, + 0.9361881955204958, + 0.9493547939503261, + 0.935126803979263, + 0.9447974022306932 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.9465, + "std": 0.011598012281804541, + "ci_95": 0.01016610599546902, + "values": [ + 0.9333333333333333, + 0.9516666666666667, + 0.9625, + 0.9475, + 0.9375 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9465, + "std": 0.011598012281804541, + "ci_95": 0.01016610599546902, + "values": [ + 0.9333333333333333, + 0.9516666666666667, + 0.9625, + 0.9475, + 0.9375 + ] + }, + "pairwise_auc": { + "mean": 0.9709958333333335, + "std": 0.0024285252308698644, + "ci_95": 0.0021286962205089677, + "values": [ + 0.9714215277777778, + 0.9716375000000002, + 0.9738322916666667, + 0.9671291666666667, + 0.9709586805555556 + ] + }, + "pairwise_f1": { + "mean": 0.9070215039103348, + "std": 0.006492034890839102, + "ci_95": 0.00569051948066093, + "values": [ + 0.9087221095334685, + 0.9070691024622717, + 0.9090909090909091, + 0.8963342530547891, + 0.9138911454102355 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.8903333333333332, + "std": 0.015763001688200867, + "ci_95": 0.013816880175270344, + "values": [ + 0.8858333333333334, + 0.8775, + 0.9158333333333334, + 0.8941666666666667, + 0.8783333333333333 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 
0.8903333333333332, + "std": 0.015763001688200867, + "ci_95": 0.013816880175270344, + "values": [ + 0.8858333333333334, + 0.8775, + 0.9158333333333334, + 0.8941666666666667, + 0.8783333333333333 + ] + }, + "pairwise_auc": { + "mean": 0.9499479166666667, + "std": 0.0031904218615427878, + "ci_95": 0.0027965280624499492, + "values": [ + 0.9548541666666667, + 0.9462055555555556, + 0.9491239583333334, + 0.9488027777777779, + 0.9507531250000001 + ] + }, + "pairwise_f1": { + "mean": 0.8767664584912772, + "std": 0.008616735719668613, + "ci_95": 0.007552901870825199, + "values": [ + 0.882890365448505, + 0.8670234664470976, + 0.8845070422535212, + 0.8677719369187222, + 0.8816394813885403 + ] + } + }, + "text2img": { + "accuracy": { + "mean": 0.9566666666666667, + "std": 0.01652018966799919, + "ci_95": 0.014480584702743658, + "values": [ + 0.9591666666666666, + 0.94, + 0.9725, + 0.9391666666666667, + 0.9725 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9566666666666667, + "std": 0.01652018966799919, + "ci_95": 0.014480584702743658, + "values": [ + 0.9591666666666666, + 0.94, + 0.9725, + 0.9391666666666667, + 0.9725 + ] + }, + "pairwise_auc": { + "mean": 0.9770495833333334, + "std": 0.007468482219229771, + "ci_95": 0.006546413300930931, + "values": [ + 0.9800430555555556, + 0.9710777777777778, + 0.9804972222222222, + 0.9676909722222221, + 0.9859388888888889 + ] + }, + "pairwise_f1": { + "mean": 0.9123056118145296, + "std": 0.01610626351688794, + "ci_95": 0.014117762434216218, + "values": [ + 0.9222756410256411, + 0.9009584664536742, + 0.9142185663924794, + 0.8919667590027701, + 0.9321086261980831 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.8593333333333334, + "std": 0.022433543832593387, + "ci_95": 0.01966386816247279, + "values": [ + 0.8791666666666667, + 0.8533333333333334, + 0.845, + 0.8333333333333334, + 0.8858333333333334 + ] + }, + "n": { + "mean": 
1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.14066666666666666, + "std": 0.022433543832593383, + "ci_95": 0.019663868162472786, + "values": [ + 0.12083333333333335, + 0.1466666666666666, + 0.15500000000000003, + 0.16666666666666663, + 0.11416666666666664 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.9029166666666665, + "std": 0.007822909731466082, + "ci_95": 0.006857082713029891, + "values": [ + 0.90625, + 0.9025, + 0.90375, + 0.8904166666666666, + 0.9116666666666666 + ] + }, + "auc_roc": { + "mean": 0.9709958333333335, + "std": 0.0024285252308698644, + "ci_95": 0.0021286962205089677, + "values": [ + 0.9714215277777778, + 0.9716375000000002, + 0.9738322916666667, + 0.9671291666666667, + 0.9709586805555556 + ] + }, + "f1": { + "mean": 0.9070215039103348, + "std": 0.006492034890839102, + "ci_95": 0.00569051948066093, + "values": [ + 0.9087221095334685, + 0.9070691024622717, + 0.9090909090909091, + 0.8963342530547891, + 0.9138911454102355 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.8748333333333334, + "std": 0.009407812946931092, + "ci_95": 0.008246311633424692, + "values": [ + 0.8825, + 0.8654166666666666, + 0.8804166666666666, + 0.86375, + 0.8820833333333333 + ] + }, + "auc_roc": { + "mean": 0.9499479166666667, + "std": 0.0031904218615427878, + "ci_95": 0.0027965280624499492, + "values": [ + 0.9548541666666667, + 0.9462055555555556, + 0.9491239583333334, + 0.9488027777777779, + 0.9507531250000001 + ] + }, + "f1": { + "mean": 0.8767664584912772, + "std": 0.008616735719668613, + "ci_95": 0.007552901870825199, + "values": [ + 0.882890365448505, + 0.8670234664470976, + 0.8845070422535212, + 0.8677719369187222, + 0.8816394813885403 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.908, + "std": 0.01713761700665141, + "ci_95": 0.015021783626897774, + "values": [ + 0.9191666666666667, + 
0.8966666666666666, + 0.90875, + 0.88625, + 0.9291666666666667 + ] + }, + "auc_roc": { + "mean": 0.9770495833333334, + "std": 0.007468482219229771, + "ci_95": 0.006546413300930931, + "values": [ + 0.9800430555555556, + 0.9710777777777778, + 0.9804972222222222, + 0.9676909722222221, + 0.9859388888888889 + ] + }, + "f1": { + "mean": 0.9123056118145296, + "std": 0.01610626351688794, + "ci_95": 0.014117762434216218, + "values": [ + 0.9222756410256411, + 0.9009584664536742, + 0.9142185663924794, + 0.8919667590027701, + 0.9321086261980831 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data", + "run_name": "p2a_t1_original", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false, + "normalization": "imagenet" + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p2a_t2_real_norm.json b/classifier/outputs/logs/p2a_t2_real_norm.json new file mode 100644 index 0000000..9164364 --- /dev/null +++ b/classifier/outputs/logs/p2a_t2_real_norm.json @@ -0,0 +1,1698 @@ +{ + "run_name": "p2a_t2_real_norm", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.19635879239412368, + 0.10250425095049044, + 0.06135965930815372, + 0.036744935404837, + 0.022988288751393728, + 0.017310182141119623, + 0.012009648638006722, + 0.00723629916516007 + ], + "train_acc": [ + 0.8133680555555556, + 0.9196180555555555, + 0.9576388888888889, + 0.9769097222222223, + 0.9848958333333333, + 0.9891203703703704, + 0.9923611111111111, + 0.9959490740740741 + ], + "train_auc": [ + 0.9026075335076589, + 0.9748221825703017, + 0.9909446087677184, + 0.9966968253457933, + 0.9986351791123687, + 0.9992185267346823, + 0.9993304362425697, + 0.9997806462905807 + ], + "train_f1": [ + 
0.8675292667898953, + 0.9451486790664613, + 0.9714953271028037, + 0.9845342842745843, + 0.9899005533413303, + 0.9927300850734725, + 0.9948967756900952, + 0.9972960444993819 + ], + "val_loss": [ + 0.15108177385603389, + 0.15344043526177606, + 0.1912794195076761, + 0.227002287551295, + 0.2509125987601389, + 0.22423927540269992, + 0.28618341417237997, + 0.24878037015926868 + ], + "val_acc": [ + 0.8697916666666666, + 0.8697916666666666, + 0.9052083333333333, + 0.9078125, + 0.9036458333333334, + 0.8958333333333334, + 0.9213541666666667, + 0.9114583333333334 + ], + "val_auc": [ + 0.9453624131944445, + 0.948683449074074, + 0.9581669560185186, + 0.9581069155092593, + 0.9493959780092592, + 0.9546412037037038, + 0.9552770543981483, + 0.957332175925926 + ], + "val_f1": [ + 0.9095513748191028, + 0.9088256746900073, + 0.9368932038834952, + 0.9394043135912359, + 0.9357861853523082, + 0.9283667621776505, + 0.9484817468440805, + 0.9406838799720866 + ] + }, + "test_metrics": { + "accuracy": 0.9114583333333334, + "auc_roc": 0.9665831018518519, + "f1": 0.941241531867828, + "confusion_matrix": [ + [ + 971, + 229 + ], + [ + 196, + 3404 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9558333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 53, + 1147 + ] + ], + "n": 1200, + "detection_rate": 0.9558333333333333, + "pairwise_auc": 0.9706750000000001, + "pairwise_f1": 0.890527950310559 + }, + "insight": { + "accuracy": 0.9025, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 117, + 1083 + ] + ], + "n": 1200, + "detection_rate": 0.9025, + "pairwise_auc": 0.946836111111111, + "pairwise_f1": 0.8622611464968153 + }, + "text2img": { + "accuracy": 0.9783333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 26, + 1174 + ] + ], + "n": 1200, + "detection_rate": 0.9783333333333334, + "pairwise_auc": 0.9822381944444445, + "pairwise_f1": 0.9020361121782559 + }, + 
"wiki": { + "accuracy": 0.8091666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 971, + 229 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.1908333333333333 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.8825, + "auc_roc": 0.9706750000000001, + "f1": 0.890527950310559, + "confusion_matrix": [ + [ + 971, + 229 + ], + [ + 53, + 1147 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8558333333333333, + "auc_roc": 0.946836111111111, + "f1": 0.8622611464968153, + "confusion_matrix": [ + [ + 971, + 229 + ], + [ + 117, + 1083 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.89375, + "auc_roc": 0.9822381944444445, + "f1": 0.9020361121782559, + "confusion_matrix": [ + [ + 971, + 229 + ], + [ + 26, + 1174 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.19605547449103108, + 0.1016867975462918, + 0.05938513064843223, + 0.039155238193214044, + 0.02408102286992491, + 0.016763887942512087, + 0.012361552757432004, + 0.006711566793920712, + 0.004867987432482079, + 0.0033510766653097124, + 0.0017171694248005493 + ], + "train_acc": [ + 0.8157407407407408, + 0.9196180555555555, + 0.9558449074074075, + 0.9753472222222223, + 0.9855902777777777, + 0.9895833333333334, + 0.9928819444444444, + 0.9956597222222222, + 0.997337962962963, + 0.997800925925926, + 0.9990740740740741 + ], + "train_auc": [ + 0.9033538469507316, + 0.9751240462105624, + 0.9914933413351623, + 0.9962482763917466, + 0.9984501189557613, + 0.9991688635973937, + 0.9995644718792867, + 0.999859351780407, + 0.9999119173668268, + 0.9999706540066301, + 0.9999934895833333 + ], + "train_f1": [ + 0.8693153833524873, + 0.9451226739362333, + 0.9702174167609977, + 0.9834960483496048, + 0.990361912134701, + 
0.9930405196412001, + 0.9952485803685247, + 0.9971021212472471, + 0.99822476072862, + 0.998533384793516, + 0.9993825254708243 + ], + "val_loss": [ + 0.14180575897917153, + 0.16796943203856549, + 0.1819291006385659, + 0.19614342823624611, + 0.1975471700677493, + 0.20672754806964805, + 0.2890045994245156, + 0.2595113074560686, + 0.30948789948679406, + 0.25677137968485414, + 0.2594324775731062 + ], + "val_acc": [ + 0.890625, + 0.8380208333333333, + 0.8984375, + 0.91875, + 0.9088541666666666, + 0.9182291666666667, + 0.9171875, + 0.9197916666666667, + 0.9177083333333333, + 0.9255208333333333, + 0.9182291666666667 + ], + "val_auc": [ + 0.9531922743055556, + 0.9427358217592593, + 0.9559722222222222, + 0.9617787905092593, + 0.958308738425926, + 0.9635467303240741, + 0.9556517650462962, + 0.9584063946759258, + 0.947162181712963, + 0.9610836226851852, + 0.9603349247685184 + ], + "val_f1": [ + 0.9253731343283582, + 0.8833020637898686, + 0.9316508937960042, + 0.9462809917355371, + 0.9390456287008011, + 0.945391304347826, + 0.9452667814113598, + 0.9468599033816425, + 0.9452908587257618, + 0.9500523925951799, + 0.945391304347826 + ] + }, + "test_metrics": { + "accuracy": 0.9158333333333334, + "auc_roc": 0.9642475694444445, + "f1": 0.943732590529248, + "confusion_matrix": [ + [ + 1008, + 192 + ], + [ + 212, + 3388 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9591666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 49, + 1151 + ] + ], + "n": 1200, + "detection_rate": 0.9591666666666666, + "pairwise_auc": 0.970038888888889, + "pairwise_f1": 0.9052300432559969 + }, + "insight": { + "accuracy": 0.8908333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 131, + 1069 + ] + ], + "n": 1200, + "detection_rate": 0.8908333333333334, + "pairwise_auc": 0.9423524305555556, + "pairwise_f1": 0.8687525396180414 + }, + "text2img": { + "accuracy": 0.9733333333333334, + "auc_roc": null, + "f1": 
null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 32, + 1168 + ] + ], + "n": 1200, + "detection_rate": 0.9733333333333334, + "pairwise_auc": 0.980351388888889, + "pairwise_f1": 0.9125 + }, + "wiki": { + "accuracy": 0.84, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1008, + 192 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.16000000000000003 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.8995833333333333, + "auc_roc": 0.970038888888889, + "f1": 0.9052300432559969, + "confusion_matrix": [ + [ + 1008, + 192 + ], + [ + 49, + 1151 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8654166666666666, + "auc_roc": 0.9423524305555556, + "f1": 0.8687525396180414, + "confusion_matrix": [ + [ + 1008, + 192 + ], + [ + 131, + 1069 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9066666666666666, + "auc_roc": 0.980351388888889, + "f1": 0.9125, + "confusion_matrix": [ + [ + 1008, + 192 + ], + [ + 32, + 1168 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.19928204308919334, + 0.10327634860382036, + 0.05515722113368274, + 0.035082230208489044, + 0.025447382428898924, + 0.017044535008877842, + 0.012585550207478702, + 0.007383903622266425, + 0.0065240937882107474, + 0.003589426765918486, + 0.0018262106446854886, + 0.0014432606080622126 + ], + "train_acc": [ + 0.8112268518518518, + 0.9170138888888889, + 0.960300925925926, + 0.9774305555555556, + 0.9856481481481482, + 0.9886574074074074, + 0.9927662037037037, + 0.9962962962962963, + 0.9962384259259259, + 0.9982060185185185, + 0.998900462962963, + 0.9992476851851851 + ], + "train_auc": [ + 0.8996633605538409, + 0.974294213677412, + 0.9924166684527892, + 0.9968912001314587, + 0.9983060145890489, + 0.999246265217764, 
+ 0.9995093878600823, + 0.9997427001171696, + 0.999896556712963, + 0.9998388649548468, + 0.9999534893689986, + 0.9999838087991542 + ], + "train_f1": [ + 0.8659709096885528, + 0.9433067130544793, + 0.9732470166133687, + 0.9848837209302326, + 0.9904128653162209, + 0.9924195544554455, + 0.9951698288187333, + 0.9975281940367681, + 0.9974918001157631, + 0.9988035045737003, + 0.9992667206977731, + 0.9994982825826869 + ], + "val_loss": [ + 0.12679128739982842, + 0.2473485317993133, + 0.14678561501204967, + 0.1554646889999276, + 0.15785321037013395, + 0.2617460626413049, + 0.17361461978216539, + 0.20700737896950158, + 0.23985129223819968, + 0.22690686947868624, + 0.26887194706073386, + 0.2289405580937455 + ], + "val_acc": [ + 0.8609375, + 0.8739583333333333, + 0.8817708333333333, + 0.9171875, + 0.9109375, + 0.9067708333333333, + 0.9208333333333333, + 0.9223958333333333, + 0.9166666666666666, + 0.9234375, + 0.9161458333333333, + 0.9255208333333333 + ], + "val_auc": [ + 0.9617122395833333, + 0.9316594328703705, + 0.9648596643518519, + 0.9680758101851852, + 0.9689894386574074, + 0.9582892071759259, + 0.9722294560185186, + 0.9673271122685185, + 0.9659454571759258, + 0.9686675347222222, + 0.9653291377314814, + 0.9667903645833332 + ], + "val_f1": [ + 0.9003359462486002, + 0.9181326116373477, + 0.9162670601254149, + 0.9444250262146103, + 0.9396825396825397, + 0.9383396486393386, + 0.9467414155571128, + 0.9483535528596188, + 0.9446366782006921, + 0.9490467937608319, + 0.9446545204537642, + 0.9502954466458116 + ] + }, + "test_metrics": { + "accuracy": 0.919375, + "auc_roc": 0.9684114583333332, + "f1": 0.9460777483628257, + "confusion_matrix": [ + [ + 1018, + 182 + ], + [ + 205, + 3395 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9616666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 46, + 1154 + ] + ], + "n": 1200, + "detection_rate": 0.9616666666666667, + "pairwise_auc": 0.9754291666666666, + "pairwise_f1": 
0.9100946372239748 + }, + "insight": { + "accuracy": 0.9066666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 112, + 1088 + ] + ], + "n": 1200, + "detection_rate": 0.9066666666666666, + "pairwise_auc": 0.9537607638888889, + "pairwise_f1": 0.8809716599190284 + }, + "text2img": { + "accuracy": 0.9608333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 47, + 1153 + ] + ], + "n": 1200, + "detection_rate": 0.9608333333333333, + "pairwise_auc": 0.9760444444444445, + "pairwise_f1": 0.9096646942800789 + }, + "wiki": { + "accuracy": 0.8483333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1018, + 182 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.15166666666666662 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.905, + "auc_roc": 0.9754291666666666, + "f1": 0.9100946372239748, + "confusion_matrix": [ + [ + 1018, + 182 + ], + [ + 46, + 1154 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8775, + "auc_roc": 0.9537607638888889, + "f1": 0.8809716599190284, + "confusion_matrix": [ + [ + 1018, + 182 + ], + [ + 112, + 1088 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9045833333333333, + "auc_roc": 0.9760444444444445, + "f1": 0.9096646942800789, + "confusion_matrix": [ + [ + 1018, + 182 + ], + [ + 47, + 1153 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.1946798356939797, + 0.09956369337246374, + 0.06275961574197195, + 0.037021170328349666, + 0.025708021630567533, + 0.021358401671346252, + 0.01339459377892059, + 0.007324876425533418 + ], + "train_acc": [ + 0.8107060185185185, + 0.9216435185185186, + 0.9565393518518519, + 0.9758680555555556, + 0.9834490740740741, + 
0.9872685185185185, + 0.9924768518518519, + 0.9950231481481482 + ], + "train_auc": [ + 0.9044035154463876, + 0.9761751793267033, + 0.9906578825160036, + 0.9966033129000914, + 0.9982404281692958, + 0.9988388774577046, + 0.9993337941529492, + 0.9998615219192957 + ], + "train_f1": [ + 0.8651967854935092, + 0.9465540380516302, + 0.9707383596337424, + 0.9838340763713898, + 0.9889336016096579, + 0.991492652745553, + 0.9949779803754926, + 0.9966774841600989 + ], + "val_loss": [ + 0.1366950663117071, + 0.14299837291861575, + 0.14879056867212057, + 0.19593870592943857, + 0.18934340609703212, + 0.24294537242579584, + 0.22201119096826005, + 0.3828427222149609 + ], + "val_acc": [ + 0.8875, + 0.875, + 0.9161458333333333, + 0.9119791666666667, + 0.8869791666666667, + 0.9078125, + 0.9213541666666667, + 0.9151041666666667 + ], + "val_auc": [ + 0.9550998263888889, + 0.9605808738425925, + 0.9650802951388888, + 0.9626374421296297, + 0.9632081886574075, + 0.9592136863425926, + 0.9635951967592593, + 0.9525991030092593 + ], + "val_f1": [ + 0.9228020014295926, + 0.9121522693997072, + 0.9438437391001047, + 0.9414617249740215, + 0.9210622044379775, + 0.9391124871001032, + 0.9476240027748872, + 0.9451732256979481 + ] + }, + "test_metrics": { + "accuracy": 0.915, + "auc_roc": 0.9685586805555556, + "f1": 0.9427127211457456, + "confusion_matrix": [ + [ + 1035, + 165 + ], + [ + 243, + 3357 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9425, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 69, + 1131 + ] + ], + "n": 1200, + "detection_rate": 0.9425, + "pairwise_auc": 0.9706340277777777, + "pairwise_f1": 0.90625 + }, + "insight": { + "accuracy": 0.8816666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 142, + 1058 + ] + ], + "n": 1200, + "detection_rate": 0.8816666666666667, + "pairwise_auc": 0.9508114583333334, + "pairwise_f1": 0.8732975650020636 + }, + "text2img": { + "accuracy": 
0.9733333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 32, + 1168 + ] + ], + "n": 1200, + "detection_rate": 0.9733333333333334, + "pairwise_auc": 0.9842305555555555, + "pairwise_f1": 0.9222266087643111 + }, + "wiki": { + "accuracy": 0.8625, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1035, + 165 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.13749999999999996 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9025, + "auc_roc": 0.9706340277777777, + "f1": 0.90625, + "confusion_matrix": [ + [ + 1035, + 165 + ], + [ + 69, + 1131 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8720833333333333, + "auc_roc": 0.9508114583333334, + "f1": 0.8732975650020636, + "confusion_matrix": [ + [ + 1035, + 165 + ], + [ + 142, + 1058 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9179166666666667, + "auc_roc": 0.9842305555555555, + "f1": 0.9222266087643111, + "confusion_matrix": [ + [ + 1035, + 165 + ], + [ + 32, + 1168 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.19603962583674325, + 0.10220566353829647, + 0.0641474825364572, + 0.038550203464735575, + 0.02755131951842621, + 0.016848129556375272, + 0.014130299409757923, + 0.008122794064207415, + 0.004999924033172984, + 0.0031690383527381563, + 0.001879638221826267, + 0.0010143579254055888 + ], + "train_acc": [ + 0.8101851851851852, + 0.9185185185185185, + 0.955324074074074, + 0.9729745370370371, + 0.9843171296296296, + 0.9900462962962963, + 0.9925925925925926, + 0.9954282407407408, + 0.9974537037037037, + 0.9979745370370371, + 0.9987847222222223, + 0.9991898148148148 + ], + "train_auc": [ + 0.9027364826245999, + 0.9749666263002972, + 0.9903671732110196, + 
0.9963543452789209, + 0.9976980274062643, + 0.9991864658350481, + 0.9992046842849793, + 0.999598667195359, + 0.9999064339706218, + 0.9999665727166209, + 0.9999912301383174, + 0.9999978745141747 + ], + "train_f1": [ + 0.8645523620746614, + 0.944391785150079, + 0.9699259836384885, + 0.9818605554476597, + 0.9895095420586072, + 0.9933559950556242, + 0.9950552422158696, + 0.9969499247133315, + 0.9983010271063403, + 0.9986488051577037, + 0.9991894082680357, + 0.9994597097869713 + ], + "val_loss": [ + 0.16352217756211757, + 0.13888414253791173, + 0.15331077139514188, + 0.17716894903569483, + 0.17669353319021563, + 0.22967796006262992, + 0.17005534635294073, + 0.22448178055371196, + 0.3090995565579002, + 0.25438256103980167, + 0.21965038712469323, + 0.234941724254107 + ], + "val_acc": [ + 0.884375, + 0.9046875, + 0.8979166666666667, + 0.9229166666666667, + 0.8979166666666667, + 0.9265625, + 0.91875, + 0.9239583333333333, + 0.9244791666666666, + 0.9223958333333333, + 0.9317708333333333, + 0.9328125 + ], + "val_auc": [ + 0.9458608217592593, + 0.9611993634259258, + 0.9629803240740741, + 0.9689583333333334, + 0.9654275173611111, + 0.9623119212962963, + 0.9730598958333334, + 0.965689380787037, + 0.9596838831018518, + 0.9648972800925926, + 0.9698365162037036, + 0.9703088831018518 + ], + "val_f1": [ + 0.9217207334273625, + 0.9353128313891834, + 0.93, + 0.949070887818307, + 0.9291907514450867, + 0.9513960703205792, + 0.9451090781140042, + 0.9492350486787204, + 0.9508641138597086, + 0.9487796493640426, + 0.9546869595295745, + 0.95562435500516 + ] + }, + "test_metrics": { + "accuracy": 0.9208333333333333, + "auc_roc": 0.9711554398148149, + "f1": 0.9463579898362507, + "confusion_matrix": [ + [ + 1068, + 132 + ], + [ + 248, + 3352 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9441666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 67, + 1133 + ] + ], + "n": 1200, + "detection_rate": 0.9441666666666667, + "pairwise_auc": 
0.9743878472222222, + "pairwise_f1": 0.9192697768762678 + }, + "insight": { + "accuracy": 0.9041666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 115, + 1085 + ] + ], + "n": 1200, + "detection_rate": 0.9041666666666667, + "pairwise_auc": 0.9620392361111111, + "pairwise_f1": 0.8978071990070335 + }, + "text2img": { + "accuracy": 0.945, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 66, + 1134 + ] + ], + "n": 1200, + "detection_rate": 0.945, + "pairwise_auc": 0.9770392361111111, + "pairwise_f1": 0.9197080291970803 + }, + "wiki": { + "accuracy": 0.89, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1068, + 132 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.10999999999999999 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9170833333333334, + "auc_roc": 0.9743878472222222, + "f1": 0.9192697768762678, + "confusion_matrix": [ + [ + 1068, + 132 + ], + [ + 67, + 1133 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8970833333333333, + "auc_roc": 0.9620392361111111, + "f1": 0.8978071990070335, + "confusion_matrix": [ + [ + 1068, + 132 + ], + [ + 115, + 1085 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9175, + "auc_roc": 0.9770392361111111, + "f1": 0.9197080291970803, + "confusion_matrix": [ + [ + 1068, + 132 + ], + [ + 66, + 1134 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.9165000000000001, + "std": 0.0037133626943782138, + "ci_95": 0.0032549059126186527, + "values": [ + 0.9114583333333334, + 0.9158333333333334, + 0.919375, + 0.915, + 0.9208333333333333 + ] + }, + "auc_roc": { + "mean": 0.9677912500000001, + "std": 0.0025645991388585803, + "ci_95": 0.0022479702597338817, + "values": [ + 0.9665831018518519, + 0.9642475694444445, + 
0.9684114583333332, + 0.9685586805555556, + 0.9711554398148149 + ] + }, + "f1": { + "mean": 0.9440245163483796, + "std": 0.002191568512519297, + "ci_95": 0.0019209944991657685, + "values": [ + 0.941241531867828, + 0.943732590529248, + 0.9460777483628257, + 0.9427127211457456, + 0.9463579898362507 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.9526666666666668, + "std": 0.008787617550976023, + "ci_95": 0.007702686400067023, + "values": [ + 0.9558333333333333, + 0.9591666666666666, + 0.9616666666666667, + 0.9425, + 0.9441666666666667 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9526666666666668, + "std": 0.008787617550976023, + "ci_95": 0.007702686400067023, + "values": [ + 0.9558333333333333, + 0.9591666666666666, + 0.9616666666666667, + 0.9425, + 0.9441666666666667 + ] + }, + "pairwise_auc": { + "mean": 0.9722329861111112, + "std": 0.002482793167311941, + "ci_95": 0.0021762641641031514, + "values": [ + 0.9706750000000001, + 0.970038888888889, + 0.9754291666666666, + 0.9706340277777777, + 0.9743878472222222 + ] + }, + "pairwise_f1": { + "mean": 0.9062744815333597, + "std": 0.010398509349436593, + "ci_95": 0.009114695317842876, + "values": [ + 0.890527950310559, + 0.9052300432559969, + 0.9100946372239748, + 0.90625, + 0.9192697768762678 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.8971666666666668, + "std": 0.010583661412237646, + "ci_95": 0.009276988256493082, + "values": [ + 0.9025, + 0.8908333333333334, + 0.9066666666666666, + 0.8816666666666667, + 0.9041666666666667 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.8971666666666668, + "std": 0.010583661412237646, + "ci_95": 0.009276988256493082, + "values": [ + 0.9025, + 0.8908333333333334, + 0.9066666666666666, + 0.8816666666666667, + 
0.9041666666666667 + ] + }, + "pairwise_auc": { + "mean": 0.95116, + "std": 0.007441648458215133, + "ci_95": 0.006522892472352412, + "values": [ + 0.946836111111111, + 0.9423524305555556, + 0.9537607638888889, + 0.9508114583333334, + 0.9620392361111111 + ] + }, + "pairwise_f1": { + "mean": 0.8766180220085964, + "std": 0.013665161061577277, + "ci_95": 0.011978041790410631, + "values": [ + 0.8622611464968153, + 0.8687525396180414, + 0.8809716599190284, + 0.8732975650020636, + 0.8978071990070335 + ] + } + }, + "text2img": { + "accuracy": { + "mean": 0.9661666666666667, + "std": 0.013483529046787299, + "ci_95": 0.011818834309882061, + "values": [ + 0.9783333333333334, + 0.9733333333333334, + 0.9608333333333333, + 0.9733333333333334, + 0.945 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9661666666666667, + "std": 0.013483529046787299, + "ci_95": 0.011818834309882061, + "values": [ + 0.9783333333333334, + 0.9733333333333334, + 0.9608333333333333, + 0.9733333333333334, + 0.945 + ] + }, + "pairwise_auc": { + "mean": 0.979980763888889, + "std": 0.0034438815764337577, + "ci_95": 0.0030186952980550876, + "values": [ + 0.9822381944444445, + 0.980351388888889, + 0.9760444444444445, + 0.9842305555555555, + 0.9770392361111111 + ] + }, + "pairwise_f1": { + "mean": 0.9132270888839453, + "std": 0.008084726628118197, + "ci_95": 0.00708657534143018, + "values": [ + 0.9020361121782559, + 0.9125, + 0.9096646942800789, + 0.9222266087643111, + 0.9197080291970803 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.85, + "std": 0.029691702918866445, + "ci_95": 0.026025925108971202, + "values": [ + 0.8091666666666667, + 0.84, + 0.8483333333333334, + 0.8625, + 0.89 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.14999999999999997, + "std": 0.029691702918866445, + 
"ci_95": 0.026025925108971202, + "values": [ + 0.1908333333333333, + 0.16000000000000003, + 0.15166666666666662, + 0.13749999999999996, + 0.10999999999999999 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.9013333333333333, + "std": 0.012456173168353141, + "ci_95": 0.010918317178027048, + "values": [ + 0.8825, + 0.8995833333333333, + 0.905, + 0.9025, + 0.9170833333333334 + ] + }, + "auc_roc": { + "mean": 0.9722329861111112, + "std": 0.002482793167311941, + "ci_95": 0.0021762641641031514, + "values": [ + 0.9706750000000001, + 0.970038888888889, + 0.9754291666666666, + 0.9706340277777777, + 0.9743878472222222 + ] + }, + "f1": { + "mean": 0.9062744815333597, + "std": 0.010398509349436593, + "ci_95": 0.009114695317842876, + "values": [ + 0.890527950310559, + 0.9052300432559969, + 0.9100946372239748, + 0.90625, + 0.9192697768762678 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.8735833333333334, + "std": 0.015423984749589056, + "ci_95": 0.01351971872652847, + "values": [ + 0.8558333333333333, + 0.8654166666666666, + 0.8775, + 0.8720833333333333, + 0.8970833333333333 + ] + }, + "auc_roc": { + "mean": 0.95116, + "std": 0.007441648458215133, + "ci_95": 0.006522892472352412, + "values": [ + 0.946836111111111, + 0.9423524305555556, + 0.9537607638888889, + 0.9508114583333334, + 0.9620392361111111 + ] + }, + "f1": { + "mean": 0.8766180220085964, + "std": 0.013665161061577277, + "ci_95": 0.011978041790410631, + "values": [ + 0.8622611464968153, + 0.8687525396180414, + 0.8809716599190284, + 0.8732975650020636, + 0.8978071990070335 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.9080833333333335, + "std": 0.010063168542318618, + "ci_95": 0.008820756140427464, + "values": [ + 0.89375, + 0.9066666666666666, + 0.9045833333333333, + 0.9179166666666667, + 0.9175 + ] + }, + "auc_roc": { + "mean": 0.979980763888889, + "std": 0.0034438815764337577, + "ci_95": 0.0030186952980550876, + "values": [ + 
0.9822381944444445, + 0.980351388888889, + 0.9760444444444445, + 0.9842305555555555, + 0.9770392361111111 + ] + }, + "f1": { + "mean": 0.9132270888839453, + "std": 0.008084726628118197, + "ci_95": 0.00708657534143018, + "values": [ + 0.9020361121782559, + 0.9125, + 0.9096646942800789, + 0.9222266087643111, + 0.9197080291970803 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data", + "run_name": "p2a_t2_real_norm", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false, + "normalization": "real_norm" + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p2a_t3_holdout_inpainting.json b/classifier/outputs/logs/p2a_t3_holdout_inpainting.json new file mode 100644 index 0000000..4f80d15 --- /dev/null +++ b/classifier/outputs/logs/p2a_t3_holdout_inpainting.json @@ -0,0 +1,1749 @@ +{ + "run_name": "p2a_t3_holdout_inpainting", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.25781509587426243, + 0.1214831151712088, + 0.06513902116796852, + 0.03926007057724084, + 0.025138242795336187, + 0.017598178024032945, + 0.01244043387198838, + 0.0073325110016154256, + 0.005493426220417871, + 0.0031292137056453486, + 0.0015878780434124768, + 0.0015818994841785394 + ], + "train_acc": [ + 0.8106481481481481, + 0.9251543209876543, + 0.9647376543209877, + 0.9807098765432098, + 0.9880401234567902, + 0.9916666666666667, + 0.9955246913580247, + 0.995679012345679, + 0.9978395061728395, + 0.9984567901234568, + 0.9991512345679012, + 0.9991512345679012 + ], + "train_auc": [ + 0.9047242182141633, + 0.9798570387517147, + 0.9940130020790466, + 0.9977021176268862, + 0.9989674693501371, + 0.9994267216435185, + 0.9995074856395748, + 0.9999243130572703, + 
0.999924862289952, + 0.9999896449545609, + 0.9999976959019203, + 0.9999973476080248 + ], + "train_f1": [ + 0.8494663231505337, + 0.943075117370892, + 0.9734194148781481, + 0.9854803113021257, + 0.991018137567364, + 0.9937413073713491, + 0.9966407969419668, + 0.9967570071809126, + 0.9983790668056038, + 0.9988427265362805, + 0.9993633153904035, + 0.9993633890850165 + ], + "val_loss": [ + 0.21581555555264156, + 0.23866513189342287, + 0.26745867414606944, + 0.3027038105660015, + 0.31426937364869645, + 0.3139076674977938, + 0.2954678490757942, + 0.3496660818656286, + 0.37485053605503504, + 0.3242863669991493, + 0.3113009749187364, + 0.31638388302591114 + ], + "val_acc": [ + 0.8305555555555556, + 0.8493055555555555, + 0.8888888888888888, + 0.8930555555555556, + 0.8986111111111111, + 0.8972222222222223, + 0.90625, + 0.9, + 0.90625, + 0.9111111111111111, + 0.9152777777777777, + 0.9118055555555555 + ], + "val_auc": [ + 0.9484396701388889, + 0.9524045138888888, + 0.9560709635416667, + 0.9544943576388889, + 0.9586425781249999, + 0.9616232638888889, + 0.9637868923611111, + 0.9537217881944444, + 0.9569476996527779, + 0.962119140625, + 0.9628407118055555, + 0.9597330729166667 + ], + "val_f1": [ + 0.8592848904267589, + 0.8764940239043825, + 0.9174406604747162, + 0.9217479674796748, + 0.9257375381485249, + 0.9236326109391125, + 0.9308755760368663, + 0.9246073298429319, + 0.9312977099236641, + 0.9338159255429163, + 0.936656282450675, + 0.9332632685233841 + ] + }, + "test_metrics": { + "accuracy": 0.900625, + "auc_roc": 0.9572935185185185, + "f1": 0.9330150259794973, + "confusion_matrix": [ + [ + 1001, + 199 + ], + [ + 278, + 3322 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.8733333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 152, + 1048 + ] + ], + "n": 1200, + "detection_rate": 0.8733333333333333, + "pairwise_auc": 0.9345378472222223, + "pairwise_f1": 0.856559051900286 + }, + "insight": { + "accuracy": 0.91, + 
"auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 108, + 1092 + ] + ], + "n": 1200, + "detection_rate": 0.91, + "pairwise_auc": 0.9505375, + "pairwise_f1": 0.876756322761943 + }, + "text2img": { + "accuracy": 0.985, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 18, + 1182 + ] + ], + "n": 1200, + "detection_rate": 0.985, + "pairwise_auc": 0.9868052083333332, + "pairwise_f1": 0.9159240604416893 + }, + "wiki": { + "accuracy": 0.8341666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1001, + 199 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.1658333333333334 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.85375, + "auc_roc": 0.9345378472222223, + "f1": 0.856559051900286, + "confusion_matrix": [ + [ + 1001, + 199 + ], + [ + 152, + 1048 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8720833333333333, + "auc_roc": 0.9505375, + "f1": 0.876756322761943, + "confusion_matrix": [ + [ + 1001, + 199 + ], + [ + 108, + 1092 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9095833333333333, + "auc_roc": 0.9868052083333332, + "f1": 0.9159240604416893, + "confusion_matrix": [ + [ + 1001, + 199 + ], + [ + 18, + 1182 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.2616682181019842, + 0.1188754732042183, + 0.05891478061331091, + 0.03700997960505099, + 0.024348501453857387, + 0.01709066363749307, + 0.00995518854475831, + 0.005565247291024993, + 0.005189225596301567, + 0.0025071319830708995, + 0.0019205705988025767, + 0.0013182760330240808, + 0.0010205088183845223, + 0.0007541327060697554 + ], + "train_acc": [ + 0.8100308641975309, + 0.9273919753086419, + 0.9678240740740741, + 0.9814043209876543, + 
0.9880401234567902, + 0.9915895061728395, + 0.995216049382716, + 0.9966049382716049, + 0.9975308641975309, + 0.9986111111111111, + 0.9989197530864198, + 0.9992283950617284, + 0.999537037037037, + 0.9996913580246913 + ], + "train_auc": [ + 0.9022979091649521, + 0.9805314295053156, + 0.9949215401020233, + 0.9977866726680386, + 0.9991186289008918, + 0.9995314643347051, + 0.9998513052983539, + 0.9999579100222908, + 0.9999439112868655, + 0.999992417909808, + 0.9999957267018178, + 0.9999982853223595, + 0.9999991962448559, + 0.9999996249142662 + ], + "train_f1": [ + 0.8506249241596894, + 0.9445655375552283, + 0.9757233509926064, + 0.986015203388847, + 0.9910233393177738, + 0.9936895733225265, + 0.9964120370370371, + 0.9974507531865585, + 0.9981472904122278, + 0.9989578508568782, + 0.9991895334028019, + 0.9994211623060894, + 0.9996526973836536, + 0.9997684917235791 + ], + "val_loss": [ + 0.21600718332661523, + 0.2957409880227513, + 0.22589426951275932, + 0.24126873562733334, + 0.3698683015174336, + 0.2967011847429805, + 0.2744103494617674, + 0.357445028424263, + 0.2727206458648046, + 0.3384700695673625, + 0.31687013970481026, + 0.29177635510762534, + 0.27786254816585115, + 0.295849294298225 + ], + "val_acc": [ + 0.8847222222222222, + 0.8715277777777778, + 0.8944444444444445, + 0.89375, + 0.8791666666666667, + 0.8958333333333334, + 0.9020833333333333, + 0.9159722222222222, + 0.9145833333333333, + 0.9125, + 0.9208333333333333, + 0.9125, + 0.9145833333333333, + 0.9152777777777777 + ], + "val_auc": [ + 0.9456239149305555, + 0.9418055555555556, + 0.9617426215277778, + 0.9591026475694444, + 0.9505240885416667, + 0.9608409288194444, + 0.9619173177083334, + 0.9560286458333334, + 0.9675824652777779, + 0.9592643229166666, + 0.9615299479166666, + 0.965993923611111, + 0.9661056857638891, + 0.9657486979166667 + ], + "val_f1": [ + 0.9136316337148803, + 0.9065184436584134, + 0.9202518363064008, + 0.9176976869284562, + 0.9127382146439318, + 0.9214659685863874, + 0.9246392303580973, + 
0.9377892030848329, + 0.9363683393688567, + 0.9357798165137615, + 0.9412371134020618, + 0.9346473029045643, + 0.9355007865757735, + 0.9367875647668393 + ] + }, + "test_metrics": { + "accuracy": 0.9027083333333333, + "auc_roc": 0.957677662037037, + "f1": 0.9335609617299758, + "confusion_matrix": [ + [ + 1052, + 148 + ], + [ + 319, + 3281 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.8625, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 165, + 1035 + ] + ], + "n": 1200, + "detection_rate": 0.8625, + "pairwise_auc": 0.9378954861111112, + "pairwise_f1": 0.8686529584557281 + }, + "insight": { + "accuracy": 0.9058333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 113, + 1087 + ] + ], + "n": 1200, + "detection_rate": 0.9058333333333334, + "pairwise_auc": 0.9527034722222223, + "pairwise_f1": 0.8928131416837782 + }, + "text2img": { + "accuracy": 0.9658333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 41, + 1159 + ] + ], + "n": 1200, + "detection_rate": 0.9658333333333333, + "pairwise_auc": 0.9824340277777778, + "pairwise_f1": 0.9246110889509374 + }, + "wiki": { + "accuracy": 0.8766666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1052, + 148 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.1233333333333333 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.8695833333333334, + "auc_roc": 0.9378954861111112, + "f1": 0.8686529584557281, + "confusion_matrix": [ + [ + 1052, + 148 + ], + [ + 165, + 1035 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.89125, + "auc_roc": 0.9527034722222223, + "f1": 0.8928131416837782, + "confusion_matrix": [ + [ + 1052, + 148 + ], + [ + 113, + 1087 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + 
"n": 2400, + "accuracy": 0.92125, + "auc_roc": 0.9824340277777778, + "f1": 0.9246110889509374, + "confusion_matrix": [ + [ + 1052, + 148 + ], + [ + 41, + 1159 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.258498662011123, + 0.1176260883655445, + 0.0654098799302714, + 0.037252595336866326, + 0.02377791873374839, + 0.018387907146789523, + 0.009700597554641447 + ], + "train_acc": [ + 0.8074845679012346, + 0.9280864197530864, + 0.963966049382716, + 0.9800154320987654, + 0.9895833333333334, + 0.990895061728395, + 0.9951388888888889 + ], + "train_auc": [ + 0.9035050154320987, + 0.9809749014060357, + 0.9938377432698903, + 0.9978404438871742, + 0.9990140335648149, + 0.9993165670010289, + 0.9998635625643004 + ], + "train_f1": [ + 0.8467350574359604, + 0.9451183606171241, + 0.9727712669815171, + 0.9849479862846516, + 0.9921779940900399, + 0.9931586270871985, + 0.9963505763772229 + ], + "val_loss": [ + 0.2280357327726152, + 0.17328365511364408, + 0.2710409538613425, + 0.3359412006205983, + 0.2570320785045624, + 0.3309959472881423, + 0.3894395540157954 + ], + "val_acc": [ + 0.8555555555555555, + 0.8951388888888889, + 0.8868055555555555, + 0.8986111111111111, + 0.8979166666666667, + 0.9020833333333333, + 0.9013888888888889 + ], + "val_auc": [ + 0.9406705729166666, + 0.9621983506944444, + 0.954404296875, + 0.9527202690972223, + 0.95984375, + 0.9560427517361111, + 0.9547222222222222 + ], + "val_f1": [ + 0.8881720430107527, + 0.9185983827493261, + 0.9155002592016589, + 0.9262626262626262, + 0.9226722777485534, + 0.926829268292683, + 0.9279187817258884 + ] + }, + "test_metrics": { + "accuracy": 0.8583333333333333, + "auc_roc": 0.9431243055555555, + "f1": 0.900497512437811, + "confusion_matrix": [ + [ + 1043, + 157 + ], + [ + 523, + 3077 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.7933333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 
+ ], + [ + 248, + 952 + ] + ], + "n": 1200, + "detection_rate": 0.7933333333333333, + "pairwise_auc": 0.9168444444444446, + "pairwise_f1": 0.8245993936769164 + }, + "insight": { + "accuracy": 0.8291666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 205, + 995 + ] + ], + "n": 1200, + "detection_rate": 0.8291666666666667, + "pairwise_auc": 0.9380381944444445, + "pairwise_f1": 0.8460884353741497 + }, + "text2img": { + "accuracy": 0.9416666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 70, + 1130 + ] + ], + "n": 1200, + "detection_rate": 0.9416666666666667, + "pairwise_auc": 0.9744902777777779, + "pairwise_f1": 0.9087253719340571 + }, + "wiki": { + "accuracy": 0.8691666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1043, + 157 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.13083333333333336 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.83125, + "auc_roc": 0.9168444444444446, + "f1": 0.8245993936769164, + "confusion_matrix": [ + [ + 1043, + 157 + ], + [ + 248, + 952 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8491666666666666, + "auc_roc": 0.9380381944444445, + "f1": 0.8460884353741497, + "confusion_matrix": [ + [ + 1043, + 157 + ], + [ + 205, + 995 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9054166666666666, + "auc_roc": 0.9744902777777779, + "f1": 0.9087253719340571, + "confusion_matrix": [ + [ + 1043, + 157 + ], + [ + 70, + 1130 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.255315103115123, + 0.11947704857146299, + 0.06434044011086686, + 0.04031944110052499, + 0.029753698322298235, + 0.020548909077982066, + 0.011400221812558205, + 
0.007843143238736687, + 0.00462050657789169, + 0.002652634877925681 + ], + "train_acc": [ + 0.8118827160493827, + 0.9268518518518518, + 0.9647376543209877, + 0.9800925925925926, + 0.9862654320987654, + 0.9891203703703704, + 0.9951388888888889, + 0.9955246913580247, + 0.9979938271604938, + 0.9987654320987654 + ], + "train_auc": [ + 0.9068986705889918, + 0.9805671028377916, + 0.994131850673011, + 0.9971608957047325, + 0.99846478748714, + 0.9992882078403635, + 0.9997807222007886, + 0.9999103545096021, + 0.9999719623413923, + 0.9999916543424211 + ], + "train_f1": [ + 0.8505028207014962, + 0.9442484121383204, + 0.9733574301871393, + 0.9850191615375682, + 0.9896883327540262, + 0.991822768659746, + 0.9963522668056279, + 0.9966415749855241, + 0.9984953703703704, + 0.999073752460345 + ], + "val_loss": [ + 0.21595470474825965, + 0.18476848436726465, + 0.25963548537757664, + 0.221693916618824, + 0.2073221100701226, + 0.27290250145726735, + 0.32775740358564587, + 0.282966121368938, + 0.2715285387304094, + 0.2812750697135925 + ], + "val_acc": [ + 0.8715277777777778, + 0.8826388888888889, + 0.9006944444444445, + 0.9138888888888889, + 0.9138888888888889, + 0.9041666666666667, + 0.9083333333333333, + 0.9201388888888888, + 0.9215277777777777, + 0.9256944444444445 + ], + "val_auc": [ + 0.9434147135416666, + 0.9583582899305555, + 0.9613606770833333, + 0.9649305555555556, + 0.9694596354166667, + 0.9649370659722223, + 0.9651095920138889, + 0.9651519097222222, + 0.9672309027777778, + 0.9664073350694444 + ], + "val_f1": [ + 0.9034950443401147, + 0.9082020640956002, + 0.9278870398386283, + 0.9351464435146444, + 0.9352818371607515, + 0.9277486910994764, + 0.9325842696629213, + 0.9403836184551582, + 0.9416623644811565, + 0.9442998438313378 + ] + }, + "test_metrics": { + "accuracy": 0.8816666666666667, + "auc_roc": 0.9552260416666667, + "f1": 0.9182263173049237, + "confusion_matrix": [ + [ + 1043, + 157 + ], + [ + 411, + 3189 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 
0.825, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 210, + 990 + ] + ], + "n": 1200, + "detection_rate": 0.825, + "pairwise_auc": 0.9323631944444444, + "pairwise_f1": 0.8436301661695782 + }, + "insight": { + "accuracy": 0.885, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 138, + 1062 + ] + ], + "n": 1200, + "detection_rate": 0.885, + "pairwise_auc": 0.9560093749999999, + "pairwise_f1": 0.8780487804878049 + }, + "text2img": { + "accuracy": 0.9475, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 63, + 1137 + ] + ], + "n": 1200, + "detection_rate": 0.9475, + "pairwise_auc": 0.9773055555555555, + "pairwise_f1": 0.9117882919005613 + }, + "wiki": { + "accuracy": 0.8691666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1043, + 157 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.13083333333333336 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.8470833333333333, + "auc_roc": 0.9323631944444444, + "f1": 0.8436301661695782, + "confusion_matrix": [ + [ + 1043, + 157 + ], + [ + 210, + 990 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8770833333333333, + "auc_roc": 0.9560093749999999, + "f1": 0.8780487804878049, + "confusion_matrix": [ + [ + 1043, + 157 + ], + [ + 138, + 1062 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9083333333333333, + "auc_roc": 0.9773055555555555, + "f1": 0.9117882919005613, + "confusion_matrix": [ + [ + 1043, + 157 + ], + [ + 63, + 1137 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.2661115872271267, + 0.12260635155310601, + 0.061083041942666894, + 0.03744319187057561, + 0.02693782269812396, + 0.01860594513781518, + 
0.009805613456572044, + 0.006071538512768723, + 0.0048567407564585234, + 0.003096203221365961, + 0.0021833804519926082, + 0.0014407251077298295, + 0.0010350479342288268 + ], + "train_acc": [ + 0.8071759259259259, + 0.9243827160493827, + 0.965895061728395, + 0.9814814814814815, + 0.9866512345679013, + 0.9910493827160494, + 0.9952932098765432, + 0.9970679012345679, + 0.9976851851851852, + 0.9983796296296297, + 0.9986882716049382, + 0.9992283950617284, + 0.999537037037037 + ], + "train_auc": [ + 0.8994636541923867, + 0.9794328301826132, + 0.9947475539051784, + 0.9977694187242798, + 0.9987779304912552, + 0.9993228764789094, + 0.9997982708547668, + 0.9999342260373799, + 0.9999342796210562, + 0.9999849697788066, + 0.9999944005058301, + 0.9999982585305213, + 0.9999990890775035 + ], + "train_f1": [ + 0.8469406504562994, + 0.9423800564440263, + 0.9742153774355384, + 0.9860675722744688, + 0.9899669431073479, + 0.9932738026208976, + 0.9964672496669948, + 0.997800925925926, + 0.9982638888888888, + 0.998783807262408, + 0.999015577045573, + 0.9994210282538212, + 0.9996527375853687 + ], + "val_loss": [ + 0.2525812061296569, + 0.19011370407210457, + 0.26990462260113823, + 0.30721286684274673, + 0.31146920836634107, + 0.3030291530821059, + 0.34974887354506384, + 0.24959067867861853, + 0.27127854774395627, + 0.28494989193148085, + 0.287113440864616, + 0.2855055838823318, + 0.30339643855889636 + ], + "val_acc": [ + 0.8618055555555556, + 0.8784722222222222, + 0.8854166666666666, + 0.8951388888888889, + 0.8701388888888889, + 0.8944444444444445, + 0.9048611111111111, + 0.9076388888888889, + 0.9076388888888889, + 0.9131944444444444, + 0.9208333333333333, + 0.91875, + 0.9194444444444444 + ], + "val_auc": [ + 0.9342643229166666, + 0.9584114583333334, + 0.952666015625, + 0.9542567274305556, + 0.9524283854166666, + 0.9643315972222222, + 0.9595887586805556, + 0.9706347656250001, + 0.9679123263888888, + 0.9656477864583334, + 0.9677159288194445, + 0.9676204427083335, + 0.9678841145833333 + ], + 
"val_f1": [ + 0.8955380577427822, + 0.9035812672176309, + 0.9150797735460627, + 0.9230769230769231, + 0.8968560397131826, + 0.9219712525667351, + 0.930066360387953, + 0.9295177530471648, + 0.9286097691894794, + 0.9346576058546785, + 0.940809968847352, + 0.9390307451797811, + 0.9403292181069959 + ] + }, + "test_metrics": { + "accuracy": 0.8752083333333334, + "auc_roc": 0.9549771990740741, + "f1": 0.9121314361155933, + "confusion_matrix": [ + [ + 1092, + 108 + ], + [ + 491, + 3109 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.7658333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 281, + 919 + ] + ], + "n": 1200, + "detection_rate": 0.7658333333333334, + "pairwise_auc": 0.9262888888888887, + "pairwise_f1": 0.8253255500673552 + }, + "insight": { + "accuracy": 0.8608333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 167, + 1033 + ] + ], + "n": 1200, + "detection_rate": 0.8608333333333333, + "pairwise_auc": 0.9517732638888888, + "pairwise_f1": 0.8825288338316959 + }, + "text2img": { + "accuracy": 0.9641666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 43, + 1157 + ] + ], + "n": 1200, + "detection_rate": 0.9641666666666666, + "pairwise_auc": 0.9868694444444445, + "pairwise_f1": 0.9387423935091278 + }, + "wiki": { + "accuracy": 0.91, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1092, + 108 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.08999999999999997 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.8379166666666666, + "auc_roc": 0.9262888888888887, + "f1": 0.8253255500673552, + "confusion_matrix": [ + [ + 1092, + 108 + ], + [ + 281, + 919 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8854166666666666, + "auc_roc": 0.9517732638888888, + "f1": 
0.8825288338316959, + "confusion_matrix": [ + [ + 1092, + 108 + ], + [ + 167, + 1033 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9370833333333334, + "auc_roc": 0.9868694444444445, + "f1": 0.9387423935091278, + "confusion_matrix": [ + [ + 1092, + 108 + ], + [ + 43, + 1157 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.8837083333333334, + "std": 0.018489744717605564, + "ci_95": 0.016206975821472007, + "values": [ + 0.900625, + 0.9027083333333333, + 0.8583333333333333, + 0.8816666666666667, + 0.8752083333333334 + ] + }, + "auc_roc": { + "mean": 0.9536597453703702, + "std": 0.0060110818924978176, + "ci_95": 0.005268945590137735, + "values": [ + 0.9572935185185185, + 0.957677662037037, + 0.9431243055555555, + 0.9552260416666667, + 0.9549771990740741 + ] + }, + "f1": { + "mean": 0.9194862507135602, + "std": 0.014118942609677002, + "ci_95": 0.012375798854697175, + "values": [ + 0.9330150259794973, + 0.9335609617299758, + 0.900497512437811, + 0.9182263173049237, + 0.9121314361155933 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.8240000000000001, + "std": 0.0453895178794987, + "ci_95": 0.039785666598244465, + "values": [ + 0.8733333333333333, + 0.8625, + 0.7933333333333333, + 0.825, + 0.7658333333333334 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.8240000000000001, + "std": 0.0453895178794987, + "ci_95": 0.039785666598244465, + "values": [ + 0.8733333333333333, + 0.8625, + 0.7933333333333333, + 0.825, + 0.7658333333333334 + ] + }, + "pairwise_auc": { + "mean": 0.9295859722222222, + "std": 0.008284010788496325, + "ci_95": 0.007261255609772411, + "values": [ + 0.9345378472222223, + 0.9378954861111112, + 0.9168444444444446, + 0.9323631944444444, + 0.9262888888888887 + ] + }, + "pairwise_f1": { + "mean": 0.8437534240539728, + 
"std": 0.01930317026954635, + "ci_95": 0.01691997475435176, + "values": [ + 0.856559051900286, + 0.8686529584557281, + 0.8245993936769164, + 0.8436301661695782, + 0.8253255500673552 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.8781666666666667, + "std": 0.033641285812392945, + "ci_95": 0.029487887155387863, + "values": [ + 0.91, + 0.9058333333333334, + 0.8291666666666667, + 0.885, + 0.8608333333333333 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.8781666666666667, + "std": 0.033641285812392945, + "ci_95": 0.029487887155387863, + "values": [ + 0.91, + 0.9058333333333334, + 0.8291666666666667, + 0.885, + 0.8608333333333333 + ] + }, + "pairwise_auc": { + "mean": 0.9498123611111111, + "std": 0.006887733143102866, + "ci_95": 0.006037364291391668, + "values": [ + 0.9505375, + 0.9527034722222223, + 0.9380381944444445, + 0.9560093749999999, + 0.9517732638888888 + ] + }, + "pairwise_f1": { + "mean": 0.8752471028278743, + "std": 0.017478160355823974, + "ci_95": 0.01532028303348761, + "values": [ + 0.876756322761943, + 0.8928131416837782, + 0.8460884353741497, + 0.8780487804878049, + 0.8825288338316959 + ] + } + }, + "text2img": { + "accuracy": { + "mean": 0.9608333333333332, + "std": 0.017068082623553364, + "ci_95": 0.014960834052804522, + "values": [ + 0.985, + 0.9658333333333333, + 0.9416666666666667, + 0.9475, + 0.9641666666666666 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9608333333333332, + "std": 0.017068082623553364, + "ci_95": 0.014960834052804522, + "values": [ + 0.985, + 0.9658333333333333, + 0.9416666666666667, + 0.9475, + 0.9641666666666666 + ] + }, + "pairwise_auc": { + "mean": 0.9815809027777778, + "std": 0.005579995979494273, + "ci_95": 0.004891082127135289, + "values": [ + 0.9868052083333332, + 0.9824340277777778, + 
0.9744902777777779, + 0.9773055555555555, + 0.9868694444444445 + ] + }, + "pairwise_f1": { + "mean": 0.9199582413472746, + "std": 0.012079873840998933, + "ci_95": 0.010588476274693278, + "values": [ + 0.9159240604416893, + 0.9246110889509374, + 0.9087253719340571, + 0.9117882919005613, + 0.9387423935091278 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.8718333333333333, + "std": 0.02698636486985401, + "ci_95": 0.023654591755325485, + "values": [ + 0.8341666666666666, + 0.8766666666666667, + 0.8691666666666666, + 0.8691666666666666, + 0.91 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.12816666666666668, + "std": 0.02698636486985401, + "ci_95": 0.023654591755325485, + "values": [ + 0.1658333333333334, + 0.1233333333333333, + 0.13083333333333336, + 0.13083333333333336, + 0.08999999999999997 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.8479166666666668, + "std": 0.014848774734337893, + "ci_95": 0.013015524917915876, + "values": [ + 0.85375, + 0.8695833333333334, + 0.83125, + 0.8470833333333333, + 0.8379166666666666 + ] + }, + "auc_roc": { + "mean": 0.9295859722222222, + "std": 0.008284010788496325, + "ci_95": 0.007261255609772411, + "values": [ + 0.9345378472222223, + 0.9378954861111112, + 0.9168444444444446, + 0.9323631944444444, + 0.9262888888888887 + ] + }, + "f1": { + "mean": 0.8437534240539728, + "std": 0.01930317026954635, + "ci_95": 0.01691997475435176, + "values": [ + 0.856559051900286, + 0.8686529584557281, + 0.8245993936769164, + 0.8436301661695782, + 0.8253255500673552 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.875, + "std": 0.016223268611609828, + "ci_95": 0.014220321921656898, + "values": [ + 0.8720833333333333, + 0.89125, + 0.8491666666666666, + 0.8770833333333333, + 0.8854166666666666 + ] + }, + "auc_roc": { + "mean": 0.9498123611111111, + "std": 
0.006887733143102866, + "ci_95": 0.006037364291391668, + "values": [ + 0.9505375, + 0.9527034722222223, + 0.9380381944444445, + 0.9560093749999999, + 0.9517732638888888 + ] + }, + "f1": { + "mean": 0.8752471028278743, + "std": 0.017478160355823974, + "ci_95": 0.01532028303348761, + "values": [ + 0.876756322761943, + 0.8928131416837782, + 0.8460884353741497, + 0.8780487804878049, + 0.8825288338316959 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.9163333333333334, + "std": 0.013071650835130058, + "ci_95": 0.011457807139433139, + "values": [ + 0.9095833333333333, + 0.92125, + 0.9054166666666666, + 0.9083333333333333, + 0.9370833333333334 + ] + }, + "auc_roc": { + "mean": 0.9815809027777778, + "std": 0.005579995979494273, + "ci_95": 0.004891082127135289, + "values": [ + 0.9868052083333332, + 0.9824340277777778, + 0.9744902777777779, + 0.9773055555555555, + 0.9868694444444445 + ] + }, + "f1": { + "mean": 0.9199582413472746, + "std": 0.012079873840998933, + "ci_95": 0.010588476274693278, + "values": [ + 0.9159240604416893, + 0.9246110889509374, + 0.9087253719340571, + 0.9117882919005613, + 0.9387423935091278 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data", + "run_name": "p2a_t3_holdout_inpainting", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false, + "normalization": "imagenet", + "train_sources": [ + "wiki", + "text2img", + "insight" + ], + "eval_sources": [ + "wiki", + "text2img", + "insight", + "inpainting" + ] + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p2a_t3_holdout_insight.json b/classifier/outputs/logs/p2a_t3_holdout_insight.json new file mode 100644 index 0000000..c3a874c --- /dev/null +++ b/classifier/outputs/logs/p2a_t3_holdout_insight.json @@ -0,0 +1,1861 @@ +{ + "run_name": 
"p2a_t3_holdout_insight", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.221900131498222, + 0.09055700592673671, + 0.050355928940382315, + 0.03795463042879868, + 0.020803921496248403, + 0.015402267911616228, + 0.009459777763774503, + 0.007435759974616281, + 0.003887806649730425, + 0.003259596692067943, + 0.0015012719458815057, + 0.0016088033901993455, + 0.0008241117115995835, + 0.00107023416148877, + 0.0009690378105084546 + ], + "train_acc": [ + 0.850462962962963, + 0.9469907407407407, + 0.9729938271604939, + 0.9815586419753086, + 0.989429012345679, + 0.9932098765432099, + 0.9949845679012346, + 0.9965277777777778, + 0.9983796296296297, + 0.9984567901234568, + 0.9993055555555556, + 0.9996141975308642, + 0.999537037037037, + 0.9993827160493827, + 0.9994598765432099 + ], + "train_auc": [ + 0.9319749603480796, + 0.9887071464549039, + 0.9963385202331962, + 0.9976732762131345, + 0.9992920390732167, + 0.9994847259730797, + 0.9998105281207132, + 0.9999104482810356, + 0.9999664834104938, + 0.9999829469950274, + 0.9999967849794238, + 0.999874239111797, + 0.9999995713305898, + 0.9999989015346366, + 0.9999914801954732 + ], + "train_f1": [ + 0.8827444336882866, + 0.9597704514844527, + 0.9796322160148976, + 0.9861264294421548, + 0.9920611925595411, + 0.9949062282935864, + 0.9962334125282494, + 0.9973938727051601, + 0.998783807262408, + 0.9988425925925926, + 0.9994791968057404, + 0.9997106983741249, + 0.9996527375853687, + 0.9995369298448715, + 0.9995947901591896 + ], + "val_loss": [ + 0.16446967588530648, + 0.1840791697303454, + 0.19977748062875536, + 0.20411120371686087, + 0.32943161345190475, + 0.17416284415456984, + 0.1789202243089676, + 0.1872969721754392, + 0.14698216517766316, + 0.1865630802181032, + 0.18526823139852946, + 0.16406844076183108, + 0.17544087188111412, + 0.15160242136981752, + 0.16450001017914878 + ], + "val_acc": [ + 0.9041666666666667, + 
0.8722222222222222, + 0.9305555555555556, + 0.9256944444444445, + 0.9138888888888889, + 0.9270833333333334, + 0.9506944444444444, + 0.9486111111111111, + 0.9416666666666667, + 0.9465277777777777, + 0.9444444444444444, + 0.9465277777777777, + 0.9465277777777777, + 0.9493055555555555, + 0.9479166666666666 + ], + "val_auc": [ + 0.9687575954861111, + 0.9815342881944444, + 0.9820431857638888, + 0.9803776041666667, + 0.9755219184027778, + 0.9816297743055555, + 0.9842654079861111, + 0.9816026475694444, + 0.9862662760416667, + 0.9854665798611111, + 0.9851030815972222, + 0.9868326822916668, + 0.9869791666666667, + 0.9873166232638889, + 0.9870941840277778 + ], + "val_f1": [ + 0.9278996865203761, + 0.8946162657502864, + 0.9494438827098078, + 0.9456576942610462, + 0.9383084577114428, + 0.944, + 0.9634209170530654, + 0.9618949536560247, + 0.9556962025316456, + 0.9602888086642599, + 0.9588053553038105, + 0.960082944530845, + 0.9603297269448737, + 0.9620779220779221, + 0.9612403100775194 + ] + }, + "test_metrics": { + "accuracy": 0.8404166666666667, + "auc_roc": 0.9406697916666668, + "f1": 0.8836926814454904, + "confusion_matrix": [ + [ + 1124, + 76 + ], + [ + 690, + 2910 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.94, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 72, + 1128 + ] + ], + "n": 1200, + "detection_rate": 0.94, + "pairwise_auc": 0.9856604166666667, + "pairwise_f1": 0.9384359400998337 + }, + "insight": { + "accuracy": 0.5008333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 599, + 601 + ] + ], + "n": 1200, + "detection_rate": 0.5008333333333334, + "pairwise_auc": 0.8413416666666667, + "pairwise_f1": 0.6403835908364411 + }, + "text2img": { + "accuracy": 0.9841666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 19, + 1181 + ] + ], + "n": 1200, + "detection_rate": 0.9841666666666666, + "pairwise_auc": 0.9950072916666667, + 
"pairwise_f1": 0.9613349613349613 + }, + "wiki": { + "accuracy": 0.9366666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1124, + 76 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.06333333333333335 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9383333333333334, + "auc_roc": 0.9856604166666667, + "f1": 0.9384359400998337, + "confusion_matrix": [ + [ + 1124, + 76 + ], + [ + 72, + 1128 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.71875, + "auc_roc": 0.8413416666666667, + "f1": 0.6403835908364411, + "confusion_matrix": [ + [ + 1124, + 76 + ], + [ + 599, + 601 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9604166666666667, + "auc_roc": 0.9950072916666667, + "f1": 0.9613349613349613, + "confusion_matrix": [ + [ + 1124, + 76 + ], + [ + 19, + 1181 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.22215608109110668, + 0.09765733032728786, + 0.04893466802687603, + 0.034209873639823245, + 0.02029450899701462, + 0.013510319246945724, + 0.012697673750346753, + 0.005298483473454303, + 0.0029991691134646204, + 0.0025113817023664545, + 0.0013458789497830658, + 0.0008007793183738321, + 0.00046533000990795116, + 0.0003107348998751318, + 0.0005777599946174441 + ], + "train_acc": [ + 0.8510030864197531, + 0.9439043209876543, + 0.9750771604938272, + 0.983641975308642, + 0.988966049382716, + 0.9929012345679012, + 0.9929012345679012, + 0.9975308641975309, + 0.998533950617284, + 0.9987654320987654, + 0.9993055555555556, + 0.9996141975308642, + 0.9997685185185186, + 0.9999228395061729, + 0.9998456790123457 + ], + "train_auc": [ + 0.9321080220121742, + 0.9867878729423869, + 0.9965020308213306, + 0.9980933052554869, + 0.9993586033950617, + 
0.9997250621570646, + 0.9996713847093621, + 0.9999531142832648, + 0.9999883589463305, + 0.9999929403506516, + 0.9999983121141975, + 0.9999995981224281, + 0.9999998928326476, + 1.0, + 0.9999997320816186 + ], + "train_f1": [ + 0.8844473699958112, + 0.9573881952992205, + 0.9812307513510372, + 0.9876987350586051, + 0.9917048552700273, + 0.9946716089424302, + 0.9946734599351552, + 0.998147933788633, + 0.9989002720379696, + 0.9990739668943165, + 0.999479015918958, + 0.9997105643994211, + 0.9998263587428373, + 0.9999421329784156, + 0.9998842592592593 + ], + "val_loss": [ + 0.18173423045211368, + 0.15682731734381783, + 0.12995633516046737, + 0.3167274175418748, + 0.3881762362188763, + 0.16476303007867601, + 0.14555738866329193, + 0.15716644260618423, + 0.1588555042942365, + 0.16912019583914015, + 0.1316970290409194, + 0.14958005249500275, + 0.14952652553717297, + 0.13752244727479088, + 0.13813267449537914 + ], + "val_acc": [ + 0.8597222222222223, + 0.9326388888888889, + 0.9409722222222222, + 0.9125, + 0.8347222222222223, + 0.9375, + 0.9451388888888889, + 0.9527777777777777, + 0.9520833333333333, + 0.9423611111111111, + 0.9618055555555556, + 0.95625, + 0.9569444444444445, + 0.9583333333333334, + 0.95625 + ], + "val_auc": [ + 0.9741330295138888, + 0.9806532118055556, + 0.9829047309027779, + 0.977763671875, + 0.9776833767361112, + 0.9849565972222223, + 0.9864333767361111, + 0.9848068576388889, + 0.9858322482638889, + 0.9852788628472222, + 0.9905544704861111, + 0.9883355034722222, + 0.9883203125, + 0.9901714409722222, + 0.9904275173611111 + ], + "val_f1": [ + 0.8837744533947065, + 0.9502819067145054, + 0.9555206698063841, + 0.9378084896347483, + 0.8596698113207547, + 0.9519230769230769, + 0.9580010632642212, + 0.9645833333333333, + 0.9637414608512874, + 0.9560148383677796, + 0.9713392391870767, + 0.967067433350758, + 0.9677083333333333, + 0.9686192468619247, + 0.967032967032967 + ] + }, + "test_metrics": { + "accuracy": 0.845, + "auc_roc": 0.9390527777777778, + "f1": 
0.8875113395827033, + "confusion_matrix": [ + [ + 1121, + 79 + ], + [ + 665, + 2935 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9491666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 61, + 1139 + ] + ], + "n": 1200, + "detection_rate": 0.9491666666666667, + "pairwise_auc": 0.9847184027777777, + "pairwise_f1": 0.9421009098428453 + }, + "insight": { + "accuracy": 0.515, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 582, + 618 + ] + ], + "n": 1200, + "detection_rate": 0.515, + "pairwise_auc": 0.839582638888889, + "pairwise_f1": 0.6515550869794412 + }, + "text2img": { + "accuracy": 0.9816666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 22, + 1178 + ] + ], + "n": 1200, + "detection_rate": 0.9816666666666667, + "pairwise_auc": 0.9928572916666668, + "pairwise_f1": 0.9588929588929589 + }, + "wiki": { + "accuracy": 0.9341666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1121, + 79 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.0658333333333333 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9416666666666667, + "auc_roc": 0.9847184027777777, + "f1": 0.9421009098428453, + "confusion_matrix": [ + [ + 1121, + 79 + ], + [ + 61, + 1139 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.7245833333333334, + "auc_roc": 0.839582638888889, + "f1": 0.6515550869794412, + "confusion_matrix": [ + [ + 1121, + 79 + ], + [ + 582, + 618 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9579166666666666, + "auc_roc": 0.9928572916666668, + "f1": 0.9588929588929589, + "confusion_matrix": [ + [ + 1121, + 79 + ], + [ + 22, + 1178 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 12960, + "val_size": 1440, + 
"test_size": 4800, + "history": { + "train_loss": [ + 0.2215230604380737, + 0.1014425605489516, + 0.05135551941535079, + 0.034227528057584285, + 0.020638354608726567, + 0.015166697286658296, + 0.009714811315392086, + 0.008263903152126457, + 0.0028723553302696042, + 0.002424952944478898, + 0.0017287055309174724 + ], + "train_acc": [ + 0.8475308641975309, + 0.9423611111111111, + 0.9719907407407408, + 0.9820216049382716, + 0.9898919753086419, + 0.9926697530864198, + 0.9952932098765432, + 0.9969907407407408, + 0.9986882716049382, + 0.9992283950617284, + 0.9989969135802469 + ], + "train_auc": [ + 0.9321050079303842, + 0.9859669308341907, + 0.9961787203146433, + 0.9982715781464335, + 0.9992778393990055, + 0.9995493612825788, + 0.9998193024477025, + 0.999740547839506, + 0.9999873676483196, + 0.99998922968107, + 0.9999947086119685 + ], + "train_f1": [ + 0.8802569385529027, + 0.9563081242323215, + 0.9788621673557328, + 0.9864888373441577, + 0.9924150309767819, + 0.9944969008862886, + 0.9964672496669948, + 0.9977416179280792, + 0.9990161467677527, + 0.9994212962962963, + 0.9992473803045215 + ], + "val_loss": [ + 0.13050816671715842, + 0.13505457623137368, + 0.31902390718460083, + 0.19341963032881418, + 0.20096096379889383, + 0.10236648867527644, + 0.14019040779934988, + 0.16024226794640223, + 0.1222893249657419, + 0.1388013172480795, + 0.13770783642927806 + ], + "val_acc": [ + 0.9090277777777778, + 0.9208333333333333, + 0.8993055555555556, + 0.9361111111111111, + 0.9354166666666667, + 0.9541666666666667, + 0.9444444444444444, + 0.95, + 0.9597222222222223, + 0.9541666666666667, + 0.95625 + ], + "val_auc": [ + 0.9779264322916666, + 0.9777419704861111, + 0.9720996093750001, + 0.9851204427083333, + 0.9850217013888889, + 0.99107421875, + 0.9885828993055557, + 0.9856336805555554, + 0.989360894097222, + 0.9886002604166667, + 0.9902560763888888 + ], + "val_f1": [ + 0.9289972899728998, + 0.9389067524115756, + 0.9286768322675848, + 0.9535822401614531, + 0.9528158295281582, + 
0.9649309245483528, + 0.9574468085106383, + 0.9617834394904459, + 0.9695697796432319, + 0.9655172413793104, + 0.9672727272727273 + ] + }, + "test_metrics": { + "accuracy": 0.8120833333333334, + "auc_roc": 0.9362050925925925, + "f1": 0.8595015576323988, + "confusion_matrix": [ + [ + 1139, + 61 + ], + [ + 841, + 2759 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9083333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 110, + 1090 + ] + ], + "n": 1200, + "detection_rate": 0.9083333333333333, + "pairwise_auc": 0.98161875, + "pairwise_f1": 0.9272649936197362 + }, + "insight": { + "accuracy": 0.4241666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 691, + 509 + ] + ], + "n": 1200, + "detection_rate": 0.4241666666666667, + "pairwise_auc": 0.8343604166666666, + "pairwise_f1": 0.5751412429378531 + }, + "text2img": { + "accuracy": 0.9666666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 40, + 1160 + ] + ], + "n": 1200, + "detection_rate": 0.9666666666666667, + "pairwise_auc": 0.9926361111111112, + "pairwise_f1": 0.9582817017761256 + }, + "wiki": { + "accuracy": 0.9491666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1139, + 61 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.050833333333333286 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.92875, + "auc_roc": 0.98161875, + "f1": 0.9272649936197362, + "confusion_matrix": [ + [ + 1139, + 61 + ], + [ + 110, + 1090 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.6866666666666666, + "auc_roc": 0.8343604166666666, + "f1": 0.5751412429378531, + "confusion_matrix": [ + [ + 1139, + 61 + ], + [ + 691, + 509 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + 
"accuracy": 0.9579166666666666, + "auc_roc": 0.9926361111111112, + "f1": 0.9582817017761256, + "confusion_matrix": [ + [ + 1139, + 61 + ], + [ + 40, + 1160 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.21201637929972308, + 0.09177248130357007, + 0.053863701856419165, + 0.031763664008273435, + 0.02140559677399672, + 0.014252267104974533, + 0.012688780943426537, + 0.005660201898093492, + 0.003774158633060313, + 0.0032219876190145353, + 0.0011941452978000463, + 0.0011382195415872384, + 0.0005081066835749498, + 0.0003702830817799645, + 0.0010320760736625362 + ], + "train_acc": [ + 0.8590277777777777, + 0.9468364197530864, + 0.9702932098765432, + 0.9831018518518518, + 0.9880401234567902, + 0.9922839506172839, + 0.9934413580246914, + 0.9972993827160493, + 0.9982253086419753, + 0.9987654320987654, + 0.9996141975308642, + 0.999537037037037, + 0.9998456790123457, + 0.9999228395061729, + 0.999537037037037 + ], + "train_auc": [ + 0.9380557029106653, + 0.9884040369941701, + 0.9961057393475652, + 0.9984620681155694, + 0.9993163392704048, + 0.9997259060999657, + 0.9997672726980453, + 0.999959946201989, + 0.9999786469050069, + 0.9999815538194444, + 0.9999978700488683, + 0.999998191550926, + 0.999999812457133, + 0.9999999732081619, + 0.9999981111754115 + ], + "train_f1": [ + 0.8898136421204994, + 0.959610762647283, + 0.9776097702820588, + 0.9872947728723096, + 0.9910087592087709, + 0.994208270589598, + 0.9950756039626905, + 0.9979737162044809, + 0.9986684420772304, + 0.9990740740740741, + 0.9997106314022802, + 0.9996527777777777, + 0.9998842592592593, + 0.9999421262804561, + 0.9996527375853687 + ], + "val_loss": [ + 0.20351142072015338, + 0.3276245656940672, + 0.4981435305542416, + 0.2889870650238461, + 0.4523913953039381, + 0.18176159494453006, + 0.19582118193308512, + 0.1700224777062734, + 0.1923278099960751, + 0.20986380179723105, + 0.20547801156838735, + 0.16092905484967762, + 
0.18698191046714782, + 0.16638216276963552, + 0.1700377999080552 + ], + "val_acc": [ + 0.8472222222222222, + 0.8888888888888888, + 0.7659722222222223, + 0.9166666666666666, + 0.8326388888888889, + 0.9305555555555556, + 0.9423611111111111, + 0.9333333333333333, + 0.9409722222222222, + 0.9458333333333333, + 0.9472222222222222, + 0.9430555555555555, + 0.9472222222222222, + 0.9493055555555555, + 0.9527777777777777 + ], + "val_auc": [ + 0.9713064236111112, + 0.9733832465277779, + 0.9734754774305554, + 0.9773155381944445, + 0.975588107638889, + 0.9780989583333333, + 0.9797287326388889, + 0.9842198350694443, + 0.9822504340277778, + 0.9792198350694445, + 0.9828049045138889, + 0.985851779513889, + 0.9816916232638889, + 0.9851215277777778, + 0.9836208767361112 + ], + "val_f1": [ + 0.8722415795586528, + 0.9222546161321672, + 0.7879169288860919, + 0.9405351833498513, + 0.857312018946122, + 0.9476987447698745, + 0.9570615623383342, + 0.9487179487179487, + 0.9558899844317592, + 0.9600818833162743, + 0.9609856262833676, + 0.9567053854276664, + 0.96086508753862, + 0.9619593538301199, + 0.9646936656282451 + ] + }, + "test_metrics": { + "accuracy": 0.830625, + "auc_roc": 0.9401379629629629, + "f1": 0.8747882334822116, + "confusion_matrix": [ + [ + 1147, + 53 + ], + [ + 760, + 2840 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9183333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 98, + 1102 + ] + ], + "n": 1200, + "detection_rate": 0.9183333333333333, + "pairwise_auc": 0.9851236111111111, + "pairwise_f1": 0.9358811040339703 + }, + "insight": { + "accuracy": 0.4775, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 627, + 573 + ] + ], + "n": 1200, + "detection_rate": 0.4775, + "pairwise_auc": 0.8418135416666667, + "pairwise_f1": 0.6276013143483024 + }, + "text2img": { + "accuracy": 0.9708333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 35, + 
1165 + ] + ], + "n": 1200, + "detection_rate": 0.9708333333333333, + "pairwise_auc": 0.9934767361111112, + "pairwise_f1": 0.9636062861869313 + }, + "wiki": { + "accuracy": 0.9558333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1147, + 53 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.04416666666666669 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9370833333333334, + "auc_roc": 0.9851236111111111, + "f1": 0.9358811040339703, + "confusion_matrix": [ + [ + 1147, + 53 + ], + [ + 98, + 1102 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.7166666666666667, + "auc_roc": 0.8418135416666667, + "f1": 0.6276013143483024, + "confusion_matrix": [ + [ + 1147, + 53 + ], + [ + 627, + 573 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9633333333333334, + "auc_roc": 0.9934767361111112, + "f1": 0.9636062861869313, + "confusion_matrix": [ + [ + 1147, + 53 + ], + [ + 35, + 1165 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.22282849704777752, + 0.09587798762583623, + 0.05643976701762711, + 0.03555202028494225, + 0.02578896039641997, + 0.016046983599652002, + 0.006410472841400159, + 0.00932835557124898, + 0.004159164803175732, + 0.0026431137199796027, + 0.0018033815937020267, + 0.0008877858658257695, + 0.0007059923287226726, + 0.0004132254239917574 + ], + "train_acc": [ + 0.848533950617284, + 0.9434413580246913, + 0.970216049382716, + 0.9804012345679012, + 0.9868055555555556, + 0.9925154320987655, + 0.9974537037037037, + 0.9964506172839506, + 0.9983796296296297, + 0.9986882716049382, + 0.9993055555555556, + 0.9998456790123457, + 0.9996141975308642, + 0.9999228395061729 + ], + "train_auc": [ + 0.9312621500985939, + 0.9873819149734224, + 
0.9956183020404664, + 0.9981948731138546, + 0.9990388963906037, + 0.9995547732338821, + 0.9999329266332304, + 0.9997190473894034, + 0.9999515335648148, + 0.9999911051097393, + 0.9999951774691358, + 0.9999993302040466, + 0.9999996249142662, + 1.0000000000000002 + ], + "train_f1": [ + 0.881839523264913, + 0.9571069108783428, + 0.9775581395348837, + 0.9852548473238129, + 0.9900806311270955, + 0.9943849493487699, + 0.9980899461712103, + 0.9973385790326313, + 0.9987843704775687, + 0.9990158050136051, + 0.9994790762285118, + 0.999884272653628, + 0.9997105979047288, + 0.9999421262804561 + ], + "val_loss": [ + 0.15792499168051613, + 0.542742556002405, + 0.1823839321732521, + 0.208244545923339, + 0.1911680911978086, + 0.15773669332265855, + 0.21006372471650442, + 0.19471206135219998, + 0.1356222172578176, + 0.16341885344849694, + 0.14434544261958865, + 0.15009288258022732, + 0.14751002705759472, + 0.15342201209730572 + ], + "val_acc": [ + 0.9013888888888889, + 0.85, + 0.9256944444444445, + 0.9298611111111111, + 0.9215277777777777, + 0.9430555555555555, + 0.9152777777777777, + 0.9256944444444445, + 0.9444444444444444, + 0.9381944444444444, + 0.9479166666666666, + 0.9465277777777777, + 0.9493055555555555, + 0.95 + ], + "val_auc": [ + 0.9667914496527777, + 0.9641232638888889, + 0.9759342447916668, + 0.9801529947916667, + 0.9824077690972222, + 0.9852582465277778, + 0.9809787326388888, + 0.9832606336805556, + 0.9891286892361111, + 0.9856673177083333, + 0.9885264756944444, + 0.9885514322916666, + 0.9879828559027778, + 0.9884016927083332 + ], + "val_f1": [ + 0.9234088457389428, + 0.8984962406015038, + 0.9440083725798012, + 0.9485481406011207, + 0.9391491653204093, + 0.9571577847439916, + 0.9343379978471474, + 0.9425040300913488, + 0.9576271186440678, + 0.9529846804014791, + 0.9607535321821036, + 0.9595375722543352, + 0.9618400418191323, + 0.9625779625779626 + ] + }, + "test_metrics": { + "accuracy": 0.8233333333333334, + "auc_roc": 0.9446028935185184, + "f1": 0.8682820751786269, + 
"confusion_matrix": [ + [ + 1157, + 43 + ], + [ + 805, + 2795 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9216666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 94, + 1106 + ] + ], + "n": 1200, + "detection_rate": 0.9216666666666666, + "pairwise_auc": 0.9862177083333332, + "pairwise_f1": 0.9416773094934014 + }, + "insight": { + "accuracy": 0.42833333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 686, + 514 + ] + ], + "n": 1200, + "detection_rate": 0.42833333333333334, + "pairwise_auc": 0.8534635416666667, + "pairwise_f1": 0.585088218554354 + }, + "text2img": { + "accuracy": 0.9791666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 25, + 1175 + ] + ], + "n": 1200, + "detection_rate": 0.9791666666666666, + "pairwise_auc": 0.9941274305555554, + "pairwise_f1": 0.9718775847808105 + }, + "wiki": { + "accuracy": 0.9641666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1157, + 43 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.035833333333333384 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9429166666666666, + "auc_roc": 0.9862177083333332, + "f1": 0.9416773094934014, + "confusion_matrix": [ + [ + 1157, + 43 + ], + [ + 94, + 1106 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.69625, + "auc_roc": 0.8534635416666667, + "f1": 0.585088218554354, + "confusion_matrix": [ + [ + 1157, + 43 + ], + [ + 686, + 514 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9716666666666667, + "auc_roc": 0.9941274305555554, + "f1": 0.9718775847808105, + "confusion_matrix": [ + [ + 1157, + 43 + ], + [ + 25, + 1175 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 
0.8302916666666667, + "std": 0.013218581723383837, + "ci_95": 0.011586597741452051, + "values": [ + 0.8404166666666667, + 0.845, + 0.8120833333333334, + 0.830625, + 0.8233333333333334 + ] + }, + "auc_roc": { + "mean": 0.9401337037037036, + "std": 0.003035763996338149, + "ci_95": 0.0026609644665077413, + "values": [ + 0.9406697916666668, + 0.9390527777777778, + 0.9362050925925925, + 0.9401379629629629, + 0.9446028935185184 + ] + }, + "f1": { + "mean": 0.8747551774642861, + "std": 0.011370726815842292, + "ci_95": 0.009966881500610814, + "values": [ + 0.8836926814454904, + 0.8875113395827033, + 0.8595015576323988, + 0.8747882334822116, + 0.8682820751786269 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.9275, + "std": 0.016666666666666677, + "ci_95": 0.014608977452998634, + "values": [ + 0.94, + 0.9491666666666667, + 0.9083333333333333, + 0.9183333333333333, + 0.9216666666666666 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9275, + "std": 0.016666666666666677, + "ci_95": 0.014608977452998634, + "values": [ + 0.94, + 0.9491666666666667, + 0.9083333333333333, + 0.9183333333333333, + 0.9216666666666666 + ] + }, + "pairwise_auc": { + "mean": 0.9846677777777778, + "std": 0.001795447994153717, + "ci_95": 0.0015737795558773955, + "values": [ + 0.9856604166666667, + 0.9847184027777777, + 0.98161875, + 0.9851236111111111, + 0.9862177083333332 + ] + }, + "pairwise_f1": { + "mean": 0.9370720514179574, + "std": 0.006040586678143011, + "ci_95": 0.005294807675032507, + "values": [ + 0.9384359400998337, + 0.9421009098428453, + 0.9272649936197362, + 0.9358811040339703, + 0.9416773094934014 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.4691666666666666, + "std": 0.041428485905768316, + "ci_95": 0.03631366899055444, + "values": [ + 0.5008333333333334, + 0.515, + 0.4241666666666667, + 0.4775, + 0.42833333333333334 + ] + }, 
+ "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.4691666666666666, + "std": 0.041428485905768316, + "ci_95": 0.03631366899055444, + "values": [ + 0.5008333333333334, + 0.515, + 0.4241666666666667, + 0.4775, + 0.42833333333333334 + ] + }, + "pairwise_auc": { + "mean": 0.8421123611111112, + "std": 0.007000441060805703, + "ci_95": 0.006136157137101378, + "values": [ + 0.8413416666666667, + 0.839582638888889, + 0.8343604166666666, + 0.8418135416666667, + 0.8534635416666667 + ] + }, + "pairwise_f1": { + "mean": 0.6159538907312784, + "std": 0.03397896348215447, + "ci_95": 0.029783874683223497, + "values": [ + 0.6403835908364411, + 0.6515550869794412, + 0.5751412429378531, + 0.6276013143483024, + 0.585088218554354 + ] + } + }, + "text2img": { + "accuracy": { + "mean": 0.9765, + "std": 0.007439571373555206, + "ci_95": 0.006521071827374521, + "values": [ + 0.9841666666666666, + 0.9816666666666667, + 0.9666666666666667, + 0.9708333333333333, + 0.9791666666666666 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9765, + "std": 0.007439571373555206, + "ci_95": 0.006521071827374521, + "values": [ + 0.9841666666666666, + 0.9816666666666667, + 0.9666666666666667, + 0.9708333333333333, + 0.9791666666666666 + ] + }, + "pairwise_auc": { + "mean": 0.9936209722222223, + "std": 0.0009685485403824886, + "ci_95": 0.0008489702273149503, + "values": [ + 0.9950072916666667, + 0.9928572916666668, + 0.9926361111111112, + 0.9934767361111112, + 0.9941274305555554 + ] + }, + "pairwise_f1": { + "mean": 0.9627986985943575, + "std": 0.005497248442216119, + "ci_95": 0.004818550712752026, + "values": [ + 0.9613349613349613, + 0.9588929588929589, + 0.9582817017761256, + 0.9636062861869313, + 0.9718775847808105 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.9480000000000001, + "std": 
0.01268748289544548, + "ci_95": 0.011121069093292123, + "values": [ + 0.9366666666666666, + 0.9341666666666667, + 0.9491666666666667, + 0.9558333333333333, + 0.9641666666666666 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.052000000000000005, + "std": 0.01268748289544548, + "ci_95": 0.011121069093292123, + "values": [ + 0.06333333333333335, + 0.0658333333333333, + 0.050833333333333286, + 0.04416666666666669, + 0.035833333333333384 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.93775, + "std": 0.005563709294266832, + "ci_95": 0.004876806218098983, + "values": [ + 0.9383333333333334, + 0.9416666666666667, + 0.92875, + 0.9370833333333334, + 0.9429166666666666 + ] + }, + "auc_roc": { + "mean": 0.9846677777777778, + "std": 0.001795447994153717, + "ci_95": 0.0015737795558773955, + "values": [ + 0.9856604166666667, + 0.9847184027777777, + 0.98161875, + 0.9851236111111111, + 0.9862177083333332 + ] + }, + "f1": { + "mean": 0.9370720514179574, + "std": 0.006040586678143011, + "ci_95": 0.005294807675032507, + "values": [ + 0.9384359400998337, + 0.9421009098428453, + 0.9272649936197362, + 0.9358811040339703, + 0.9416773094934014 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.7085833333333333, + "std": 0.01625694296122259, + "ci_95": 0.014249838790511133, + "values": [ + 0.71875, + 0.7245833333333334, + 0.6866666666666666, + 0.7166666666666667, + 0.69625 + ] + }, + "auc_roc": { + "mean": 0.8421123611111112, + "std": 0.007000441060805703, + "ci_95": 0.006136157137101378, + "values": [ + 0.8413416666666667, + 0.839582638888889, + 0.8343604166666666, + 0.8418135416666667, + 0.8534635416666667 + ] + }, + "f1": { + "mean": 0.6159538907312784, + "std": 0.03397896348215447, + "ci_95": 0.029783874683223497, + "values": [ + 0.6403835908364411, + 0.6515550869794412, + 0.5751412429378531, + 
0.6276013143483024, + 0.585088218554354 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.96225, + "std": 0.005717601964304825, + "ci_95": 0.005011699090904991, + "values": [ + 0.9604166666666667, + 0.9579166666666666, + 0.9579166666666666, + 0.9633333333333334, + 0.9716666666666667 + ] + }, + "auc_roc": { + "mean": 0.9936209722222223, + "std": 0.0009685485403824886, + "ci_95": 0.0008489702273149503, + "values": [ + 0.9950072916666667, + 0.9928572916666668, + 0.9926361111111112, + 0.9934767361111112, + 0.9941274305555554 + ] + }, + "f1": { + "mean": 0.9627986985943575, + "std": 0.005497248442216119, + "ci_95": 0.004818550712752026, + "values": [ + 0.9613349613349613, + 0.9588929588929589, + 0.9582817017761256, + 0.9636062861869313, + 0.9718775847808105 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data", + "run_name": "p2a_t3_holdout_insight", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false, + "normalization": "imagenet", + "train_sources": [ + "wiki", + "text2img", + "inpainting" + ], + "eval_sources": [ + "wiki", + "text2img", + "inpainting", + "insight" + ] + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p2a_t3_holdout_text2img.json b/classifier/outputs/logs/p2a_t3_holdout_text2img.json new file mode 100644 index 0000000..7174a16 --- /dev/null +++ b/classifier/outputs/logs/p2a_t3_holdout_text2img.json @@ -0,0 +1,1725 @@ +{ + "run_name": "p2a_t3_holdout_text2img", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.25936111174983745, + 0.13471967871358365, + 0.07450783130494348, + 0.04490341250727206, + 0.02855289393200907, + 0.020640523931985996, + 0.01199944190863149, + 0.008351340312605945, 
+ 0.005777361075798794 + ], + "train_acc": [ + 0.8108796296296297, + 0.9157407407407407, + 0.9574845679012346, + 0.9771604938271605, + 0.9857253086419753, + 0.9901234567901235, + 0.9948302469135802, + 0.9959876543209877, + 0.9974537037037037 + ], + "train_auc": [ + 0.9029168006258572, + 0.975353531700103, + 0.9923160070516119, + 0.9969480747385115, + 0.9986825247556583, + 0.9992779197745199, + 0.999671156978738, + 0.9998737836505487, + 0.9999460010502401 + ], + "train_f1": [ + 0.8516433629925549, + 0.9358627980735346, + 0.9678773392409491, + 0.9828166724718449, + 0.9892772271489017, + 0.9925814303929523, + 0.9961197660276828, + 0.9969907407407408, + 0.9980901672550495 + ], + "val_loss": [ + 0.19289982169866562, + 0.17769995480775833, + 0.25123097085290486, + 0.22796319822470348, + 0.2647772492633926, + 0.30438034501340655, + 0.33032024684879513, + 0.3406846645805571, + 0.5371278474728266 + ], + "val_acc": [ + 0.8833333333333333, + 0.9111111111111111, + 0.8548611111111111, + 0.9, + 0.8909722222222223, + 0.8993055555555556, + 0.9041666666666667, + 0.9048611111111111, + 0.8861111111111111 + ], + "val_auc": [ + 0.9526030815972222, + 0.9636534288194445, + 0.9576529947916667, + 0.9643446180555555, + 0.9597515190972223, + 0.9584884982638889, + 0.9564192708333333, + 0.9562858072916667, + 0.9418001302083334 + ], + "val_f1": [ + 0.9132231404958677, + 0.9333333333333333, + 0.8822535211267606, + 0.9242105263157895, + 0.9149051490514906, + 0.924518479958355, + 0.9287190082644629, + 0.9281594126900892, + 0.9188118811881189 + ] + }, + "test_metrics": { + "accuracy": 0.7866666666666666, + "auc_roc": 0.894214699074074, + "f1": 0.8427035330261137, + "confusion_matrix": [ + [ + 1033, + 167 + ], + [ + 857, + 2743 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.935, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 78, + 1122 + ] + ], + "n": 1200, + "detection_rate": 0.935, + "pairwise_auc": 0.9670590277777777, + "pairwise_f1": 
0.9015668943350743 + }, + "insight": { + "accuracy": 0.91, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 108, + 1092 + ] + ], + "n": 1200, + "detection_rate": 0.91, + "pairwise_auc": 0.9627385416666667, + "pairwise_f1": 0.8881659211061407 + }, + "text2img": { + "accuracy": 0.44083333333333335, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 671, + 529 + ] + ], + "n": 1200, + "detection_rate": 0.44083333333333335, + "pairwise_auc": 0.7528465277777776, + "pairwise_f1": 0.5580168776371308 + }, + "wiki": { + "accuracy": 0.8608333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1033, + 167 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.13916666666666666 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.8979166666666667, + "auc_roc": 0.9670590277777777, + "f1": 0.9015668943350743, + "confusion_matrix": [ + [ + 1033, + 167 + ], + [ + 78, + 1122 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8854166666666666, + "auc_roc": 0.9627385416666667, + "f1": 0.8881659211061407, + "confusion_matrix": [ + [ + 1033, + 167 + ], + [ + 108, + 1092 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.6508333333333334, + "auc_roc": 0.7528465277777776, + "f1": 0.5580168776371308, + "confusion_matrix": [ + [ + 1033, + 167 + ], + [ + 671, + 529 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.2583258535574984, + 0.13611753616068098, + 0.07462463752125148, + 0.0452532290659247, + 0.02999016788657352, + 0.019122767252064264, + 0.010874123830203811, + 0.006297654124936354, + 0.003966155544324544, + 0.0032256692653727776, + 0.0021348577125019893, + 0.0017390118794032281, + 0.0006646750832945223, + 
0.0012557536934831727 + ], + "train_acc": [ + 0.8185956790123456, + 0.9189814814814815, + 0.957716049382716, + 0.9763117283950618, + 0.9847222222222223, + 0.9912037037037037, + 0.9947530864197531, + 0.9972222222222222, + 0.9982253086419753, + 0.9983024691358025, + 0.9992283950617284, + 0.9993055555555556, + 0.999537037037037, + 0.9994598765432099 + ], + "train_auc": [ + 0.9041926681455761, + 0.974750862697188, + 0.9923605886702676, + 0.9969843508873458, + 0.9986152504501029, + 0.9993941963520233, + 0.999769616983882, + 0.9998085187328533, + 0.9999604150591564, + 0.9999880642361111, + 0.9999943201303155, + 0.9999955793467077, + 0.9999997320816187, + 0.9999982719264403 + ], + "train_f1": [ + 0.8596334109499074, + 0.9381698268755152, + 0.9680130749474667, + 0.9821646430023819, + 0.9885217391304347, + 0.9933974284721417, + 0.9960634479564664, + 0.9979154603358426, + 0.9986684420772304, + 0.9987268518518518, + 0.9994212962962963, + 0.999479015918958, + 0.9996526571726294, + 0.9995948370666203 + ], + "val_loss": [ + 0.1964837079246839, + 0.20594968348741532, + 0.26393358078267837, + 0.309774564868874, + 0.30068880518277485, + 0.30512222581439546, + 0.33635254121488994, + 0.31080355329646003, + 0.3263464295201831, + 0.3539930683043268, + 0.37549908343288635, + 0.32930633922417957, + 0.3372042354610231, + 0.3207292776968744 + ], + "val_acc": [ + 0.8729166666666667, + 0.8895833333333333, + 0.8888888888888888, + 0.8930555555555556, + 0.8965277777777778, + 0.9013888888888889, + 0.8743055555555556, + 0.8888888888888888, + 0.9055555555555556, + 0.9069444444444444, + 0.9118055555555555, + 0.9118055555555555, + 0.9125, + 0.9048611111111111 + ], + "val_auc": [ + 0.9484678819444444, + 0.9535058593750001, + 0.9505425347222223, + 0.9517469618055554, + 0.9594184027777778, + 0.9555544704861112, + 0.9591558159722221, + 0.9600748697916668, + 0.9611469184027779, + 0.9561469184027778, + 0.9521885850694445, + 0.9576985677083334, + 0.9570789930555555, + 0.960546875 + ], + "val_f1": [ + 
0.8999453253143794, + 0.9163598106259864, + 0.917610710607621, + 0.9211065573770492, + 0.9237851662404092, + 0.9260416666666667, + 0.9009304871373837, + 0.9137001078748651, + 0.9298245614035088, + 0.9307851239669421, + 0.9351047521716913, + 0.934299017071909, + 0.9351184346035015, + 0.9286829776158251 + ] + }, + "test_metrics": { + "accuracy": 0.8116666666666666, + "auc_roc": 0.910004398148148, + "f1": 0.8645489961042853, + "confusion_matrix": [ + [ + 1011, + 189 + ], + [ + 715, + 2885 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9575, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 51, + 1149 + ] + ], + "n": 1200, + "detection_rate": 0.9575, + "pairwise_auc": 0.9744965277777777, + "pairwise_f1": 0.9054373522458629 + }, + "insight": { + "accuracy": 0.9241666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 91, + 1109 + ] + ], + "n": 1200, + "detection_rate": 0.9241666666666667, + "pairwise_auc": 0.9579416666666667, + "pairwise_f1": 0.8879103282626101 + }, + "text2img": { + "accuracy": 0.5225, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 573, + 627 + ] + ], + "n": 1200, + "detection_rate": 0.5225, + "pairwise_auc": 0.7975750000000001, + "pairwise_f1": 0.6220238095238095 + }, + "wiki": { + "accuracy": 0.8425, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1011, + 189 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.15749999999999997 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9, + "auc_roc": 0.9744965277777777, + "f1": 0.9054373522458629, + "confusion_matrix": [ + [ + 1011, + 189 + ], + [ + 51, + 1149 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8833333333333333, + "auc_roc": 0.9579416666666667, + "f1": 0.8879103282626101, + "confusion_matrix": [ + [ + 1011, + 
189 + ], + [ + 91, + 1109 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.6825, + "auc_roc": 0.7975750000000001, + "f1": 0.6220238095238095, + "confusion_matrix": [ + [ + 1011, + 189 + ], + [ + 573, + 627 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.26232991450362736, + 0.13593779744373427, + 0.07640465086578956, + 0.04920590366294354, + 0.027937425581384796, + 0.021169344729181225, + 0.010735498867188092, + 0.010091174576473912, + 0.0058576608773207375, + 0.00232485462857531, + 0.0011369886758849908, + 0.0009826418625941446, + 0.0010492766609505777 + ], + "train_acc": [ + 0.8170524691358024, + 0.917283950617284, + 0.9560185185185185, + 0.9757716049382716, + 0.9856481481481482, + 0.9905092592592593, + 0.9955246913580247, + 0.9959876543209877, + 0.9975308641975309, + 0.9991512345679012, + 0.9994598765432099, + 0.999537037037037, + 0.9996913580246913 + ], + "train_auc": [ + 0.9024782584233539, + 0.9747301927940671, + 0.99196964216821, + 0.9962500401877572, + 0.998842472029321, + 0.9991242149991427, + 0.9997307152349109, + 0.9998100190757888, + 0.999917682077332, + 0.9999897119341563, + 0.999999410579561, + 0.9999993034122086, + 0.9999988211591221 + ], + "train_f1": [ + 0.8566938652160774, + 0.9367476988435214, + 0.9667715984610004, + 0.9817653890824622, + 0.9892111368909513, + 0.9928724575534565, + 0.9966415749855241, + 0.9969886495251332, + 0.998147933788633, + 0.9993633153904035, + 0.9995947901591896, + 0.9996527375853687, + 0.9997684917235791 + ], + "val_loss": [ + 0.18554263181156583, + 0.15960322005881203, + 0.19698696964316897, + 0.24289254877302383, + 0.2467615756723616, + 0.27950658169057635, + 0.33689615113867655, + 0.25568568557500837, + 0.3190272374285592, + 0.3059672231475512, + 0.33635250528653465, + 0.3490314172373878, + 0.31362664004166924 + ], + "val_acc": [ + 0.86875, + 0.9083333333333333, + 
0.9097222222222222, + 0.8798611111111111, + 0.9083333333333333, + 0.9041666666666667, + 0.9076388888888889, + 0.9083333333333333, + 0.9145833333333333, + 0.9138888888888889, + 0.9138888888888889, + 0.9125, + 0.9145833333333333 + ], + "val_auc": [ + 0.9542849392361111, + 0.967917751736111, + 0.9659657118055556, + 0.9679568142361112, + 0.9660481770833333, + 0.9649891493055556, + 0.9617361111111111, + 0.9687163628472222, + 0.9629090711805555, + 0.9656477864583333, + 0.9625965711805556, + 0.9636642795138888, + 0.9665223524305555 + ], + "val_f1": [ + 0.8963247394404827, + 0.9303062302006336, + 0.9322210636079249, + 0.9034059184812954, + 0.930890052356021, + 0.9275210084033614, + 0.9317598768599281, + 0.9291084854994629, + 0.9363024339720352, + 0.935349322210636, + 0.9357512953367876, + 0.9351184346035015, + 0.9360374414976599 + ] + }, + "test_metrics": { + "accuracy": 0.7704166666666666, + "auc_roc": 0.8972921296296297, + "f1": 0.8268384663733501, + "confusion_matrix": [ + [ + 1067, + 133 + ], + [ + 969, + 2631 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9291666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 85, + 1115 + ] + ], + "n": 1200, + "detection_rate": 0.9291666666666667, + "pairwise_auc": 0.9711552083333335, + "pairwise_f1": 0.9109477124183006 + }, + "insight": { + "accuracy": 0.8883333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 134, + 1066 + ] + ], + "n": 1200, + "detection_rate": 0.8883333333333333, + "pairwise_auc": 0.9588736111111111, + "pairwise_f1": 0.8887036265110463 + }, + "text2img": { + "accuracy": 0.375, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 750, + 450 + ] + ], + "n": 1200, + "detection_rate": 0.375, + "pairwise_auc": 0.7618475694444444, + "pairwise_f1": 0.5047672462142456 + }, + "wiki": { + "accuracy": 0.8891666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1067, + 
133 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.11083333333333334 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9091666666666667, + "auc_roc": 0.9711552083333335, + "f1": 0.9109477124183006, + "confusion_matrix": [ + [ + 1067, + 133 + ], + [ + 85, + 1115 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.88875, + "auc_roc": 0.9588736111111111, + "f1": 0.8887036265110463, + "confusion_matrix": [ + [ + 1067, + 133 + ], + [ + 134, + 1066 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.6320833333333333, + "auc_roc": 0.7618475694444444, + "f1": 0.5047672462142456, + "confusion_matrix": [ + [ + 1067, + 133 + ], + [ + 750, + 450 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.2620197121191908, + 0.13751073586352078, + 0.07180561355234664, + 0.044450114913037575, + 0.033807506547223226, + 0.017647162451154413, + 0.014270167727954686 + ], + "train_acc": [ + 0.8123456790123457, + 0.9162037037037037, + 0.9591049382716049, + 0.9753086419753086, + 0.9849537037037037, + 0.9907407407407407, + 0.9925925925925926 + ], + "train_auc": [ + 0.9014318496013374, + 0.9744243505658436, + 0.9928130358367626, + 0.9971750953789438, + 0.9982361727323388, + 0.9994681150334361, + 0.9994378402563444 + ], + "train_f1": [ + 0.8537230843257548, + 0.9361552028218695, + 0.969056515646894, + 0.9814277423099246, + 0.9886949968114094, + 0.9930450909933929, + 0.9944392956441149 + ], + "val_loss": [ + 0.19311593059036467, + 0.18973327411545649, + 0.22272994501723184, + 0.37533411615424683, + 0.2943312370114856, + 0.3369398739602831, + 0.37520144234100977 + ], + "val_acc": [ + 0.8729166666666667, + 0.8930555555555556, + 0.89375, + 0.8833333333333333, + 0.8729166666666667, + 0.9048611111111111, + 
0.9041666666666667 + ], + "val_auc": [ + 0.9499533420138889, + 0.9603559027777778, + 0.9574663628472222, + 0.9462217881944445, + 0.959501953125, + 0.950947265625, + 0.9460405815972223 + ], + "val_f1": [ + 0.9036334913112164, + 0.91995841995842, + 0.9188328912466843, + 0.9154078549848943, + 0.8986149584487535, + 0.9296353364149974, + 0.9293032786885246 + ] + }, + "test_metrics": { + "accuracy": 0.791875, + "auc_roc": 0.8873210648148148, + "f1": 0.8480608365019011, + "confusion_matrix": [ + [ + 1013, + 187 + ], + [ + 812, + 2788 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9375, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 75, + 1125 + ] + ], + "n": 1200, + "detection_rate": 0.9375, + "pairwise_auc": 0.9665180555555555, + "pairwise_f1": 0.8957006369426752 + }, + "insight": { + "accuracy": 0.9025, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 117, + 1083 + ] + ], + "n": 1200, + "detection_rate": 0.9025, + "pairwise_auc": 0.9520479166666668, + "pairwise_f1": 0.8769230769230769 + }, + "text2img": { + "accuracy": 0.48333333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 620, + 580 + ] + ], + "n": 1200, + "detection_rate": 0.48333333333333334, + "pairwise_auc": 0.7433972222222223, + "pairwise_f1": 0.5897305541433655 + }, + "wiki": { + "accuracy": 0.8441666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1013, + 187 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.15583333333333338 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.8908333333333334, + "auc_roc": 0.9665180555555555, + "f1": 0.8957006369426752, + "confusion_matrix": [ + [ + 1013, + 187 + ], + [ + 75, + 1125 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8733333333333333, + "auc_roc": 
0.9520479166666668, + "f1": 0.8769230769230769, + "confusion_matrix": [ + [ + 1013, + 187 + ], + [ + 117, + 1083 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.66375, + "auc_roc": 0.7433972222222223, + "f1": 0.5897305541433655, + "confusion_matrix": [ + [ + 1013, + 187 + ], + [ + 620, + 580 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 12960, + "val_size": 1440, + "test_size": 4800, + "history": { + "train_loss": [ + 0.26192283198053457, + 0.13976006851428086, + 0.07379607345368483, + 0.04615930510155949, + 0.03257456865322627, + 0.019680384536385882, + 0.014805444832772654, + 0.007178295066770249, + 0.00486855545719147, + 0.0034230970272790414 + ], + "train_acc": [ + 0.8118055555555556, + 0.9132716049382716, + 0.9589506172839506, + 0.9753858024691358, + 0.9844907407407407, + 0.9921296296296296, + 0.9925925925925926, + 0.9970679012345679, + 0.9976851851851852, + 0.9986882716049382 + ], + "train_auc": [ + 0.9018450869127228, + 0.9735350088948902, + 0.9923912921167696, + 0.9968518250600138, + 0.9983943517446845, + 0.9992084083504801, + 0.9995807479209533, + 0.9998065629286694, + 0.9999489481524348, + 0.9999726053455076 + ], + "train_f1": [ + 0.8530457311562331, + 0.9337498526464694, + 0.9689832089552238, + 0.981461033300401, + 0.9883376849434291, + 0.9940917516218721, + 0.9944360727947142, + 0.997801180418933, + 0.9982622798887859, + 0.9990161467677527 + ], + "val_loss": [ + 0.2104154187772009, + 0.16755928165382808, + 0.22552042288912666, + 0.2703928468955888, + 0.232618890537156, + 0.30671322064267265, + 0.34424640751547286, + 0.2897042198313607, + 0.29592826449208787, + 0.31122492684258357 + ], + "val_acc": [ + 0.8534722222222222, + 0.8965277777777778, + 0.8798611111111111, + 0.9076388888888889, + 0.9048611111111111, + 0.9118055555555555, + 0.9090277777777778, + 0.9041666666666667, + 0.9159722222222222, + 0.925 + ], + "val_auc": [ + 0.9383192274305556, + 0.9631792534722222, + 0.9636555989583334, 
+ 0.9636729600694445, + 0.9671006944444446, + 0.9630620659722223, + 0.9626171875, + 0.9637207031250001, + 0.9643988715277777, + 0.966296657986111 + ], + "val_f1": [ + 0.8845101258894362, + 0.9203634420096205, + 0.9046831955922865, + 0.9326582278481013, + 0.9276281035393555, + 0.9354346720894764, + 0.933871781928319, + 0.9255663430420712, + 0.9365495542737283, + 0.9442148760330579 + ] + }, + "test_metrics": { + "accuracy": 0.7647916666666666, + "auc_roc": 0.8910119212962961, + "f1": 0.8215584005057689, + "confusion_matrix": [ + [ + 1072, + 128 + ], + [ + 1001, + 2599 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9108333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 107, + 1093 + ] + ], + "n": 1200, + "detection_rate": 0.9108333333333334, + "pairwise_auc": 0.9666881944444444, + "pairwise_f1": 0.9029326724494011 + }, + "insight": { + "accuracy": 0.9058333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 113, + 1087 + ] + ], + "n": 1200, + "detection_rate": 0.9058333333333334, + "pairwise_auc": 0.9646850694444445, + "pairwise_f1": 0.9002070393374741 + }, + "text2img": { + "accuracy": 0.3491666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 781, + 419 + ] + ], + "n": 1200, + "detection_rate": 0.3491666666666667, + "pairwise_auc": 0.7416625, + "pairwise_f1": 0.47967945048654836 + }, + "wiki": { + "accuracy": 0.8933333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1072, + 128 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.10666666666666669 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9020833333333333, + "auc_roc": 0.9666881944444444, + "f1": 0.9029326724494011, + "confusion_matrix": [ + [ + 1072, + 128 + ], + [ + 107, + 1093 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + 
"insight" + ], + "n": 2400, + "accuracy": 0.8995833333333333, + "auc_roc": 0.9646850694444445, + "f1": 0.9002070393374741, + "confusion_matrix": [ + [ + 1072, + 128 + ], + [ + 113, + 1087 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.62125, + "auc_roc": 0.7416625, + "f1": 0.47967945048654836, + "confusion_matrix": [ + [ + 1072, + 128 + ], + [ + 781, + 419 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.7850833333333334, + "std": 0.01858877442765451, + "ci_95": 0.01629377918954893, + "values": [ + 0.7866666666666666, + 0.8116666666666666, + 0.7704166666666666, + 0.791875, + 0.7647916666666666 + ] + }, + "auc_roc": { + "mean": 0.8959688425925926, + "std": 0.008677269977512222, + "ci_95": 0.007605962487303476, + "values": [ + 0.894214699074074, + 0.910004398148148, + 0.8972921296296297, + 0.8873210648148148, + 0.8910119212962961 + ] + }, + "f1": { + "mean": 0.8407420465022838, + "std": 0.01721558000704002, + "ci_95": 0.015090121209788492, + "values": [ + 0.8427035330261137, + 0.8645489961042853, + 0.8268384663733501, + 0.8480608365019011, + 0.8215584005057689 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.9339999999999999, + "std": 0.016774650186781495, + "ci_95": 0.014703629181637803, + "values": [ + 0.935, + 0.9575, + 0.9291666666666667, + 0.9375, + 0.9108333333333334 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9339999999999999, + "std": 0.016774650186781495, + "ci_95": 0.014703629181637803, + "values": [ + 0.935, + 0.9575, + 0.9291666666666667, + 0.9375, + 0.9108333333333334 + ] + }, + "pairwise_auc": { + "mean": 0.9691834027777777, + "std": 0.003534134405937126, + "ci_95": 0.00309780539113213, + "values": [ + 0.9670590277777777, + 0.9744965277777777, + 0.9711552083333335, + 0.9665180555555555, + 0.9666881944444444 + 
] + }, + "pairwise_f1": { + "mean": 0.9033170536782628, + "std": 0.005566489678614175, + "ci_95": 0.004879243332433443, + "values": [ + 0.9015668943350743, + 0.9054373522458629, + 0.9109477124183006, + 0.8957006369426752, + 0.9029326724494011 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.9061666666666668, + "std": 0.012944861700475792, + "ci_95": 0.011346671562866178, + "values": [ + 0.91, + 0.9241666666666667, + 0.8883333333333333, + 0.9025, + 0.9058333333333334 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9061666666666668, + "std": 0.012944861700475792, + "ci_95": 0.011346671562866178, + "values": [ + 0.91, + 0.9241666666666667, + 0.8883333333333333, + 0.9025, + 0.9058333333333334 + ] + }, + "pairwise_auc": { + "mean": 0.9592573611111112, + "std": 0.004884491809371957, + "ci_95": 0.004281445842748283, + "values": [ + 0.9627385416666667, + 0.9579416666666667, + 0.9588736111111111, + 0.9520479166666668, + 0.9646850694444445 + ] + }, + "pairwise_f1": { + "mean": 0.8883819984280695, + "std": 0.008238796655830128, + "ci_95": 0.007221623675091769, + "values": [ + 0.8881659211061407, + 0.8879103282626101, + 0.8887036265110463, + 0.8769230769230769, + 0.9002070393374741 + ] + } + }, + "text2img": { + "accuracy": { + "mean": 0.4341666666666667, + "std": 0.07244010936245501, + "ci_95": 0.0634965554621316, + "values": [ + 0.44083333333333335, + 0.5225, + 0.375, + 0.48333333333333334, + 0.3491666666666667 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.4341666666666667, + "std": 0.07244010936245501, + "ci_95": 0.0634965554621316, + "values": [ + 0.44083333333333335, + 0.5225, + 0.375, + 0.48333333333333334, + 0.3491666666666667 + ] + }, + "pairwise_auc": { + "mean": 0.7594657638888889, + "std": 0.022786813060201933, + "ci_95": 
0.01997352229333108, + "values": [ + 0.7528465277777776, + 0.7975750000000001, + 0.7618475694444444, + 0.7433972222222223, + 0.7416625 + ] + }, + "pairwise_f1": { + "mean": 0.55084358760102, + "std": 0.058774228826551034, + "ci_95": 0.051517883024667956, + "values": [ + 0.5580168776371308, + 0.6220238095238095, + 0.5047672462142456, + 0.5897305541433655, + 0.47967945048654836 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.866, + "std": 0.02418390190004729, + "ci_95": 0.021198124654999286, + "values": [ + 0.8608333333333333, + 0.8425, + 0.8891666666666667, + 0.8441666666666666, + 0.8933333333333333 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.134, + "std": 0.02418390190004729, + "ci_95": 0.021198124654999286, + "values": [ + 0.13916666666666666, + 0.15749999999999997, + 0.11083333333333334, + 0.15583333333333338, + 0.10666666666666669 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.9, + "std": 0.006647106722309916, + "ci_95": 0.005826445934034069, + "values": [ + 0.8979166666666667, + 0.9, + 0.9091666666666667, + 0.8908333333333334, + 0.9020833333333333 + ] + }, + "auc_roc": { + "mean": 0.9691834027777777, + "std": 0.003534134405937126, + "ci_95": 0.00309780539113213, + "values": [ + 0.9670590277777777, + 0.9744965277777777, + 0.9711552083333335, + 0.9665180555555555, + 0.9666881944444444 + ] + }, + "f1": { + "mean": 0.9033170536782628, + "std": 0.005566489678614175, + "ci_95": 0.004879243332433443, + "values": [ + 0.9015668943350743, + 0.9054373522458629, + 0.9109477124183006, + 0.8957006369426752, + 0.9029326724494011 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.8860833333333332, + "std": 0.009485917925477162, + "ci_95": 0.008314773665657482, + "values": [ + 0.8854166666666666, + 0.8833333333333333, + 0.88875, + 0.8733333333333333, + 0.8995833333333333 + ] + }, + "auc_roc": 
{ + "mean": 0.9592573611111112, + "std": 0.004884491809371957, + "ci_95": 0.004281445842748283, + "values": [ + 0.9627385416666667, + 0.9579416666666667, + 0.9588736111111111, + 0.9520479166666668, + 0.9646850694444445 + ] + }, + "f1": { + "mean": 0.8883819984280695, + "std": 0.008238796655830128, + "ci_95": 0.007221623675091769, + "values": [ + 0.8881659211061407, + 0.8879103282626101, + 0.8887036265110463, + 0.8769230769230769, + 0.9002070393374741 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.6500833333333333, + "std": 0.02446191768170817, + "ci_95": 0.02144181623215098, + "values": [ + 0.6508333333333334, + 0.6825, + 0.6320833333333333, + 0.66375, + 0.62125 + ] + }, + "auc_roc": { + "mean": 0.7594657638888889, + "std": 0.022786813060201933, + "ci_95": 0.01997352229333108, + "values": [ + 0.7528465277777776, + 0.7975750000000001, + 0.7618475694444444, + 0.7433972222222223, + 0.7416625 + ] + }, + "f1": { + "mean": 0.55084358760102, + "std": 0.058774228826551034, + "ci_95": 0.051517883024667956, + "values": [ + 0.5580168776371308, + 0.6220238095238095, + 0.5047672462142456, + 0.5897305541433655, + 0.47967945048654836 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data", + "run_name": "p2a_t3_holdout_text2img", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false, + "normalization": "imagenet", + "train_sources": [ + "wiki", + "inpainting", + "insight" + ], + "eval_sources": [ + "wiki", + "inpainting", + "insight", + "text2img" + ] + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p2b_resnet18_224.json b/classifier/outputs/logs/p2b_resnet18_224.json new file mode 100644 index 0000000..81be951 --- /dev/null +++ b/classifier/outputs/logs/p2b_resnet18_224.json @@ -0,0 +1,1713 @@ +{ + "run_name": 
"p2b_resnet18_224", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.19722581385048452, + 0.10264252484603613, + 0.05920427175566416, + 0.0366648331324945, + 0.027175970428840998, + 0.016415510239635802, + 0.010992527571498713, + 0.006630335180751798, + 0.005149287341508305, + 0.0026562969974941518, + 0.0014051771179260362, + 0.0008840179361898124, + 0.0004661208866591719 + ], + "train_acc": [ + 0.811863425925926, + 0.9195023148148148, + 0.9572916666666667, + 0.9757523148148148, + 0.9828703703703704, + 0.9909143518518518, + 0.9928819444444444, + 0.9958912037037037, + 0.9965277777777778, + 0.9983217592592593, + 0.9991898148148148, + 0.9995949074074074, + 0.9997685185185186 + ], + "train_auc": [ + 0.9015951949731367, + 0.9747272858796296, + 0.9914408382630315, + 0.9967484800097165, + 0.9982575392232511, + 0.9990713323759717, + 0.9995895847622315, + 0.9998830982796068, + 0.9999293499228395, + 0.9999699663494512, + 0.9999906585791039, + 0.9999967135345222, + 0.9999998213877457 + ], + "train_f1": [ + 0.8662635237977704, + 0.9450913827813524, + 0.9712078651685393, + 0.9837527628058474, + 0.9885422311682279, + 0.9939328361092863, + 0.9952441712098364, + 0.9972579461630556, + 0.9976841130152848, + 0.9988807842229169, + 0.9994596263702331, + 0.999729886166313, + 0.9998456551937027 + ], + "val_loss": [ + 0.14872334962710737, + 0.15591826991488536, + 0.18881629154008503, + 0.18318608577440804, + 0.2586083890666487, + 0.21436443397154412, + 0.4333447159598412, + 0.21543170170140608, + 0.2493383682408118, + 0.28536541290159223, + 0.27383037212348427, + 0.31338123660993916, + 0.26701862792315295 + ], + "val_acc": [ + 0.8822916666666667, + 0.8833333333333333, + 0.9041666666666667, + 0.9015625, + 0.9015625, + 0.8963541666666667, + 0.9036458333333334, + 0.9052083333333333, + 0.91875, + 0.9208333333333333, + 0.9270833333333334, + 0.9229166666666667, + 0.9270833333333334 + ], + 
"val_auc": [ + 0.9479615162037038, + 0.9493648726851852, + 0.9572620081018519, + 0.9601273148148147, + 0.9543258101851851, + 0.9585423900462964, + 0.9473763020833333, + 0.9606532118055556, + 0.9584085648148147, + 0.9560619212962964, + 0.95689453125, + 0.9558637152777778, + 0.9571223958333335 + ], + "val_f1": [ + 0.919054441260745, + 0.92, + 0.9360222531293463, + 0.9338929695697796, + 0.9347600966517087, + 0.9286993908993192, + 0.9385177799933533, + 0.9352773826458037, + 0.9456824512534819, + 0.9479095270733379, + 0.9519560741249142, + 0.949419002050581, + 0.9516574585635359 + ] + }, + "test_metrics": { + "accuracy": 0.914375, + "auc_roc": 0.9687729166666665, + "f1": 0.9419409521118802, + "confusion_matrix": [ + [ + 1055, + 145 + ], + [ + 266, + 3334 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9333333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 80, + 1120 + ] + ], + "n": 1200, + "detection_rate": 0.9333333333333333, + "pairwise_auc": 0.9714215277777778, + "pairwise_f1": 0.9087221095334685 + }, + "insight": { + "accuracy": 0.8858333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 137, + 1063 + ] + ], + "n": 1200, + "detection_rate": 0.8858333333333334, + "pairwise_auc": 0.9548541666666667, + "pairwise_f1": 0.882890365448505 + }, + "text2img": { + "accuracy": 0.9591666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 49, + 1151 + ] + ], + "n": 1200, + "detection_rate": 0.9591666666666666, + "pairwise_auc": 0.9800430555555556, + "pairwise_f1": 0.9222756410256411 + }, + "wiki": { + "accuracy": 0.8791666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1055, + 145 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.12083333333333335 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.90625, + "auc_roc": 
0.9714215277777778, + "f1": 0.9087221095334685, + "confusion_matrix": [ + [ + 1055, + 145 + ], + [ + 80, + 1120 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8825, + "auc_roc": 0.9548541666666667, + "f1": 0.882890365448505, + "confusion_matrix": [ + [ + 1055, + 145 + ], + [ + 137, + 1063 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9191666666666667, + "auc_roc": 0.9800430555555556, + "f1": 0.9222756410256411, + "confusion_matrix": [ + [ + 1055, + 145 + ], + [ + 49, + 1151 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.20000588447545414, + 0.10408794169003764, + 0.058041512501788226, + 0.034372955076267114, + 0.025223094800377214, + 0.015196698343168936, + 0.010545113944270405, + 0.007307375633192553 + ], + "train_acc": [ + 0.8090277777777778, + 0.9175925925925926, + 0.9587962962962963, + 0.9762731481481481, + 0.9840277777777777, + 0.9907986111111111, + 0.9944444444444445, + 0.9953703703703703 + ], + "train_auc": [ + 0.8993097082904664, + 0.9743819390860768, + 0.9915667866941016, + 0.9970922103623686, + 0.9983073184585048, + 0.9992988843878601, + 0.9995584437157066, + 0.9998731674382716 + ], + "train_f1": [ + 0.8646098301468779, + 0.9437198640423682, + 0.9722157184109889, + 0.9840974323171204, + 0.9893130953302873, + 0.993854122376406, + 0.9962934362934363, + 0.9969111969111969 + ], + "val_loss": [ + 0.14092905839594702, + 0.13848712935190027, + 0.15262191738778105, + 0.2710734717196222, + 0.29285608716936623, + 0.25471851951188, + 0.20351220176477605, + 0.2155651534909945 + ], + "val_acc": [ + 0.884375, + 0.8989583333333333, + 0.9020833333333333, + 0.9114583333333334, + 0.9135416666666667, + 0.909375, + 0.9078125, + 0.9104166666666667 + ], + "val_auc": [ + 0.9523307291666666, + 0.9595883969907405, + 0.9632103587962961, + 0.9511574074074074, + 
0.9564756944444445, + 0.9586291956018518, + 0.9618590856481481, + 0.9612919560185186 + ], + "val_f1": [ + 0.9203158650394831, + 0.9312544294826365, + 0.9338959212376934, + 0.9420980926430518, + 0.943728813559322, + 0.9403292181069959, + 0.9373007438894793, + 0.9392226148409893 + ] + }, + "test_metrics": { + "accuracy": 0.905625, + "auc_roc": 0.9629736111111112, + "f1": 0.9361881955204958, + "confusion_matrix": [ + [ + 1024, + 176 + ], + [ + 277, + 3323 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9516666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 58, + 1142 + ] + ], + "n": 1200, + "detection_rate": 0.9516666666666667, + "pairwise_auc": 0.9716375000000002, + "pairwise_f1": 0.9070691024622717 + }, + "insight": { + "accuracy": 0.8775, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 147, + 1053 + ] + ], + "n": 1200, + "detection_rate": 0.8775, + "pairwise_auc": 0.9462055555555556, + "pairwise_f1": 0.8670234664470976 + }, + "text2img": { + "accuracy": 0.94, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 72, + 1128 + ] + ], + "n": 1200, + "detection_rate": 0.94, + "pairwise_auc": 0.9710777777777778, + "pairwise_f1": 0.9009584664536742 + }, + "wiki": { + "accuracy": 0.8533333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1024, + 176 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.1466666666666666 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9025, + "auc_roc": 0.9716375000000002, + "f1": 0.9070691024622717, + "confusion_matrix": [ + [ + 1024, + 176 + ], + [ + 58, + 1142 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8654166666666666, + "auc_roc": 0.9462055555555556, + "f1": 0.8670234664470976, + "confusion_matrix": [ + [ + 1024, + 176 + ], + [ + 147, + 
1053 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.8966666666666666, + "auc_roc": 0.9710777777777778, + "f1": 0.9009584664536742, + "confusion_matrix": [ + [ + 1024, + 176 + ], + [ + 72, + 1128 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.1954930212141739, + 0.10281271817921489, + 0.06016657212274839, + 0.03787446872965019, + 0.026305097892300518, + 0.017765212520370603, + 0.010911685225935932, + 0.009139342036825029, + 0.004632529768130351, + 0.004187346089508571, + 0.0017877653261907484, + 0.0007197948908006529, + 0.0007150228552733198 + ], + "train_acc": [ + 0.8079282407407408, + 0.9170717592592592, + 0.9578125, + 0.9766782407407407, + 0.9836226851851851, + 0.9901620370370371, + 0.9935763888888889, + 0.9946759259259259, + 0.9974537037037037, + 0.9972222222222222, + 0.9992476851851851, + 0.9995949074074074, + 0.9996527777777777 + ], + "train_auc": [ + 0.9030463123713992, + 0.9746345593278463, + 0.9914829728938043, + 0.996350594421582, + 0.9981514256830133, + 0.9991553515803613, + 0.9995712055612711, + 0.9995691515203475, + 0.9999019329418153, + 0.9999532482424555, + 0.9999925340077732, + 0.9999994820244628, + 0.9999990890775035 + ], + "train_f1": [ + 0.8626298580356774, + 0.943236284412755, + 0.9716154654830044, + 0.9843780284529209, + 0.9890501064035597, + 0.9934271574389112, + 0.9957121335033028, + 0.9964456807294081, + 0.9983014206300185, + 0.9981467181467182, + 0.999498437439716, + 0.9997299070108423, + 0.9997685006559148 + ], + "val_loss": [ + 0.19500481635332106, + 0.1268259635893628, + 0.13148466631925354, + 0.20869781834092768, + 0.2388368578608303, + 0.22051208737151076, + 0.21102181890407035, + 0.19602018855754674, + 0.2702031286339964, + 0.3234737387955799, + 0.2604731284258378, + 0.25106819690430104, + 0.25952417930657246 + ], + "val_acc": [ + 0.7515625, + 0.9072916666666667, + 0.9057291666666667, 
+ 0.9171875, + 0.9114583333333334, + 0.9234375, + 0.9083333333333333, + 0.9270833333333334, + 0.9255208333333333, + 0.9197916666666667, + 0.9302083333333333, + 0.9270833333333334, + 0.9348958333333334 + ], + "val_auc": [ + 0.9487427662037037, + 0.9660561342592593, + 0.9677799479166665, + 0.9676699942129628, + 0.9617209201388888, + 0.9637868923611111, + 0.9642853009259259, + 0.9688606770833333, + 0.9638433159722222, + 0.9533347800925925, + 0.9644741030092593, + 0.9622547743055555, + 0.9640183738425926 + ], + "val_f1": [ + 0.8039457459926017, + 0.9366096866096866, + 0.9354263289332858, + 0.9457523029682702, + 0.9418604651162791, + 0.9491876944348427, + 0.9376328844790929, + 0.9510831586303284, + 0.9508083935328517, + 0.9471879286694102, + 0.9538567493112947, + 0.9514899514899515, + 0.9568221070811744 + ] + }, + "test_metrics": { + "accuracy": 0.9239583333333333, + "auc_roc": 0.9678178240740741, + "f1": 0.9493547939503261, + "confusion_matrix": [ + [ + 1014, + 186 + ], + [ + 179, + 3421 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9625, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 45, + 1155 + ] + ], + "n": 1200, + "detection_rate": 0.9625, + "pairwise_auc": 0.9738322916666667, + "pairwise_f1": 0.9090909090909091 + }, + "insight": { + "accuracy": 0.9158333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 101, + 1099 + ] + ], + "n": 1200, + "detection_rate": 0.9158333333333334, + "pairwise_auc": 0.9491239583333334, + "pairwise_f1": 0.8845070422535212 + }, + "text2img": { + "accuracy": 0.9725, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 33, + 1167 + ] + ], + "n": 1200, + "detection_rate": 0.9725, + "pairwise_auc": 0.9804972222222222, + "pairwise_f1": 0.9142185663924794 + }, + "wiki": { + "accuracy": 0.845, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1014, + 186 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + 
"false_alarm_rate": 0.15500000000000003 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.90375, + "auc_roc": 0.9738322916666667, + "f1": 0.9090909090909091, + "confusion_matrix": [ + [ + 1014, + 186 + ], + [ + 45, + 1155 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8804166666666666, + "auc_roc": 0.9491239583333334, + "f1": 0.8845070422535212, + "confusion_matrix": [ + [ + 1014, + 186 + ], + [ + 101, + 1099 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.90875, + "auc_roc": 0.9804972222222222, + "f1": 0.9142185663924794, + "confusion_matrix": [ + [ + 1014, + 186 + ], + [ + 33, + 1167 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.19490147646240613, + 0.10359700576022819, + 0.06170826292924445, + 0.04011154976345737, + 0.02456697733448689, + 0.017018179965196868, + 0.014746911858236234, + 0.009014791383426065 + ], + "train_acc": [ + 0.8076967592592592, + 0.9194444444444444, + 0.9548032407407407, + 0.975, + 0.9853009259259259, + 0.9898726851851852, + 0.992824074074074, + 0.9953703703703703 + ], + "train_auc": [ + 0.9043987822216506, + 0.9743296414180384, + 0.9908415941500914, + 0.9959990587134202, + 0.9984063857453134, + 0.99924799775663, + 0.9991502343392775, + 0.9995339827674896 + ], + "train_f1": [ + 0.8621790883828958, + 0.9450106660346054, + 0.9695052906954044, + 0.9832376222256712, + 0.990171800030955, + 0.9932367149758454, + 0.9952123552123552, + 0.9969107198022861 + ], + "val_loss": [ + 0.22769305457671482, + 0.17312400991407534, + 0.14870864135834078, + 0.1872468259287416, + 0.2433047288354525, + 0.3240897962289788, + 0.2584284867192613, + 0.23186856481673507 + ], + "val_acc": [ + 0.746875, + 0.8453125, + 0.9104166666666667, + 0.9161458333333333, + 0.9234375, + 
0.9229166666666667, + 0.9208333333333333, + 0.9223958333333333 + ], + "val_auc": [ + 0.9224739583333335, + 0.9507523148148148, + 0.9644314236111111, + 0.9619386574074074, + 0.9605664062500001, + 0.9496766493055555, + 0.9632212094907407, + 0.9596093749999999 + ], + "val_f1": [ + 0.8008196721311476, + 0.8879668049792531, + 0.9398180545836249, + 0.943921978404737, + 0.9496402877697842, + 0.9499323410013532, + 0.9481935923653715, + 0.9479566887879847 + ] + }, + "test_metrics": { + "accuracy": 0.9035416666666667, + "auc_roc": 0.9612076388888888, + "f1": 0.935126803979263, + "confusion_matrix": [ + [ + 1000, + 200 + ], + [ + 263, + 3337 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9475, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 63, + 1137 + ] + ], + "n": 1200, + "detection_rate": 0.9475, + "pairwise_auc": 0.9671291666666667, + "pairwise_f1": 0.8963342530547891 + }, + "insight": { + "accuracy": 0.8941666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 127, + 1073 + ] + ], + "n": 1200, + "detection_rate": 0.8941666666666667, + "pairwise_auc": 0.9488027777777779, + "pairwise_f1": 0.8677719369187222 + }, + "text2img": { + "accuracy": 0.9391666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 73, + 1127 + ] + ], + "n": 1200, + "detection_rate": 0.9391666666666667, + "pairwise_auc": 0.9676909722222221, + "pairwise_f1": 0.8919667590027701 + }, + "wiki": { + "accuracy": 0.8333333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1000, + 200 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.16666666666666663 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.8904166666666666, + "auc_roc": 0.9671291666666667, + "f1": 0.8963342530547891, + "confusion_matrix": [ + [ + 1000, + 200 + ], + [ + 63, + 1137 + ] + ] + }, + 
"wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.86375, + "auc_roc": 0.9488027777777779, + "f1": 0.8677719369187222, + "confusion_matrix": [ + [ + 1000, + 200 + ], + [ + 127, + 1073 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.88625, + "auc_roc": 0.9676909722222221, + "f1": 0.8919667590027701, + "confusion_matrix": [ + [ + 1000, + 200 + ], + [ + 73, + 1127 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.19957470789689707, + 0.10425364971505822, + 0.06086116745007328, + 0.04081417578606677, + 0.028762150054670334, + 0.019413475335063088, + 0.011313071529999703, + 0.006697674415297823, + 0.004091889076434436, + 0.003935401036381302, + 0.0018815870159793727 + ], + "train_acc": [ + 0.8087962962962963, + 0.9174189814814815, + 0.9564236111111111, + 0.9734953703703704, + 0.982175925925926, + 0.9881365740740741, + 0.9929976851851852, + 0.9956597222222222, + 0.9976273148148148, + 0.9975694444444444, + 0.9990740740740741 + ], + "train_auc": [ + 0.8990732167352539, + 0.97397518539952, + 0.9910111954160951, + 0.9960041848851166, + 0.9979256508630543, + 0.9990071837848651, + 0.9996136527634888, + 0.9998784186385459, + 0.9999477291237998, + 0.9999236879143805, + 0.9999895779749657 + ], + "train_f1": [ + 0.8639433371767419, + 0.943696981653186, + 0.9706238052510435, + 0.9822273961971284, + 0.9880712625871417, + 0.9920742315870869, + 0.9953240329249913, + 0.9971021212472471, + 0.9984171717561673, + 0.9983787539566124, + 0.9993825254708243 + ], + "val_loss": [ + 0.16892356189588706, + 0.23779966595272223, + 0.15918385590387818, + 0.18508020597219002, + 0.33955128504943183, + 0.17744567066595968, + 0.18761360666442972, + 0.23809361916598087, + 0.24717846481992942, + 0.3055811640280202, + 0.2333151061244583 + ], + "val_acc": [ + 0.8296875, + 0.7703125, + 0.9161458333333333, + 0.903125, + 
0.9098958333333333, + 0.9177083333333333, + 0.9177083333333333, + 0.9213541666666667, + 0.9182291666666667, + 0.9239583333333333, + 0.9291666666666667 + ], + "val_auc": [ + 0.945900607638889, + 0.924205005787037, + 0.9659411168981483, + 0.9623741319444445, + 0.9572952835648147, + 0.9675354456018519, + 0.9642780671296296, + 0.9610481770833332, + 0.9629398148148148, + 0.9584772858796297, + 0.9648719618055557 + ], + "val_f1": [ + 0.8744721689059501, + 0.8242327620565962, + 0.9441166261714682, + 0.9346451159522137, + 0.9422370617696161, + 0.9443661971830986, + 0.9439716312056737, + 0.9474416985729203, + 0.945353289244692, + 0.9501706484641638, + 0.9527449617790132 + ] + }, + "test_metrics": { + "accuracy": 0.9185416666666667, + "auc_roc": 0.9692168981481482, + "f1": 0.9447974022306932, + "confusion_matrix": [ + [ + 1063, + 137 + ], + [ + 254, + 3346 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9375, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 75, + 1125 + ] + ], + "n": 1200, + "detection_rate": 0.9375, + "pairwise_auc": 0.9709586805555556, + "pairwise_f1": 0.9138911454102355 + }, + "insight": { + "accuracy": 0.8783333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 146, + 1054 + ] + ], + "n": 1200, + "detection_rate": 0.8783333333333333, + "pairwise_auc": 0.9507531250000001, + "pairwise_f1": 0.8816394813885403 + }, + "text2img": { + "accuracy": 0.9725, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 33, + 1167 + ] + ], + "n": 1200, + "detection_rate": 0.9725, + "pairwise_auc": 0.9859388888888889, + "pairwise_f1": 0.9321086261980831 + }, + "wiki": { + "accuracy": 0.8858333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1063, + 137 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.11416666666666664 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], 
+ "n": 2400, + "accuracy": 0.9116666666666666, + "auc_roc": 0.9709586805555556, + "f1": 0.9138911454102355, + "confusion_matrix": [ + [ + 1063, + 137 + ], + [ + 75, + 1125 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8820833333333333, + "auc_roc": 0.9507531250000001, + "f1": 0.8816394813885403, + "confusion_matrix": [ + [ + 1063, + 137 + ], + [ + 146, + 1054 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9291666666666667, + "auc_roc": 0.9859388888888889, + "f1": 0.9321086261980831, + "confusion_matrix": [ + [ + 1063, + 137 + ], + [ + 33, + 1167 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.9132083333333334, + "std": 0.008606965848143645, + "ci_95": 0.007544338200855582, + "values": [ + 0.914375, + 0.905625, + 0.9239583333333333, + 0.9035416666666667, + 0.9185416666666667 + ] + }, + "auc_roc": { + "mean": 0.9659977777777777, + "std": 0.0036560810622455297, + "ci_95": 0.003204696348280812, + "values": [ + 0.9687729166666665, + 0.9629736111111112, + 0.9678178240740741, + 0.9612076388888888, + 0.9692168981481482 + ] + }, + "f1": { + "mean": 0.9414816295585317, + "std": 0.0059497120774801284, + "ci_95": 0.005215152575504448, + "values": [ + 0.9419409521118802, + 0.9361881955204958, + 0.9493547939503261, + 0.935126803979263, + 0.9447974022306932 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.9465, + "std": 0.011598012281804541, + "ci_95": 0.01016610599546902, + "values": [ + 0.9333333333333333, + 0.9516666666666667, + 0.9625, + 0.9475, + 0.9375 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9465, + "std": 0.011598012281804541, + "ci_95": 0.01016610599546902, + "values": [ + 0.9333333333333333, + 0.9516666666666667, + 0.9625, + 0.9475, + 0.9375 + ] + }, + 
"pairwise_auc": { + "mean": 0.9709958333333335, + "std": 0.0024285252308698644, + "ci_95": 0.0021286962205089677, + "values": [ + 0.9714215277777778, + 0.9716375000000002, + 0.9738322916666667, + 0.9671291666666667, + 0.9709586805555556 + ] + }, + "pairwise_f1": { + "mean": 0.9070215039103348, + "std": 0.006492034890839102, + "ci_95": 0.00569051948066093, + "values": [ + 0.9087221095334685, + 0.9070691024622717, + 0.9090909090909091, + 0.8963342530547891, + 0.9138911454102355 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.8903333333333332, + "std": 0.015763001688200867, + "ci_95": 0.013816880175270344, + "values": [ + 0.8858333333333334, + 0.8775, + 0.9158333333333334, + 0.8941666666666667, + 0.8783333333333333 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.8903333333333332, + "std": 0.015763001688200867, + "ci_95": 0.013816880175270344, + "values": [ + 0.8858333333333334, + 0.8775, + 0.9158333333333334, + 0.8941666666666667, + 0.8783333333333333 + ] + }, + "pairwise_auc": { + "mean": 0.9499479166666667, + "std": 0.0031904218615427878, + "ci_95": 0.0027965280624499492, + "values": [ + 0.9548541666666667, + 0.9462055555555556, + 0.9491239583333334, + 0.9488027777777779, + 0.9507531250000001 + ] + }, + "pairwise_f1": { + "mean": 0.8767664584912772, + "std": 0.008616735719668613, + "ci_95": 0.007552901870825199, + "values": [ + 0.882890365448505, + 0.8670234664470976, + 0.8845070422535212, + 0.8677719369187222, + 0.8816394813885403 + ] + } + }, + "text2img": { + "accuracy": { + "mean": 0.9566666666666667, + "std": 0.01652018966799919, + "ci_95": 0.014480584702743658, + "values": [ + 0.9591666666666666, + 0.94, + 0.9725, + 0.9391666666666667, + 0.9725 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9566666666666667, + "std": 
0.01652018966799919, + "ci_95": 0.014480584702743658, + "values": [ + 0.9591666666666666, + 0.94, + 0.9725, + 0.9391666666666667, + 0.9725 + ] + }, + "pairwise_auc": { + "mean": 0.9770495833333334, + "std": 0.007468482219229771, + "ci_95": 0.006546413300930931, + "values": [ + 0.9800430555555556, + 0.9710777777777778, + 0.9804972222222222, + 0.9676909722222221, + 0.9859388888888889 + ] + }, + "pairwise_f1": { + "mean": 0.9123056118145296, + "std": 0.01610626351688794, + "ci_95": 0.014117762434216218, + "values": [ + 0.9222756410256411, + 0.9009584664536742, + 0.9142185663924794, + 0.8919667590027701, + 0.9321086261980831 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.8593333333333334, + "std": 0.022433543832593387, + "ci_95": 0.01966386816247279, + "values": [ + 0.8791666666666667, + 0.8533333333333334, + 0.845, + 0.8333333333333334, + 0.8858333333333334 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.14066666666666666, + "std": 0.022433543832593383, + "ci_95": 0.019663868162472786, + "values": [ + 0.12083333333333335, + 0.1466666666666666, + 0.15500000000000003, + 0.16666666666666663, + 0.11416666666666664 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.9029166666666665, + "std": 0.007822909731466082, + "ci_95": 0.006857082713029891, + "values": [ + 0.90625, + 0.9025, + 0.90375, + 0.8904166666666666, + 0.9116666666666666 + ] + }, + "auc_roc": { + "mean": 0.9709958333333335, + "std": 0.0024285252308698644, + "ci_95": 0.0021286962205089677, + "values": [ + 0.9714215277777778, + 0.9716375000000002, + 0.9738322916666667, + 0.9671291666666667, + 0.9709586805555556 + ] + }, + "f1": { + "mean": 0.9070215039103348, + "std": 0.006492034890839102, + "ci_95": 0.00569051948066093, + "values": [ + 0.9087221095334685, + 0.9070691024622717, + 0.9090909090909091, + 0.8963342530547891, + 0.9138911454102355 + ] + 
} + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.8748333333333334, + "std": 0.009407812946931092, + "ci_95": 0.008246311633424692, + "values": [ + 0.8825, + 0.8654166666666666, + 0.8804166666666666, + 0.86375, + 0.8820833333333333 + ] + }, + "auc_roc": { + "mean": 0.9499479166666667, + "std": 0.0031904218615427878, + "ci_95": 0.0027965280624499492, + "values": [ + 0.9548541666666667, + 0.9462055555555556, + 0.9491239583333334, + 0.9488027777777779, + 0.9507531250000001 + ] + }, + "f1": { + "mean": 0.8767664584912772, + "std": 0.008616735719668613, + "ci_95": 0.007552901870825199, + "values": [ + 0.882890365448505, + 0.8670234664470976, + 0.8845070422535212, + 0.8677719369187222, + 0.8816394813885403 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.908, + "std": 0.01713761700665141, + "ci_95": 0.015021783626897774, + "values": [ + 0.9191666666666667, + 0.8966666666666666, + 0.90875, + 0.88625, + 0.9291666666666667 + ] + }, + "auc_roc": { + "mean": 0.9770495833333334, + "std": 0.007468482219229771, + "ci_95": 0.006546413300930931, + "values": [ + 0.9800430555555556, + 0.9710777777777778, + 0.9804972222222222, + 0.9676909722222221, + 0.9859388888888889 + ] + }, + "f1": { + "mean": 0.9123056118145296, + "std": 0.01610626351688794, + "ci_95": 0.014117762434216218, + "values": [ + 0.9222756410256411, + 0.9009584664536742, + 0.9142185663924794, + 0.8919667590027701, + 0.9321086261980831 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data", + "run_name": "p2b_resnet18_224", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p2b_simplecnn_224.json b/classifier/outputs/logs/p2b_simplecnn_224.json new file mode 100644 index 0000000..b623fdc --- 
/dev/null +++ b/classifier/outputs/logs/p2b_simplecnn_224.json @@ -0,0 +1,1890 @@ +{ + "run_name": "p2b_simplecnn_224", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.33642795229399647, + 0.3219479806997158, + 0.3137210635399377, + 0.3085368701705226, + 0.3037944730233263, + 0.2989338524915554, + 0.2954062350922161, + 0.2930095558917081, + 0.29075437610348065, + 0.2876994205017885, + 0.2860731103078083, + 0.2841772020415024, + 0.2831934628387292, + 0.28101563903468624, + 0.2809163810478316 + ], + "train_acc": [ + 0.5032986111111111, + 0.6373263888888889, + 0.6516203703703703, + 0.6791666666666667, + 0.6784143518518518, + 0.6936921296296297, + 0.6980902777777778, + 0.7007523148148148, + 0.7054976851851852, + 0.7116319444444444, + 0.7111689814814814, + 0.7150462962962963, + 0.7181712962962963, + 0.7220486111111111, + 0.7201967592592593 + ], + "train_auc": [ + 0.6208675822330818, + 0.6855963416638089, + 0.7096752204075217, + 0.7255443297753772, + 0.7371636016803841, + 0.7485511956304298, + 0.7570394840249199, + 0.7623238525948789, + 0.7670263917466851, + 0.7727034572187929, + 0.7772292952674897, + 0.7811661326160265, + 0.7833068540666439, + 0.7878145719021491, + 0.7877469850251486 + ], + "train_f1": [ + 0.5607247044372793, + 0.7253122945430638, + 0.736703988803359, + 0.7641252552756977, + 0.7611228130507673, + 0.7746412909268957, + 0.7777825105422328, + 0.779290622732511, + 0.784135737009544, + 0.7882907762246676, + 0.7884542025176959, + 0.7916031826646351, + 0.7935915910824786, + 0.7963363439765934, + 0.7952745903374687 + ], + "val_loss": [ + 0.33139895225564636, + 0.32860592926541965, + 0.3132358161111673, + 0.30545889809727667, + 0.30481020535031955, + 0.2997308722386757, + 0.30189659756918746, + 0.29470868905385333, + 0.2992896310985088, + 0.29820267458756766, + 0.29505674292643863, + 0.291368980333209, + 0.2923826087266207, + 0.29070129444201787, + 
0.2902953773736954 + ], + "val_acc": [ + 0.6026041666666667, + 0.5442708333333334, + 0.69375, + 0.6776041666666667, + 0.6494791666666667, + 0.6802083333333333, + 0.7114583333333333, + 0.6895833333333333, + 0.7541666666666667, + 0.6333333333333333, + 0.6473958333333333, + 0.7427083333333333, + 0.725, + 0.7171875, + 0.725 + ], + "val_auc": [ + 0.6414945023148148, + 0.6700086805555556, + 0.7251359953703703, + 0.7353559027777777, + 0.7346006944444444, + 0.7435886863425925, + 0.7441890914351852, + 0.7618916377314814, + 0.7642701099537037, + 0.7501671006944444, + 0.7580345775462963, + 0.7732747395833334, + 0.7687709780092593, + 0.7657371238425925, + 0.7714467592592592 + ], + "val_f1": [ + 0.6999606763664963, + 0.6077991931869117, + 0.7776096822995462, + 0.7599844901124467, + 0.7256420709335507, + 0.7610894941634241, + 0.7979576951130561, + 0.7660910518053375, + 0.8326241134751773, + 0.7044500419815282, + 0.7182688306283812, + 0.8182487122884474, + 0.8041543026706232, + 0.7959413754227734, + 0.8035714285714286 + ] + }, + "test_metrics": { + "accuracy": 0.7410416666666667, + "auc_roc": 0.7815633101851851, + "f1": 0.8191473883311509, + "confusion_matrix": [ + [ + 742, + 458 + ], + [ + 785, + 2815 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.7883333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 254, + 946 + ] + ], + "n": 1200, + "detection_rate": 0.7883333333333333, + "pairwise_auc": 0.7842034722222222, + "pairwise_f1": 0.7265745007680492 + }, + "insight": { + "accuracy": 0.625, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 450, + 750 + ] + ], + "n": 1200, + "detection_rate": 0.625, + "pairwise_auc": 0.6708861111111111, + "pairwise_f1": 0.6229235880398671 + }, + "text2img": { + "accuracy": 0.9325, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 81, + 1119 + ] + ], + "n": 1200, + "detection_rate": 0.9325, + "pairwise_auc": 0.889600347222222, 
+ "pairwise_f1": 0.8059056535830033 + }, + "wiki": { + "accuracy": 0.6183333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 742, + 458 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.3816666666666667 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.7033333333333334, + "auc_roc": 0.7842034722222222, + "f1": 0.7265745007680492, + "confusion_matrix": [ + [ + 742, + 458 + ], + [ + 254, + 946 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.6216666666666667, + "auc_roc": 0.6708861111111111, + "f1": 0.6229235880398671, + "confusion_matrix": [ + [ + 742, + 458 + ], + [ + 450, + 750 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7754166666666666, + "auc_roc": 0.889600347222222, + "f1": 0.8059056535830033, + "confusion_matrix": [ + [ + 742, + 458 + ], + [ + 81, + 1119 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.33494184792593673, + 0.31980081793886644, + 0.31173074910486187, + 0.3052182952011073, + 0.3005806598674368, + 0.29816654459745795, + 0.2949761002428002, + 0.29145461365028663, + 0.28799140657539724, + 0.2845175684326225, + 0.282909574828766, + 0.2800294547997139, + 0.2797196310427454, + 0.2786472484745361, + 0.2785897046327591 + ], + "train_acc": [ + 0.55, + 0.6352430555555556, + 0.6655092592592593, + 0.6780671296296297, + 0.6850115740740741, + 0.6923032407407408, + 0.7, + 0.7021990740740741, + 0.7053819444444445, + 0.7097800925925926, + 0.7160300925925925, + 0.7147569444444445, + 0.7215856481481482, + 0.7217013888888889, + 0.719849537037037 + ], + "train_auc": [ + 0.6264318272748057, + 0.6899175793752857, + 0.715228650477252, + 0.7334458769147234, + 0.7438965960076589, + 0.7498880994227252, + 0.756508576960448, + 0.7636998099565615, + 
0.7706304744655922, + 0.7775100112168496, + 0.7821638160150892, + 0.7874386556212849, + 0.7890522297953818, + 0.791166579146662, + 0.7912839809813672 + ], + "train_f1": [ + 0.628972230174635, + 0.7225914352361251, + 0.7516542064105869, + 0.7607208912211277, + 0.7669848880517145, + 0.7732912633778195, + 0.7802831228278376, + 0.7815604041090075, + 0.7833709203863666, + 0.7869493181528527, + 0.7920674604856138, + 0.7903712839705694, + 0.7961699783925772, + 0.796616620850074, + 0.7949337061041216 + ], + "val_loss": [ + 0.327873378743728, + 0.3189878040303787, + 0.3057847712188959, + 0.3495474042991797, + 0.31820161218444504, + 0.30460814436276756, + 0.29808515633145966, + 0.3056687137732903, + 0.3086421674117446, + 0.2928515362242858, + 0.2853747680783272, + 0.2849019923557838, + 0.28397251789768535, + 0.28498338237404824, + 0.28285817181070644 + ], + "val_acc": [ + 0.4895833333333333, + 0.7265625, + 0.6947916666666667, + 0.4005208333333333, + 0.7536458333333333, + 0.60625, + 0.7359375, + 0.7442708333333333, + 0.7739583333333333, + 0.7505208333333333, + 0.7005208333333334, + 0.7234375, + 0.7364583333333333, + 0.7270833333333333, + 0.7088541666666667 + ], + "val_auc": [ + 0.6825694444444443, + 0.7175969328703704, + 0.7363317418981482, + 0.735716869212963, + 0.7476859085648148, + 0.7536957465277776, + 0.7642013888888889, + 0.7549377893518519, + 0.7705635127314814, + 0.7772786458333333, + 0.7824450231481481, + 0.7833586516203704, + 0.7868127893518518, + 0.7855613425925926, + 0.7867650462962963 + ], + "val_f1": [ + 0.5251937984496124, + 0.8176450156304272, + 0.7756508422664625, + 0.34787535410764875, + 0.8420701168614357, + 0.6690017513134852, + 0.8169014084507042, + 0.8283816847256205, + 0.8544600938967136, + 0.8292335115864528, + 0.7753028526768269, + 0.7987874194770747, + 0.812035661218425, + 0.8054936896807721, + 0.7845857418111754 + ] + }, + "test_metrics": { + "accuracy": 0.7366666666666667, + "auc_roc": 0.7832386574074073, + "f1": 0.81417230226404, + 
"confusion_matrix": [ + [ + 767, + 433 + ], + [ + 831, + 2769 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.7683333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 278, + 922 + ] + ], + "n": 1200, + "detection_rate": 0.7683333333333333, + "pairwise_auc": 0.7756361111111111, + "pairwise_f1": 0.7217221135029355 + }, + "insight": { + "accuracy": 0.6083333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 470, + 730 + ] + ], + "n": 1200, + "detection_rate": 0.6083333333333333, + "pairwise_auc": 0.6721888888888888, + "pairwise_f1": 0.6178586542530682 + }, + "text2img": { + "accuracy": 0.9308333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 83, + 1117 + ] + ], + "n": 1200, + "detection_rate": 0.9308333333333333, + "pairwise_auc": 0.9018909722222223, + "pairwise_f1": 0.8123636363636364 + }, + "wiki": { + "accuracy": 0.6391666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 767, + 433 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.36083333333333334 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.70375, + "auc_roc": 0.7756361111111111, + "f1": 0.7217221135029355, + "confusion_matrix": [ + [ + 767, + 433 + ], + [ + 278, + 922 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.62375, + "auc_roc": 0.6721888888888888, + "f1": 0.6178586542530682, + "confusion_matrix": [ + [ + 767, + 433 + ], + [ + 470, + 730 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.785, + "auc_roc": 0.9018909722222223, + "f1": 0.8123636363636364, + "confusion_matrix": [ + [ + 767, + 433 + ], + [ + 83, + 1117 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": 
{ + "train_loss": [ + 0.33760875711838406, + 0.3252779006406113, + 0.3165019385240696, + 0.3092121518596455, + 0.3049778057469262, + 0.2998945628051405, + 0.29731951521502603, + 0.29386990506339955, + 0.29232703047218145, + 0.2887964825387354, + 0.2876195503605737, + 0.28566389881350374, + 0.28347732430254974, + 0.2826591684586472, + 0.2824135138204804 + ], + "train_acc": [ + 0.6177083333333333, + 0.6137152777777778, + 0.6525462962962963, + 0.6725694444444444, + 0.6802662037037037, + 0.6881944444444444, + 0.6966435185185185, + 0.6964699074074074, + 0.7039930555555556, + 0.7054398148148148, + 0.7090856481481481, + 0.7141782407407408, + 0.7175925925925926, + 0.7179976851851851, + 0.7211805555555556 + ], + "train_auc": [ + 0.6101162676468908, + 0.6684934502886374, + 0.6999834872970965, + 0.7212176443901462, + 0.7332868048411065, + 0.744878418638546, + 0.7520929248113855, + 0.758775720164609, + 0.7620998996199132, + 0.769980593778578, + 0.77223479652492, + 0.7768239258259031, + 0.7817781653663693, + 0.7825633805584133, + 0.784729938271605 + ], + "train_f1": [ + 0.7213833825390131, + 0.7018092472637927, + 0.7402440079605433, + 0.7574172526153319, + 0.7640199888950583, + 0.7699009224461907, + 0.7778248707298465, + 0.7763040047767304, + 0.7829776401204973, + 0.7836252338037749, + 0.7869102623881989, + 0.7913833157338965, + 0.7937447168216399, + 0.7938227205415697, + 0.7965199763493538 + ], + "val_loss": [ + 0.3363726057112217, + 0.3378799984852473, + 0.3096979019542535, + 0.31894261625905834, + 0.3030717228849729, + 0.2983466026683648, + 0.2919511067370574, + 0.29252975930770236, + 0.29202877171337605, + 0.29389017770687736, + 0.28525492958724497, + 0.28568517888585726, + 0.2854819449285666, + 0.2812471378594637, + 0.2815050051858028 + ], + "val_acc": [ + 0.653125, + 0.4359375, + 0.6588541666666666, + 0.7401041666666667, + 0.678125, + 0.6567708333333333, + 0.6880208333333333, + 0.68125, + 0.6692708333333334, + 0.7494791666666667, + 0.7083333333333334, + 
0.7177083333333333, + 0.7328125, + 0.70625, + 0.7072916666666667 + ], + "val_auc": [ + 0.6254260706018518, + 0.6773155381944445, + 0.7258810763888889, + 0.7318865740740741, + 0.74162109375, + 0.7514633969907408, + 0.7647808159722222, + 0.7585409432870371, + 0.763015769675926, + 0.7770023148148149, + 0.7782219328703703, + 0.7799363425925926, + 0.7832139756944445, + 0.7895507812499999, + 0.7889966724537036 + ], + "val_f1": [ + 0.7611190817790531, + 0.41805480924234284, + 0.7399761810242159, + 0.8298670303443573, + 0.7595330739299611, + 0.7333063537029543, + 0.7695267410542517, + 0.7609375, + 0.7448774608276416, + 0.8299752562743019, + 0.7851112816577129, + 0.7946969696969697, + 0.8089385474860336, + 0.7815646785437645, + 0.7816627816627817 + ] + }, + "test_metrics": { + "accuracy": 0.705, + "auc_roc": 0.7808999999999999, + "f1": 0.7822878228782287, + "confusion_matrix": [ + [ + 840, + 360 + ], + [ + 1056, + 2544 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.7025, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 357, + 843 + ] + ], + "n": 1200, + "detection_rate": 0.7025, + "pairwise_auc": 0.7759916666666666, + "pairwise_f1": 0.7016229712858927 + }, + "insight": { + "accuracy": 0.5258333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 569, + 631 + ] + ], + "n": 1200, + "detection_rate": 0.5258333333333334, + "pairwise_auc": 0.6704972222222223, + "pairwise_f1": 0.5759926973984482 + }, + "text2img": { + "accuracy": 0.8916666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 130, + 1070 + ] + ], + "n": 1200, + "detection_rate": 0.8916666666666667, + "pairwise_auc": 0.8962111111111111, + "pairwise_f1": 0.8136882129277566 + }, + "wiki": { + "accuracy": 0.7, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 840, + 360 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.30000000000000004 + } + }, + "pair_metrics": 
{ + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.70125, + "auc_roc": 0.7759916666666666, + "f1": 0.7016229712858927, + "confusion_matrix": [ + [ + 840, + 360 + ], + [ + 357, + 843 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.6129166666666667, + "auc_roc": 0.6704972222222223, + "f1": 0.5759926973984482, + "confusion_matrix": [ + [ + 840, + 360 + ], + [ + 569, + 631 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7958333333333333, + "auc_roc": 0.8962111111111111, + "f1": 0.8136882129277566, + "confusion_matrix": [ + [ + 840, + 360 + ], + [ + 130, + 1070 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.337249244970304, + 0.3231975228698165, + 0.3162407441271676, + 0.3098954699933529, + 0.3045088079792482, + 0.3009769942473482, + 0.2982580928062951, + 0.2956829953248854, + 0.2917421202140826, + 0.289607167299147, + 0.28779197931289674, + 0.2860722848110729, + 0.283746699133405, + 0.2829609624765537, + 0.28213512640860344 + ], + "train_acc": [ + 0.6210069444444445, + 0.6277199074074075, + 0.6588541666666666, + 0.6659143518518519, + 0.679224537037037, + 0.6872106481481481, + 0.6920138888888889, + 0.7013888888888888, + 0.7019675925925926, + 0.705324074074074, + 0.7071180555555555, + 0.7070023148148148, + 0.7111689814814814, + 0.7159722222222222, + 0.7142939814814815 + ], + "train_auc": [ + 0.6118265514260401, + 0.6770355545553269, + 0.7007076885430956, + 0.718720037794353, + 0.7336591042238225, + 0.7417046574931413, + 0.7478195105309785, + 0.7550400448673984, + 0.7627371077674897, + 0.7689429012345679, + 0.7719962330675583, + 0.7752038144433013, + 0.7806546585648149, + 0.7826317890517833, + 0.784322041466621 + ], + "train_f1": [ + 0.7278620403075006, + 0.7161202065222187, + 0.7460912262566223, + 
0.7510242808470263, + 0.7623376066543756, + 0.7697942842540142, + 0.7732617586912065, + 0.7820209530246705, + 0.7807577692635164, + 0.7833744575852973, + 0.7851411589895988, + 0.7838818457335553, + 0.7874632713026445, + 0.7914861075707367, + 0.7899774535244821 + ], + "val_loss": [ + 0.33459077452619873, + 0.32016269663969676, + 0.3262031281987826, + 0.3102393079549074, + 0.30205371951063475, + 0.3108681572601199, + 0.29243061194817227, + 0.28962724208831786, + 0.2867765666296085, + 0.2884744721154372, + 0.2863101380566756, + 0.2838819123804569, + 0.28191759139299394, + 0.2815056130290031, + 0.28172559129695096 + ], + "val_acc": [ + 0.6703125, + 0.5515625, + 0.4947916666666667, + 0.5807291666666666, + 0.6942708333333333, + 0.7411458333333333, + 0.7192708333333333, + 0.7166666666666667, + 0.6927083333333334, + 0.6557291666666667, + 0.7307291666666667, + 0.7213541666666666, + 0.7005208333333334, + 0.7057291666666666, + 0.7255208333333333 + ], + "val_auc": [ + 0.6358760127314814, + 0.7006792534722223, + 0.7046932870370369, + 0.724578269675926, + 0.7411559606481481, + 0.7500896990740742, + 0.764841579861111, + 0.7726793981481481, + 0.7762543402777777, + 0.7723213252314814, + 0.780099826388889, + 0.781705005787037, + 0.7850535300925926, + 0.7859136284722223, + 0.7871607349537038 + ], + "val_f1": [ + 0.7757704569606801, + 0.6070287539936102, + 0.5174129353233831, + 0.6442775077330977, + 0.778406946017365, + 0.8296194720603359, + 0.7998514667656889, + 0.7970149253731343, + 0.7698907956318253, + 0.7278715520790449, + 0.8081632653061225, + 0.7990987607960947, + 0.7758284600389863, + 0.7783444488034523, + 0.8004543733434305 + ] + }, + "test_metrics": { + "accuracy": 0.7135416666666666, + "auc_roc": 0.7887782407407407, + "f1": 0.789336601807875, + "confusion_matrix": [ + [ + 849, + 351 + ], + [ + 1024, + 2576 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.6983333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 
362, + 838 + ] + ], + "n": 1200, + "detection_rate": 0.6983333333333334, + "pairwise_auc": 0.7784409722222221, + "pairwise_f1": 0.7015487651737129 + }, + "insight": { + "accuracy": 0.5433333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 548, + 652 + ] + ], + "n": 1200, + "detection_rate": 0.5433333333333333, + "pairwise_auc": 0.684811111111111, + "pairwise_f1": 0.5919201089423514 + }, + "text2img": { + "accuracy": 0.905, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 114, + 1086 + ] + ], + "n": 1200, + "detection_rate": 0.905, + "pairwise_auc": 0.9030826388888888, + "pairwise_f1": 0.8236632536973834 + }, + "wiki": { + "accuracy": 0.7075, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 849, + 351 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.2925 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.7029166666666666, + "auc_roc": 0.7784409722222221, + "f1": 0.7015487651737129, + "confusion_matrix": [ + [ + 849, + 351 + ], + [ + 362, + 838 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.6254166666666666, + "auc_roc": 0.684811111111111, + "f1": 0.5919201089423514, + "confusion_matrix": [ + [ + 849, + 351 + ], + [ + 548, + 652 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.80625, + "auc_roc": 0.9030826388888888, + "f1": 0.8236632536973834, + "confusion_matrix": [ + [ + 849, + 351 + ], + [ + 114, + 1086 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.3349949306359998, + 0.31961298769822827, + 0.31237826744715375, + 0.3078970141708851, + 0.3034489966377064, + 0.2992694077116472, + 0.29664888122567423, + 0.2930278959925528, + 0.2906754905150996, + 0.28718290196524726, + 
0.2851601732273897, + 0.28338901698589325, + 0.28234123562773067, + 0.28149240149392024, + 0.2816910716118636 + ], + "train_acc": [ + 0.5421296296296296, + 0.6496527777777777, + 0.6669560185185185, + 0.6735532407407407, + 0.688599537037037, + 0.6902777777777778, + 0.6973379629629629, + 0.6978009259259259, + 0.7050347222222222, + 0.7114583333333333, + 0.7095486111111111, + 0.7146412037037037, + 0.7168981481481481, + 0.7177662037037037, + 0.7159143518518518 + ], + "train_auc": [ + 0.6268254708219021, + 0.6920522905235482, + 0.714971305941358, + 0.7269601444615912, + 0.737362968678555, + 0.74836602830647, + 0.7535375050011431, + 0.7597683220450389, + 0.7662498839020349, + 0.7734178526520348, + 0.7777794745941929, + 0.7818110121599222, + 0.7836261324016918, + 0.785270436814129, + 0.7840435492398263 + ], + "train_f1": [ + 0.6191026381667629, + 0.7376040221914009, + 0.7521212904337339, + 0.7572406076515902, + 0.7714006542333999, + 0.7713406818764419, + 0.7777305567360816, + 0.7778628551982304, + 0.7835944465673162, + 0.7890148950575491, + 0.7868156139829249, + 0.7906423810130344, + 0.7927294297093467, + 0.7934262357575501, + 0.7924927082893013 + ], + "val_loss": [ + 0.3254904938240846, + 0.3161329500377178, + 0.30968490516146024, + 0.30678483756879965, + 0.29996535430351895, + 0.30060380374391876, + 0.29814002898832165, + 0.29347484682997066, + 0.2959611816952626, + 0.2879257820546627, + 0.2859937395900488, + 0.2879338406026363, + 0.2857776733736197, + 0.2840757895261049, + 0.28381205859283604 + ], + "val_acc": [ + 0.6151041666666667, + 0.6411458333333333, + 0.6286458333333333, + 0.7083333333333334, + 0.6864583333333333, + 0.7125, + 0.7328125, + 0.6666666666666666, + 0.7536458333333333, + 0.675, + 0.7005208333333334, + 0.6484375, + 0.7255208333333333, + 0.7203125, + 0.7083333333333334 + ], + "val_auc": [ + 0.6711762152777778, + 0.7036581307870371, + 0.7218287037037037, + 0.7360532407407407, + 0.7445963541666667, + 0.7471513310185186, + 0.7560235821759259, + 
0.760847800925926, + 0.7678522858796296, + 0.7703587962962963, + 0.7753284143518518, + 0.7758318865740741, + 0.7774797453703703, + 0.7787594039351852, + 0.7794943576388889 + ], + "val_f1": [ + 0.7002028397565923, + 0.7229593888218737, + 0.7037806398005816, + 0.7956204379562044, + 0.7691717791411042, + 0.7954040029651593, + 0.8144665461121158, + 0.7425583266291231, + 0.8329212292476157, + 0.7527733755942948, + 0.7768723321691889, + 0.7193347193347194, + 0.8028432472876917, + 0.7969754253308129, + 0.7856049004594181 + ] + }, + "test_metrics": { + "accuracy": 0.7179166666666666, + "auc_roc": 0.7921833333333332, + "f1": 0.7917563826514918, + "confusion_matrix": [ + [ + 872, + 328 + ], + [ + 1026, + 2574 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.685, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 378, + 822 + ] + ], + "n": 1200, + "detection_rate": 0.685, + "pairwise_auc": 0.77563125, + "pairwise_f1": 0.6995744680851064 + }, + "insight": { + "accuracy": 0.5508333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 539, + 661 + ] + ], + "n": 1200, + "detection_rate": 0.5508333333333333, + "pairwise_auc": 0.6917645833333333, + "pairwise_f1": 0.6039287345820009 + }, + "text2img": { + "accuracy": 0.9091666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 109, + 1091 + ] + ], + "n": 1200, + "detection_rate": 0.9091666666666667, + "pairwise_auc": 0.9091541666666666, + "pairwise_f1": 0.8331424207712868 + }, + "wiki": { + "accuracy": 0.7266666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 872, + 328 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.2733333333333333 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.7058333333333333, + "auc_roc": 0.77563125, + "f1": 0.6995744680851064, + "confusion_matrix": [ + [ + 872, + 
328 + ], + [ + 378, + 822 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.63875, + "auc_roc": 0.6917645833333333, + "f1": 0.6039287345820009, + "confusion_matrix": [ + [ + 872, + 328 + ], + [ + 539, + 661 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.8179166666666666, + "auc_roc": 0.9091541666666666, + "f1": 0.8331424207712868, + "confusion_matrix": [ + [ + 872, + 328 + ], + [ + 109, + 1091 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.7228333333333332, + "std": 0.015422718406803539, + "ci_95": 0.013518608728136386, + "values": [ + 0.7410416666666667, + 0.7366666666666667, + 0.705, + 0.7135416666666666, + 0.7179166666666666 + ] + }, + "auc_roc": { + "mean": 0.7853327083333331, + "std": 0.004925572842552334, + "ci_95": 0.004317454955996964, + "values": [ + 0.7815633101851851, + 0.7832386574074073, + 0.7808999999999999, + 0.7887782407407407, + 0.7921833333333332 + ] + }, + "f1": { + "mean": 0.7993400995865574, + "std": 0.016284085750928216, + "ci_95": 0.014273630494680391, + "values": [ + 0.8191473883311509, + 0.81417230226404, + 0.7822878228782287, + 0.789336601807875, + 0.7917563826514918 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.7285, + "std": 0.0464892460683113, + "ci_95": 0.04074962085713188, + "values": [ + 0.7883333333333333, + 0.7683333333333333, + 0.7025, + 0.6983333333333334, + 0.685 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.7285, + "std": 0.0464892460683113, + "ci_95": 0.04074962085713188, + "values": [ + 0.7883333333333333, + 0.7683333333333333, + 0.7025, + 0.6983333333333334, + 0.685 + ] + }, + "pairwise_auc": { + "mean": 0.7779806944444444, + "std": 0.0036711037376233246, + "ci_95": 0.003217864303833488, + "values": [ + 
0.7842034722222222, + 0.7756361111111111, + 0.7759916666666666, + 0.7784409722222221, + 0.77563125 + ] + }, + "pairwise_f1": { + "mean": 0.7102085637631392, + "std": 0.012866567358310677, + "ci_95": 0.011278043546102925, + "values": [ + 0.7265745007680492, + 0.7217221135029355, + 0.7016229712858927, + 0.7015487651737129, + 0.6995744680851064 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.5706666666666667, + "std": 0.043362970634196876, + "ci_95": 0.03800931961740142, + "values": [ + 0.625, + 0.6083333333333333, + 0.5258333333333334, + 0.5433333333333333, + 0.5508333333333333 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.5706666666666667, + "std": 0.043362970634196876, + "ci_95": 0.03800931961740142, + "values": [ + 0.625, + 0.6083333333333333, + 0.5258333333333334, + 0.5433333333333333, + 0.5508333333333333 + ] + }, + "pairwise_auc": { + "mean": 0.6780295833333333, + "std": 0.009702045079414632, + "ci_95": 0.008504217468788676, + "values": [ + 0.6708861111111111, + 0.6721888888888888, + 0.6704972222222223, + 0.684811111111111, + 0.6917645833333333 + ] + }, + "pairwise_f1": { + "mean": 0.6025247566431472, + "std": 0.019167815557193456, + "ci_95": 0.01680133111789653, + "values": [ + 0.6229235880398671, + 0.6178586542530682, + 0.5759926973984482, + 0.5919201089423514, + 0.6039287345820009 + ] + } + }, + "text2img": { + "accuracy": { + "mean": 0.9138333333333334, + "std": 0.017525774669833474, + "ci_95": 0.0153620188198759, + "values": [ + 0.9325, + 0.9308333333333333, + 0.8916666666666667, + 0.905, + 0.9091666666666667 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9138333333333334, + "std": 0.017525774669833474, + "ci_95": 0.0153620188198759, + "values": [ + 0.9325, + 0.9308333333333333, + 0.8916666666666667, + 0.905, + 
0.9091666666666667 + ] + }, + "pairwise_auc": { + "mean": 0.8999878472222221, + "std": 0.007405824131828075, + "ci_95": 0.0064914910657649675, + "values": [ + 0.889600347222222, + 0.9018909722222223, + 0.8962111111111111, + 0.9030826388888888, + 0.9091541666666666 + ] + }, + "pairwise_f1": { + "mean": 0.8177526354686133, + "std": 0.010696877128988288, + "ci_95": 0.009376226207693192, + "values": [ + 0.8059056535830033, + 0.8123636363636364, + 0.8136882129277566, + 0.8236632536973834, + 0.8331424207712868 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.6783333333333333, + "std": 0.0468782406287229, + "ci_95": 0.041090589622875415, + "values": [ + 0.6183333333333333, + 0.6391666666666667, + 0.7, + 0.7075, + 0.7266666666666667 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.32166666666666666, + "std": 0.0468782406287229, + "ci_95": 0.041090589622875415, + "values": [ + 0.3816666666666667, + 0.36083333333333334, + 0.30000000000000004, + 0.2925, + 0.2733333333333333 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.7034166666666668, + "std": 0.001650967729679893, + "ci_95": 0.001447137020311313, + "values": [ + 0.7033333333333334, + 0.70375, + 0.70125, + 0.7029166666666666, + 0.7058333333333333 + ] + }, + "auc_roc": { + "mean": 0.7779806944444444, + "std": 0.0036711037376233246, + "ci_95": 0.003217864303833488, + "values": [ + 0.7842034722222222, + 0.7756361111111111, + 0.7759916666666666, + 0.7784409722222221, + 0.77563125 + ] + }, + "f1": { + "mean": 0.7102085637631392, + "std": 0.012866567358310677, + "ci_95": 0.011278043546102925, + "values": [ + 0.7265745007680492, + 0.7217221135029355, + 0.7016229712858927, + 0.7015487651737129, + 0.6995744680851064 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.6245, + "std": 0.00930949336251264, + "ci_95": 0.00816013071790725, + "values": [ + 
0.6216666666666667, + 0.62375, + 0.6129166666666667, + 0.6254166666666666, + 0.63875 + ] + }, + "auc_roc": { + "mean": 0.6780295833333333, + "std": 0.009702045079414632, + "ci_95": 0.008504217468788676, + "values": [ + 0.6708861111111111, + 0.6721888888888888, + 0.6704972222222223, + 0.684811111111111, + 0.6917645833333333 + ] + }, + "f1": { + "mean": 0.6025247566431472, + "std": 0.019167815557193456, + "ci_95": 0.01680133111789653, + "values": [ + 0.6229235880398671, + 0.6178586542530682, + 0.5759926973984482, + 0.5919201089423514, + 0.6039287345820009 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.7960833333333334, + "std": 0.01680825276794441, + "ci_95": 0.014733083142672097, + "values": [ + 0.7754166666666666, + 0.785, + 0.7958333333333333, + 0.80625, + 0.8179166666666666 + ] + }, + "auc_roc": { + "mean": 0.8999878472222221, + "std": 0.007405824131828075, + "ci_95": 0.0064914910657649675, + "values": [ + 0.889600347222222, + 0.9018909722222223, + 0.8962111111111111, + 0.9030826388888888, + 0.9091541666666666 + ] + }, + "f1": { + "mean": 0.8177526354686133, + "std": 0.010696877128988288, + "ci_95": 0.009376226207693192, + "values": [ + 0.8059056535830033, + 0.8123636363636364, + 0.8136882129277566, + 0.8236632536973834, + 0.8331424207712868 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data", + "run_name": "p2b_simplecnn_224", + "backbone": "simple_cnn", + "cnn_preset": "medium", + "dropout": 0.0, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p2c_resnet18_facecrop.json b/classifier/outputs/logs/p2c_resnet18_facecrop.json new file mode 100644 index 0000000..ca94206 --- /dev/null +++ b/classifier/outputs/logs/p2c_resnet18_facecrop.json @@ -0,0 +1,1825 @@ +{ + "run_name": 
"p2c_resnet18_facecrop", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.17837155399913038, + 0.08668091422188337, + 0.0546142068553578, + 0.03263587815400334, + 0.02188914533574091, + 0.014589451567802893, + 0.010271098692693933, + 0.005492528760285293, + 0.003907029165670624 + ], + "train_acc": [ + 0.8384259259259259, + 0.9328703703703703, + 0.9629050925925926, + 0.978125, + 0.9860532407407407, + 0.9905092592592593, + 0.993287037037037, + 0.9966435185185185, + 0.9976273148148148 + ], + "train_auc": [ + 0.9217162583590535, + 0.9819768268461362, + 0.9930535283064701, + 0.9974078539380431, + 0.9986854495313215, + 0.9994491598079562, + 0.9997198020261775, + 0.9999142393261315, + 0.9999583297610881 + ], + "train_f1": [ + 0.8861151900799478, + 0.9544275948770331, + 0.9750767914771181, + 0.9853477013721994, + 0.990675900491353, + 0.993663060278207, + 0.9955191594561187, + 0.9977602718566574, + 0.9984174161423554 + ], + "val_loss": [ + 0.1934239829541184, + 0.12679014972721536, + 0.2147138158713157, + 0.1646016168097655, + 0.2376978426298592, + 0.27776295945125945, + 0.2169565581716597, + 0.21303040613371801, + 0.3133047623368384 + ], + "val_acc": [ + 0.8963541666666667, + 0.8916666666666667, + 0.9078125, + 0.8703125, + 0.9078125, + 0.9322916666666666, + 0.9, + 0.915625, + 0.9161458333333333 + ], + "val_auc": [ + 0.9533991608796296, + 0.9686168981481482, + 0.9572663483796297, + 0.9695334201388889, + 0.9551953125000001, + 0.9656134259259259, + 0.9623220486111113, + 0.9663310185185185, + 0.961710792824074 + ], + "val_f1": [ + 0.9332438778933244, + 0.9238095238095239, + 0.9389444636081408, + 0.9069158878504673, + 0.9376979936642027, + 0.95578231292517, + 0.9310344827586207, + 0.9426751592356688, + 0.945293917770982 + ] + }, + "test_metrics": { + "accuracy": 0.8820833333333333, + "auc_roc": 0.9671379629629631, + "f1": 0.9163958641063515, + "confusion_matrix": [ + [ + 
1132, + 68 + ], + [ + 498, + 3102 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.8725, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 153, + 1047 + ] + ], + "n": 1200, + "detection_rate": 0.8725, + "pairwise_auc": 0.9701722222222223, + "pairwise_f1": 0.9045356371490281 + }, + "insight": { + "accuracy": 0.8133333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 224, + 976 + ] + ], + "n": 1200, + "detection_rate": 0.8133333333333334, + "pairwise_auc": 0.9557256944444444, + "pairwise_f1": 0.8698752228163993 + }, + "text2img": { + "accuracy": 0.8991666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 121, + 1079 + ] + ], + "n": 1200, + "detection_rate": 0.8991666666666667, + "pairwise_auc": 0.9755159722222223, + "pairwise_f1": 0.9194716659565403 + }, + "wiki": { + "accuracy": 0.9433333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1132, + 68 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.05666666666666664 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9079166666666667, + "auc_roc": 0.9701722222222223, + "f1": 0.9045356371490281, + "confusion_matrix": [ + [ + 1132, + 68 + ], + [ + 153, + 1047 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8783333333333333, + "auc_roc": 0.9557256944444444, + "f1": 0.8698752228163993, + "confusion_matrix": [ + [ + 1132, + 68 + ], + [ + 224, + 976 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.92125, + "auc_roc": 0.9755159722222223, + "f1": 0.9194716659565403, + "confusion_matrix": [ + [ + 1132, + 68 + ], + [ + 121, + 1079 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 
0.18338548640410104, + 0.09500005689656568, + 0.049826115153566074, + 0.032816237530291635, + 0.023678185039278578, + 0.01637398871323428, + 0.006945904467411984, + 0.005075414012214794, + 0.002860962831949076, + 0.002268254530042247, + 0.0012579843788949397, + 0.0007868543442273053, + 0.00047980366241602123, + 0.0002720828362182854, + 0.00017770904728990898 + ], + "train_acc": [ + 0.8342592592592593, + 0.9265625, + 0.9642939814814815, + 0.9791666666666666, + 0.9860532407407407, + 0.9905671296296297, + 0.9956018518518519, + 0.9971643518518518, + 0.9984953703703704, + 0.9987268518518518, + 0.9991898148148148, + 0.9995949074074074, + 0.9997685185185186, + 0.9997685185185186, + 0.9999421296296296 + ], + "train_auc": [ + 0.9170149248399634, + 0.9785777016889574, + 0.9938715099165523, + 0.9971903131430041, + 0.9987185285208048, + 0.9991661576217421, + 0.9998722029320988, + 0.9999299750657293, + 0.9999720114597623, + 0.9999805848479653, + 0.9999880061871285, + 0.9999861575502973, + 0.9999997499428441, + 0.9999999821387745, + 1.0 + ], + "train_f1": [ + 0.8831783325175395, + 0.9500374030473641, + 0.9760043557733442, + 0.9860443479609242, + 0.9906859903381643, + 0.9936965853281257, + 0.9970645036693704, + 0.9981083272207852, + 0.9989966038900895, + 0.9991509069857198, + 0.9994596680818217, + 0.999729886166313, + 0.9998456671039432, + 0.9998456551937027, + 0.9999614182645935 + ], + "val_loss": [ + 0.13804995346193513, + 0.14105506432242693, + 0.24023572888545458, + 0.24679516345046673, + 0.30523416100265116, + 0.2427545978020741, + 0.27079731949027824, + 0.2104506594655201, + 0.24569564278463077, + 0.24668190908752574, + 0.2807497647115724, + 0.22402887331851767, + 0.2197469462816419, + 0.27753435674994764, + 0.23912918382134193 + ], + "val_acc": [ + 0.8875, + 0.878125, + 0.9145833333333333, + 0.9229166666666667, + 0.909375, + 0.9213541666666667, + 0.9276041666666667, + 0.921875, + 0.928125, + 0.9296875, + 0.9364583333333333, + 0.9354166666666667, + 0.9354166666666667, + 
0.9338541666666667, + 0.9333333333333333 + ], + "val_auc": [ + 0.9551634837962963, + 0.9617997685185184, + 0.9630063657407408, + 0.9639098668981482, + 0.9572178819444445, + 0.9632414641203704, + 0.962427662037037, + 0.9640386284722222, + 0.9617570891203704, + 0.9636588541666666, + 0.9629767071759259, + 0.9667259837962963, + 0.9689279513888889, + 0.9646035879629629, + 0.9685344328703704 + ], + "val_f1": [ + 0.9223021582733812, + 0.9142857142857143, + 0.9449294828744124, + 0.9497964721845319, + 0.9412162162162162, + 0.9480206540447504, + 0.952446117003079, + 0.9474421864050455, + 0.9522160664819944, + 0.9533033552404012, + 0.9583617747440273, + 0.9568545581071677, + 0.9569145239749827, + 0.9566108643662453, + 0.955831608005521 + ] + }, + "test_metrics": { + "accuracy": 0.935625, + "auc_roc": 0.9772581018518519, + "f1": 0.9567287494748634, + "confusion_matrix": [ + [ + 1075, + 125 + ], + [ + 184, + 3416 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9533333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 56, + 1144 + ] + ], + "n": 1200, + "detection_rate": 0.9533333333333334, + "pairwise_auc": 0.9800895833333334, + "pairwise_f1": 0.9266909680032401 + }, + "insight": { + "accuracy": 0.9141666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 103, + 1097 + ] + ], + "n": 1200, + "detection_rate": 0.9141666666666667, + "pairwise_auc": 0.9639170138888888, + "pairwise_f1": 0.9058629232039637 + }, + "text2img": { + "accuracy": 0.9791666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 25, + 1175 + ] + ], + "n": 1200, + "detection_rate": 0.9791666666666666, + "pairwise_auc": 0.9877677083333334, + "pairwise_f1": 0.94 + }, + "wiki": { + "accuracy": 0.8958333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1075, + 125 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.10416666666666663 + } + }, + 
"pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9245833333333333, + "auc_roc": 0.9800895833333334, + "f1": 0.9266909680032401, + "confusion_matrix": [ + [ + 1075, + 125 + ], + [ + 56, + 1144 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.905, + "auc_roc": 0.9639170138888888, + "f1": 0.9058629232039637, + "confusion_matrix": [ + [ + 1075, + 125 + ], + [ + 103, + 1097 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9375, + "auc_roc": 0.9877677083333334, + "f1": 0.94, + "confusion_matrix": [ + [ + 1075, + 125 + ], + [ + 25, + 1175 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.18449091869923803, + 0.0947562804169677, + 0.05549010326784035, + 0.03444569395430369, + 0.021967991301506926, + 0.017280577368519852, + 0.008806067607841854, + 0.006068033144075107, + 0.003479789084553886, + 0.002117162245154018, + 0.0015876744302402216, + 0.0010428875206689218, + 0.0006022416273792798, + 0.0004376104135909979, + 0.0002453135835291202 + ], + "train_acc": [ + 0.8331597222222222, + 0.927488425925926, + 0.9630208333333333, + 0.979224537037037, + 0.987037037037037, + 0.9891203703703704, + 0.9944444444444445, + 0.996875, + 0.9983217592592593, + 0.9988425925925926, + 0.9991319444444444, + 0.9995949074074074, + 0.9996527777777777, + 0.9996527777777777, + 0.9998842592592593 + ], + "train_auc": [ + 0.9162129558184728, + 0.9787451417466849, + 0.9923763556670097, + 0.9969988765289209, + 0.9987452757058757, + 0.9993180048296754, + 0.9997558549096937, + 0.9998619773805442, + 0.9999464073931185, + 0.9999908550525834, + 0.9999901852566301, + 0.9999951863997484, + 0.9999995177469135, + 0.999999660636717, + 0.9999999464163237 + ], + "train_f1": [ + 0.8822736738944016, + 0.9508029369036868, + 0.9751506902586039, + 
0.9860998180199017, + 0.9913359634872747, + 0.9927373870045585, + 0.9962894248608535, + 0.9979155408013588, + 0.9988807842229169, + 0.9992283950617284, + 0.9994211846420992, + 0.9997299070108423, + 0.9997684827905541, + 0.9997684827905541, + 0.9999228335519716 + ], + "val_loss": [ + 0.15567205057789882, + 0.22502039108076133, + 0.18017800888143634, + 0.17141791892548403, + 0.3055859423493227, + 0.22991524864191887, + 0.24873213853061316, + 0.3944835726075629, + 0.29895355441332566, + 0.2677129467035229, + 0.2291035176460203, + 0.2527602607322327, + 0.23705230109674932, + 0.2504274368242629, + 0.23202879253328015 + ], + "val_acc": [ + 0.8354166666666667, + 0.903125, + 0.9140625, + 0.9130208333333333, + 0.9213541666666667, + 0.9276041666666667, + 0.9244791666666666, + 0.9223958333333333, + 0.9286458333333333, + 0.9354166666666667, + 0.9296875, + 0.9291666666666667, + 0.9380208333333333, + 0.9348958333333334, + 0.9338541666666667 + ], + "val_auc": [ + 0.9571947337962964, + 0.9600159143518519, + 0.9605823206018519, + 0.9636848958333334, + 0.9598401331018518, + 0.9659837962962963, + 0.9580743634259259, + 0.9480541087962964, + 0.9602524594907409, + 0.962152777777778, + 0.9678660300925925, + 0.9663635706018517, + 0.9697511574074075, + 0.9698603877314815, + 0.9704333043981482 + ], + "val_f1": [ + 0.8791124713083397, + 0.9378342245989305, + 0.9428472462764115, + 0.9407591344448386, + 0.9489001692047377, + 0.9518864659051575, + 0.9497748527883616, + 0.9498822737975109, + 0.9530661185337445, + 0.9572118702553485, + 0.9528466643381068, + 0.9528759528759528, + 0.9587521663778162, + 0.9569114098586694, + 0.955887460923932 + ] + }, + "test_metrics": { + "accuracy": 0.9397916666666667, + "auc_roc": 0.97951875, + "f1": 0.9597885070265758, + "confusion_matrix": [ + [ + 1062, + 138 + ], + [ + 151, + 3449 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9558333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 53, + 1147 + ] 
+ ], + "n": 1200, + "detection_rate": 0.9558333333333333, + "pairwise_auc": 0.9794552083333333, + "pairwise_f1": 0.9231388329979879 + }, + "insight": { + "accuracy": 0.9316666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 82, + 1118 + ] + ], + "n": 1200, + "detection_rate": 0.9316666666666666, + "pairwise_auc": 0.9698652777777778, + "pairwise_f1": 0.9104234527687296 + }, + "text2img": { + "accuracy": 0.9866666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 16, + 1184 + ] + ], + "n": 1200, + "detection_rate": 0.9866666666666667, + "pairwise_auc": 0.9892357638888889, + "pairwise_f1": 0.9389373513084853 + }, + "wiki": { + "accuracy": 0.885, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1062, + 138 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.11499999999999999 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9204166666666667, + "auc_roc": 0.9794552083333333, + "f1": 0.9231388329979879, + "confusion_matrix": [ + [ + 1062, + 138 + ], + [ + 53, + 1147 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.9083333333333333, + "auc_roc": 0.9698652777777778, + "f1": 0.9104234527687296, + "confusion_matrix": [ + [ + 1062, + 138 + ], + [ + 82, + 1118 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9358333333333333, + "auc_roc": 0.9892357638888889, + "f1": 0.9389373513084853, + "confusion_matrix": [ + [ + 1062, + 138 + ], + [ + 16, + 1184 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.18040739428941852, + 0.09424030264598075, + 0.05525035115252076, + 0.034907107170309044, + 0.022619862975099953, + 0.01340910082003743, + 0.010180935443370093, + 0.0055470221014771864, + 
0.002768729440802215, + 0.002049396960809544, + 0.001337939503389828, + 0.0008659062841493997, + 0.0006751639676066488, + 0.00042964793515533594, + 0.00046883536936756325 + ], + "train_acc": [ + 0.833275462962963, + 0.9280671296296297, + 0.9619791666666667, + 0.9779513888888889, + 0.985474537037037, + 0.9920717592592593, + 0.9938078703703703, + 0.9963541666666667, + 0.9986111111111111, + 0.9986689814814815, + 0.9990162037037037, + 0.9994791666666667, + 0.9994791666666667, + 0.9997106481481481, + 0.9997685185185186 + ], + "train_auc": [ + 0.91943718385631, + 0.9789158325474394, + 0.9926320926926154, + 0.9968068147719478, + 0.9986295885488111, + 0.9995704285979653, + 0.9997140685728166, + 0.9999351101680385, + 0.9999771465620714, + 0.999991891003658, + 0.9999962491426612, + 0.9999981870856196, + 0.9999991962448559, + 0.9999993927183357, + 0.9999997499428441 + ], + "train_f1": [ + 0.8823649503899391, + 0.9511072650749322, + 0.9744128987031195, + 0.9852296956774569, + 0.9902867536086065, + 0.9947094033597219, + 0.9958682472873306, + 0.997568037058483, + 0.999073859689743, + 0.9991124831178854, + 0.999343958630803, + 0.9996526839810134, + 0.9996526839810134, + 0.999807076436316, + 0.9998456671039432 + ], + "val_loss": [ + 0.21914336445527927, + 0.1180105590261519, + 0.12904602941562188, + 0.21052778689772822, + 0.19554610329505523, + 0.2098548914532633, + 0.17912958193095013, + 0.17403326073253994, + 0.2349371372461974, + 0.20780743270111088, + 0.20033595547926478, + 0.20275330990283086, + 0.21412657996277934, + 0.235956841945017, + 0.195170938181991 + ], + "val_acc": [ + 0.89375, + 0.890625, + 0.934375, + 0.9239583333333333, + 0.9078125, + 0.9296875, + 0.9234375, + 0.9203125, + 0.9364583333333333, + 0.9338541666666667, + 0.9354166666666667, + 0.9442708333333333, + 0.9453125, + 0.9458333333333333, + 0.9432291666666667 + ], + "val_auc": [ + 0.9470363136574076, + 0.9694111689814815, + 0.9738946759259259, + 0.9649153645833335, + 0.9622243923611111, + 0.9698372395833333, + 
0.9699992766203704, + 0.9741485821759259, + 0.9701287615740742, + 0.9721715856481482, + 0.9744010416666666, + 0.9743612557870371, + 0.973978587962963, + 0.9704369212962962, + 0.9756647858796296 + ], + "val_f1": [ + 0.9308943089430894, + 0.923469387755102, + 0.9561586638830898, + 0.95, + 0.9375220614189904, + 0.9535922997593674, + 0.9475935828877006, + 0.9455322178711285, + 0.9582191780821918, + 0.9561917902725078, + 0.9571230982019364, + 0.963116166839021, + 0.964004113815564, + 0.9644079397672827, + 0.9623748705557473 + ] + }, + "test_metrics": { + "accuracy": 0.935625, + "auc_roc": 0.976428587962963, + "f1": 0.9571488004437665, + "confusion_matrix": [ + [ + 1040, + 160 + ], + [ + 149, + 3451 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9566666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 52, + 1148 + ] + ], + "n": 1200, + "detection_rate": 0.9566666666666667, + "pairwise_auc": 0.9758034722222221, + "pairwise_f1": 0.9154704944178629 + }, + "insight": { + "accuracy": 0.9375, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 75, + 1125 + ] + ], + "n": 1200, + "detection_rate": 0.9375, + "pairwise_auc": 0.9677885416666665, + "pairwise_f1": 0.9054325955734407 + }, + "text2img": { + "accuracy": 0.9816666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 22, + 1178 + ] + ], + "n": 1200, + "detection_rate": 0.9816666666666667, + "pairwise_auc": 0.9856937499999999, + "pairwise_f1": 0.9282899921197794 + }, + "wiki": { + "accuracy": 0.8666666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1040, + 160 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.1333333333333333 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9116666666666666, + "auc_roc": 0.9758034722222221, + "f1": 0.9154704944178629, + "confusion_matrix": [ + 
[ + 1040, + 160 + ], + [ + 52, + 1148 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.9020833333333333, + "auc_roc": 0.9677885416666665, + "f1": 0.9054325955734407, + "confusion_matrix": [ + [ + 1040, + 160 + ], + [ + 75, + 1125 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9241666666666667, + "auc_roc": 0.9856937499999999, + "f1": 0.9282899921197794, + "confusion_matrix": [ + [ + 1040, + 160 + ], + [ + 22, + 1178 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.18253784660525896, + 0.09274266364087386, + 0.05595069021952373, + 0.03206709125089994, + 0.02324649474740673, + 0.014556840769526187, + 0.01106473373061413, + 0.004897450984359845, + 0.003464228110739761, + 0.002620224120977144, + 0.0018979384601990473, + 0.0008156847854357457, + 0.00021853346456989837 + ], + "train_acc": [ + 0.8367476851851852, + 0.9315972222222222, + 0.9619212962962963, + 0.9785879629629629, + 0.9843171296296296, + 0.9912615740740741, + 0.9939236111111112, + 0.9967592592592592, + 0.9979166666666667, + 0.9983217592592593, + 0.9987847222222223, + 0.9996527777777777, + 0.9999421296296296 + ], + "train_auc": [ + 0.9176218850022863, + 0.9793452164066073, + 0.9925894043638546, + 0.9974967492569731, + 0.9987300758030406, + 0.9994492044610196, + 0.9995728309327847, + 0.9999372624457019, + 0.9999361639803384, + 0.9999688321616369, + 0.99998870277492, + 0.9999955882773205, + 0.9999999999999999 + ], + "train_f1": [ + 0.8852645707080977, + 0.953552342030808, + 0.9744307142302013, + 0.9856711331422818, + 0.9895119780177252, + 0.9941628976767559, + 0.9959442234153502, + 0.9978371697821721, + 0.9986101459346769, + 0.9988803521099572, + 0.9991895334028019, + 0.9997684827905541, + 0.9999614212414645 + ], + "val_loss": [ + 0.15742664287487665, + 0.1538327511632815, + 0.13509041668924812, + 
0.17210772472899408, + 0.14238195471310366, + 0.218546209925474, + 0.15706069137292314, + 0.16238107544680436, + 0.19468771822976122, + 0.19298389890391263, + 0.17740953016473213, + 0.1736834918614477, + 0.16653081489178778 + ], + "val_acc": [ + 0.8625, + 0.9223958333333333, + 0.928125, + 0.9338541666666667, + 0.921875, + 0.9416666666666667, + 0.9380208333333333, + 0.9239583333333333, + 0.9359375, + 0.9291666666666667, + 0.9411458333333333, + 0.9416666666666667, + 0.9432291666666667 + ], + "val_auc": [ + 0.9497395833333334, + 0.9683940972222222, + 0.9745515046296296, + 0.9692657696759258, + 0.975681423611111, + 0.967502170138889, + 0.9756336805555554, + 0.9774450231481481, + 0.9750665509259259, + 0.969890769675926, + 0.9751280381944445, + 0.9750593171296298, + 0.977341579861111 + ], + "val_f1": [ + 0.9022222222222223, + 0.9490249743414301, + 0.9520500347463516, + 0.9558260869565217, + 0.9465431218816821, + 0.9616175462645647, + 0.9584352078239609, + 0.9478943611705924, + 0.9573952199515068, + 0.952112676056338, + 0.9605033205173017, + 0.9609756097560975, + 0.9619811649808162 + ] + }, + "test_metrics": { + "accuracy": 0.9183333333333333, + "auc_roc": 0.9770061342592594, + "f1": 0.9439519588218473, + "confusion_matrix": [ + [ + 1107, + 93 + ], + [ + 299, + 3301 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.91, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 108, + 1092 + ] + ], + "n": 1200, + "detection_rate": 0.91, + "pairwise_auc": 0.9738024305555557, + "pairwise_f1": 0.9157232704402516 + }, + "insight": { + "accuracy": 0.8808333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 143, + 1057 + ] + ], + "n": 1200, + "detection_rate": 0.8808333333333334, + "pairwise_auc": 0.9699489583333334, + "pairwise_f1": 0.8995744680851064 + }, + "text2img": { + "accuracy": 0.96, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 48, + 1152 + ] + ], + "n": 
1200, + "detection_rate": 0.96, + "pairwise_auc": 0.9872670138888889, + "pairwise_f1": 0.9423312883435583 + }, + "wiki": { + "accuracy": 0.9225, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1107, + 93 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.07750000000000001 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.91625, + "auc_roc": 0.9738024305555557, + "f1": 0.9157232704402516, + "confusion_matrix": [ + [ + 1107, + 93 + ], + [ + 108, + 1092 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.9016666666666666, + "auc_roc": 0.9699489583333334, + "f1": 0.8995744680851064, + "confusion_matrix": [ + [ + 1107, + 93 + ], + [ + 143, + 1057 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.94125, + "auc_roc": 0.9872670138888889, + "f1": 0.9423312883435583, + "confusion_matrix": [ + [ + 1107, + 93 + ], + [ + 48, + 1152 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.9222916666666666, + "std": 0.02394882057522761, + "ci_95": 0.020992066788564585, + "values": [ + 0.8820833333333333, + 0.935625, + 0.9397916666666667, + 0.935625, + 0.9183333333333333 + ] + }, + "auc_roc": { + "mean": 0.9754699074074076, + "std": 0.004803413507184044, + "ci_95": 0.004210377577432845, + "values": [ + 0.9671379629629631, + 0.9772581018518519, + 0.97951875, + 0.976428587962963, + 0.9770061342592594 + ] + }, + "f1": { + "mean": 0.9468027759746809, + "std": 0.018075548615468288, + "ci_95": 0.015843916930437404, + "values": [ + 0.9163958641063515, + 0.9567287494748634, + 0.9597885070265758, + 0.9571488004437665, + 0.9439519588218473 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.9296666666666666, + "std": 0.03751203510578673, + "ci_95": 0.03288074850459188, + "values": [ + 0.8725, + 0.9533333333333334, + 
0.9558333333333333, + 0.9566666666666667, + 0.91 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9296666666666666, + "std": 0.03751203510578673, + "ci_95": 0.03288074850459188, + "values": [ + 0.8725, + 0.9533333333333334, + 0.9558333333333333, + 0.9566666666666667, + 0.91 + ] + }, + "pairwise_auc": { + "mean": 0.9758645833333335, + "std": 0.004104948128682983, + "ci_95": 0.003598145679459516, + "values": [ + 0.9701722222222223, + 0.9800895833333334, + 0.9794552083333333, + 0.9758034722222221, + 0.9738024305555557 + ] + }, + "pairwise_f1": { + "mean": 0.9171118406016742, + "std": 0.008527416912396574, + "ci_95": 0.007474610484331243, + "values": [ + 0.9045356371490281, + 0.9266909680032401, + 0.9231388329979879, + 0.9154704944178629, + 0.9157232704402516 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.8955, + "std": 0.05095818002157367, + "ci_95": 0.04466681417886089, + "values": [ + 0.8133333333333334, + 0.9141666666666667, + 0.9316666666666666, + 0.9375, + 0.8808333333333334 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.8955, + "std": 0.05095818002157367, + "ci_95": 0.04466681417886089, + "values": [ + 0.8133333333333334, + 0.9141666666666667, + 0.9316666666666666, + 0.9375, + 0.8808333333333334 + ] + }, + "pairwise_auc": { + "mean": 0.9654490972222222, + "std": 0.005960625846888337, + "ci_95": 0.005224718916177153, + "values": [ + 0.9557256944444444, + 0.9639170138888888, + 0.9698652777777778, + 0.9677885416666665, + 0.9699489583333334 + ] + }, + "pairwise_f1": { + "mean": 0.8982337324895278, + "std": 0.016314258364705986, + "ci_95": 0.014300077956743039, + "values": [ + 0.8698752228163993, + 0.9058629232039637, + 0.9104234527687296, + 0.9054325955734407, + 0.8995744680851064 + ] + } + }, + "text2img": { + "accuracy": { + 
"mean": 0.9613333333333334, + "std": 0.03619296279051434, + "ci_95": 0.03172453064183053, + "values": [ + 0.8991666666666667, + 0.9791666666666666, + 0.9866666666666667, + 0.9816666666666667, + 0.96 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9613333333333334, + "std": 0.03619296279051434, + "ci_95": 0.03172453064183053, + "values": [ + 0.8991666666666667, + 0.9791666666666666, + 0.9866666666666667, + 0.9816666666666667, + 0.96 + ] + }, + "pairwise_auc": { + "mean": 0.9850960416666666, + "std": 0.005502799493364987, + "ci_95": 0.004823416423616481, + "values": [ + 0.9755159722222223, + 0.9877677083333334, + 0.9892357638888889, + 0.9856937499999999, + 0.9872670138888889 + ] + }, + "pairwise_f1": { + "mean": 0.9338060595456726, + "std": 0.009660181403295696, + "ci_95": 0.008467522338757406, + "values": [ + 0.9194716659565403, + 0.94, + 0.9389373513084853, + 0.9282899921197794, + 0.9423312883435583 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.9026666666666667, + "std": 0.030421803219255604, + "ci_95": 0.026665886238579968, + "values": [ + 0.9433333333333334, + 0.8958333333333334, + 0.885, + 0.8666666666666667, + 0.9225 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.09733333333333331, + "std": 0.030421803219255607, + "ci_95": 0.02666588623857997, + "values": [ + 0.05666666666666664, + 0.10416666666666663, + 0.11499999999999999, + 0.1333333333333333, + 0.07750000000000001 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.9161666666666667, + "std": 0.006656241849238607, + "ci_95": 0.005834453225833961, + "values": [ + 0.9079166666666667, + 0.9245833333333333, + 0.9204166666666667, + 0.9116666666666666, + 0.91625 + ] + }, + "auc_roc": { + "mean": 0.9758645833333335, + "std": 
0.004104948128682983, + "ci_95": 0.003598145679459516, + "values": [ + 0.9701722222222223, + 0.9800895833333334, + 0.9794552083333333, + 0.9758034722222221, + 0.9738024305555557 + ] + }, + "f1": { + "mean": 0.9171118406016742, + "std": 0.008527416912396574, + "ci_95": 0.007474610484331243, + "values": [ + 0.9045356371490281, + 0.9266909680032401, + 0.9231388329979879, + 0.9154704944178629, + 0.9157232704402516 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.8990833333333332, + "std": 0.011903839249960042, + "ci_95": 0.01043417515240718, + "values": [ + 0.8783333333333333, + 0.905, + 0.9083333333333333, + 0.9020833333333333, + 0.9016666666666666 + ] + }, + "auc_roc": { + "mean": 0.9654490972222222, + "std": 0.005960625846888337, + "ci_95": 0.005224718916177153, + "values": [ + 0.9557256944444444, + 0.9639170138888888, + 0.9698652777777778, + 0.9677885416666665, + 0.9699489583333334 + ] + }, + "f1": { + "mean": 0.8982337324895278, + "std": 0.016314258364705986, + "ci_95": 0.014300077956743039, + "values": [ + 0.8698752228163993, + 0.9058629232039637, + 0.9104234527687296, + 0.9054325955734407, + 0.8995744680851064 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.932, + "std": 0.00876684885742242, + "ci_95": 0.0076844818375158535, + "values": [ + 0.92125, + 0.9375, + 0.9358333333333333, + 0.9241666666666667, + 0.94125 + ] + }, + "auc_roc": { + "mean": 0.9850960416666666, + "std": 0.005502799493364987, + "ci_95": 0.004823416423616481, + "values": [ + 0.9755159722222223, + 0.9877677083333334, + 0.9892357638888889, + 0.9856937499999999, + 0.9872670138888889 + ] + }, + "f1": { + "mean": 0.9338060595456726, + "std": 0.009660181403295696, + "ci_95": 0.008467522338757406, + "values": [ + 0.9194716659565403, + 0.94, + 0.9389373513084853, + 0.9282899921197794, + 0.9423312883435583 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 
0.0001, + "T_max": 15, + "data_dir": "data_cropped", + "run_name": "p2c_resnet18_facecrop", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p2c_simplecnn_facecrop.json b/classifier/outputs/logs/p2c_simplecnn_facecrop.json new file mode 100644 index 0000000..4a82b93 --- /dev/null +++ b/classifier/outputs/logs/p2c_simplecnn_facecrop.json @@ -0,0 +1,1890 @@ +{ + "run_name": "p2c_simplecnn_facecrop", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.33534963467606793, + 0.3235572189092636, + 0.3171152654345389, + 0.31327827986743717, + 0.31033674619264073, + 0.30600922115974954, + 0.3029164695629367, + 0.2985397193442892, + 0.2957023377495783, + 0.2929931388684997, + 0.29124047027693856, + 0.289444793815966, + 0.28789058402180673, + 0.28674482085086683, + 0.2855528046411497 + ], + "train_acc": [ + 0.505150462962963, + 0.6240162037037037, + 0.6297453703703704, + 0.6519675925925926, + 0.6519675925925926, + 0.6697916666666667, + 0.6739583333333333, + 0.6771412037037037, + 0.6871527777777777, + 0.6933449074074074, + 0.6957175925925926, + 0.7017361111111111, + 0.7012731481481481, + 0.7065972222222222, + 0.7072916666666667 + ], + "train_auc": [ + 0.6263689379000914, + 0.6740577578446502, + 0.6934107617455418, + 0.7053752554155236, + 0.7127962552154778, + 0.7262564300411523, + 0.7339958240454961, + 0.7445143086276863, + 0.7509831175697304, + 0.7575106363597394, + 0.7618086544781665, + 0.766314845178898, + 0.769531589363283, + 0.7734971743541381, + 0.7762781314300411 + ], + "train_f1": [ + 0.5627205318332907, + 0.7125348435909915, + 0.7161238796698909, + 0.7394054944102608, + 0.7381118272078036, + 0.7545806451612903, + 0.7575105448911078, + 0.7595362268867721, + 0.768697586856067, + 0.7747119595255304, + 0.7765595784463709, + 
0.7819058903182126, + 0.7805085466451229, + 0.7846768028539879, + 0.785295865523389 + ], + "val_loss": [ + 0.33023928925395013, + 0.3251593872904778, + 0.3177362270653248, + 0.31646473705768585, + 0.31118818186223507, + 0.3061512573311726, + 0.30492371941606206, + 0.30204968092342216, + 0.3095003794878721, + 0.3010386052230994, + 0.29479843166967235, + 0.29448223834236464, + 0.293135704472661, + 0.2926966001590093, + 0.2926642368237177 + ], + "val_acc": [ + 0.6432291666666666, + 0.621875, + 0.6088541666666667, + 0.7, + 0.6661458333333333, + 0.6401041666666667, + 0.6796875, + 0.6802083333333333, + 0.7442708333333333, + 0.6151041666666667, + 0.6380208333333334, + 0.7182291666666667, + 0.7005208333333334, + 0.6973958333333333, + 0.6989583333333333 + ], + "val_auc": [ + 0.6544914641203703, + 0.6718988715277778, + 0.7002271412037038, + 0.7136226851851851, + 0.7157537615740741, + 0.7245818865740741, + 0.7274066840277778, + 0.7377973090277778, + 0.7438194444444444, + 0.7416587094907408, + 0.7539554398148147, + 0.7581662326388888, + 0.759189814814815, + 0.7572041377314815, + 0.7597214988425927 + ], + "val_f1": [ + 0.741216471477144, + 0.7109872611464968, + 0.6859054788791301, + 0.7906976744186046, + 0.7500974658869396, + 0.7219315895372234, + 0.766248574686431, + 0.7652905198776758, + 0.8301625735039778, + 0.6869970351545955, + 0.711737868104521, + 0.8016134946828016, + 0.782608695652174, + 0.7783288821060664, + 0.781557067271353 + ] + }, + "test_metrics": { + "accuracy": 0.718125, + "auc_roc": 0.7698571759259261, + "f1": 0.7970601469926504, + "confusion_matrix": [ + [ + 790, + 410 + ], + [ + 943, + 2657 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.7533333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 296, + 904 + ] + ], + "n": 1200, + "detection_rate": 0.7533333333333333, + "pairwise_auc": 0.7727059027777777, + "pairwise_f1": 0.7191726332537789 + }, + "insight": { + "accuracy": 0.5525, + "auc_roc": null, 
+ "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 537, + 663 + ] + ], + "n": 1200, + "detection_rate": 0.5525, + "pairwise_auc": 0.646773263888889, + "pairwise_f1": 0.5833699956005279 + }, + "text2img": { + "accuracy": 0.9083333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 110, + 1090 + ] + ], + "n": 1200, + "detection_rate": 0.9083333333333333, + "pairwise_auc": 0.8900923611111111, + "pairwise_f1": 0.8074074074074075 + }, + "wiki": { + "accuracy": 0.6583333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 790, + 410 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.3416666666666667 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.7058333333333333, + "auc_roc": 0.7727059027777777, + "f1": 0.7191726332537789, + "confusion_matrix": [ + [ + 790, + 410 + ], + [ + 296, + 904 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.6054166666666667, + "auc_roc": 0.646773263888889, + "f1": 0.5833699956005279, + "confusion_matrix": [ + [ + 790, + 410 + ], + [ + 537, + 663 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7833333333333333, + "auc_roc": 0.8900923611111111, + "f1": 0.8074074074074075, + "confusion_matrix": [ + [ + 790, + 410 + ], + [ + 110, + 1090 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.33503686451801545, + 0.32277677677295824, + 0.31805250788176503, + 0.3142383051967179, + 0.3094754738940133, + 0.3068588615015701, + 0.3051918382997866, + 0.30135664796387707, + 0.2982914166593993, + 0.29571508094668386, + 0.29429655058516396, + 0.2921788873495879, + 0.29186505976098553, + 0.29059922350225625, + 0.29067752968381955 + ], + "train_acc": [ + 0.5513888888888889, + 0.6213541666666667, + 
0.6432291666666666, + 0.6490162037037037, + 0.6556712962962963, + 0.6656828703703703, + 0.6737847222222222, + 0.6768518518518518, + 0.6764467592592592, + 0.6832754629629629, + 0.6901041666666666, + 0.6892939814814815, + 0.6947337962962963, + 0.695949074074074, + 0.6942708333333333 + ], + "train_auc": [ + 0.6244928394347279, + 0.676084185313786, + 0.692228330761317, + 0.702313635973937, + 0.7141995670438956, + 0.7240220264631916, + 0.72704001093107, + 0.738377021890718, + 0.7440825938786009, + 0.7494291195130316, + 0.7544271726394604, + 0.7589719435871056, + 0.7599465949359854, + 0.7633568029835391, + 0.76321705675583 + ], + "train_f1": [ + 0.631768953068592, + 0.7088765294771969, + 0.7313842534094375, + 0.735787410150294, + 0.7410341225626741, + 0.7495773548918462, + 0.7584936378047213, + 0.7591234578552325, + 0.7582688399844352, + 0.7645109935028613, + 0.7699248120300752, + 0.7685676106728738, + 0.7736925651036081, + 0.775527642484833, + 0.7729987539208525 + ], + "val_loss": [ + 0.3270006366074085, + 0.32059879961113136, + 0.32616251905759175, + 0.31671720544497173, + 0.328063361470898, + 0.303794256473581, + 0.30220593375464283, + 0.3008812251190344, + 0.30283960898717244, + 0.2976109920690457, + 0.2951974760740995, + 0.2948875146607558, + 0.29335316171248754, + 0.29429919583102065, + 0.29334711730480195 + ], + "val_acc": [ + 0.5895833333333333, + 0.6822916666666666, + 0.6932291666666667, + 0.5567708333333333, + 0.7020833333333333, + 0.6557291666666667, + 0.6796875, + 0.6109375, + 0.71875, + 0.671875, + 0.6692708333333334, + 0.6572916666666667, + 0.7015625, + 0.6848958333333334, + 0.6875 + ], + "val_auc": [ + 0.6702575231481483, + 0.6945775462962962, + 0.6884975405092593, + 0.7100557002314815, + 0.6958210358796296, + 0.731765769675926, + 0.7375441261574074, + 0.749129050925926, + 0.7465776909722223, + 0.7491493055555556, + 0.7528739872685184, + 0.7585908564814815, + 0.760939670138889, + 0.7562912326388889, + 0.7590277777777779 + ], + "val_f1": [ + 
0.6702928870292887, + 0.7799422799422799, + 0.7929701230228471, + 0.607291185971389, + 0.7992982456140351, + 0.7361277445109781, + 0.764638346727899, + 0.6764833261152013, + 0.8053352559480894, + 0.748, + 0.7467092142002393, + 0.7301066447908121, + 0.7833648393194707, + 0.7655947307245253, + 0.7667185069984448 + ] + }, + "test_metrics": { + "accuracy": 0.7002083333333333, + "auc_roc": 0.7605444444444445, + "f1": 0.7826612294215375, + "confusion_matrix": [ + [ + 770, + 430 + ], + [ + 1009, + 2591 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.7083333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 350, + 850 + ] + ], + "n": 1200, + "detection_rate": 0.7083333333333334, + "pairwise_auc": 0.7455847222222223, + "pairwise_f1": 0.6854838709677419 + }, + "insight": { + "accuracy": 0.5391666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 553, + 647 + ] + ], + "n": 1200, + "detection_rate": 0.5391666666666667, + "pairwise_auc": 0.6429701388888889, + "pairwise_f1": 0.5682916117698726 + }, + "text2img": { + "accuracy": 0.9116666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 106, + 1094 + ] + ], + "n": 1200, + "detection_rate": 0.9116666666666666, + "pairwise_auc": 0.8930784722222223, + "pairwise_f1": 0.8032305433186491 + }, + "wiki": { + "accuracy": 0.6416666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 770, + 430 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.3583333333333333 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.675, + "auc_roc": 0.7455847222222223, + "f1": 0.6854838709677419, + "confusion_matrix": [ + [ + 770, + 430 + ], + [ + 350, + 850 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.5904166666666667, + "auc_roc": 
0.6429701388888889, + "f1": 0.5682916117698726, + "confusion_matrix": [ + [ + 770, + 430 + ], + [ + 553, + 647 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7766666666666666, + "auc_roc": 0.8930784722222223, + "f1": 0.8032305433186491, + "confusion_matrix": [ + [ + 770, + 430 + ], + [ + 106, + 1094 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.3364739426308208, + 0.3257173133117181, + 0.3191978419268573, + 0.31475865840911865, + 0.3104811983803908, + 0.3067943162664219, + 0.3046634111967352, + 0.3019978031792023, + 0.2989502435205159, + 0.2969158275535813, + 0.29395404678803905, + 0.2935019516006664, + 0.29131871784726776, + 0.29010633034286676, + 0.2895898224854911 + ], + "train_acc": [ + 0.6266782407407407, + 0.6070023148148148, + 0.6330439814814814, + 0.6465856481481481, + 0.6571759259259259, + 0.6667824074074075, + 0.6752314814814815, + 0.6715277777777777, + 0.6829282407407408, + 0.6857060185185185, + 0.690162037037037, + 0.6949652777777777, + 0.6958333333333333, + 0.7016203703703704, + 0.7013310185185185 + ], + "train_auc": [ + 0.6196772923096707, + 0.66664474201246, + 0.6890921764260403, + 0.7021147512288524, + 0.7146474908550526, + 0.7254038333762003, + 0.7303141789551898, + 0.734999258759145, + 0.7441112075617284, + 0.7483698238168726, + 0.7565775480824188, + 0.7568492262517147, + 0.7620775195044581, + 0.7653932327389118, + 0.7671726394604481 + ], + "train_f1": [ + 0.7297104789039259, + 0.6943331682945493, + 0.7202294286344584, + 0.7329105619943145, + 0.743260813036318, + 0.7510807539339444, + 0.7601299367413233, + 0.7548587717025136, + 0.7658847156347477, + 0.7673591775540801, + 0.7708244157178323, + 0.7761117954381345, + 0.776511608129943, + 0.7808941016488187, + 0.7811559173981257 + ], + "val_loss": [ + 0.3374361862738927, + 0.32487107887864114, + 0.3151769695182641, + 0.34831909922262033, + 
0.3085048049688339, + 0.3033625200390816, + 0.30210716277360916, + 0.2975748070826133, + 0.29701284045974413, + 0.30581669323146343, + 0.2960714076956113, + 0.29469408467411995, + 0.2966602848221858, + 0.2917791905502478, + 0.2914164108534654 + ], + "val_acc": [ + 0.6557291666666667, + 0.5223958333333333, + 0.64375, + 0.7270833333333333, + 0.696875, + 0.6286458333333333, + 0.6729166666666667, + 0.6817708333333333, + 0.6953125, + 0.753125, + 0.6390625, + 0.7125, + 0.7265625, + 0.6942708333333333, + 0.6895833333333333 + ], + "val_auc": [ + 0.6229116030092593, + 0.6753002025462962, + 0.7035865162037037, + 0.6813382523148148, + 0.7272186053240741, + 0.7343366608796296, + 0.7358557581018518, + 0.7465791377314814, + 0.748572048611111, + 0.7571180555555556, + 0.7543959780092593, + 0.7582233796296296, + 0.7582581018518519, + 0.7625180844907407, + 0.7636074942129629 + ], + "val_f1": [ + 0.7595489268825028, + 0.5692813527477689, + 0.7279236276849642, + 0.8276315789473684, + 0.7842846553002224, + 0.7064635652531907, + 0.7548790007806401, + 0.7619789637709389, + 0.7778199772123053, + 0.8373369938229238, + 0.7087011349306431, + 0.7924812030075188, + 0.8091603053435115, + 0.774490971955436, + 0.7679127725856698 + ] + }, + "test_metrics": { + "accuracy": 0.6891666666666667, + "auc_roc": 0.7577002314814816, + "f1": 0.7690402476780186, + "confusion_matrix": [ + [ + 824, + 376 + ], + [ + 1116, + 2484 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.6808333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 383, + 817 + ] + ], + "n": 1200, + "detection_rate": 0.6808333333333333, + "pairwise_auc": 0.7459881944444444, + "pairwise_f1": 0.6828249059757626 + }, + "insight": { + "accuracy": 0.47583333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 629, + 571 + ] + ], + "n": 1200, + "detection_rate": 0.47583333333333333, + "pairwise_auc": 0.6303194444444444, + "pairwise_f1": 
0.5319049836981835 + }, + "text2img": { + "accuracy": 0.9133333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 104, + 1096 + ] + ], + "n": 1200, + "detection_rate": 0.9133333333333333, + "pairwise_auc": 0.8967930555555557, + "pairwise_f1": 0.8203592814371258 + }, + "wiki": { + "accuracy": 0.6866666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 824, + 376 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.31333333333333335 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.68375, + "auc_roc": 0.7459881944444444, + "f1": 0.6828249059757626, + "confusion_matrix": [ + [ + 824, + 376 + ], + [ + 383, + 817 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.58125, + "auc_roc": 0.6303194444444444, + "f1": 0.5319049836981835, + "confusion_matrix": [ + [ + 824, + 376 + ], + [ + 629, + 571 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.8, + "auc_roc": 0.8967930555555557, + "f1": 0.8203592814371258, + "confusion_matrix": [ + [ + 824, + 376 + ], + [ + 104, + 1096 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.3353836394294544, + 0.32259115118671344, + 0.3174777179128594, + 0.31266347663822, + 0.3088702575476081, + 0.30478058032415534, + 0.30158055444006565, + 0.2999485428686495, + 0.2953231288603059, + 0.2923485529091623, + 0.290638677113586, + 0.28876484593859425, + 0.287046128704592, + 0.28589169733502245, + 0.28556128365022165 + ], + "train_acc": [ + 0.6427662037037037, + 0.6274884259259259, + 0.6494791666666667, + 0.6559606481481481, + 0.6636574074074074, + 0.6642361111111111, + 0.6725694444444444, + 0.6872685185185186, + 0.6883101851851852, + 0.6949074074074074, + 0.6920138888888889, + 0.6994791666666667, + 
0.7, + 0.7056134259259259, + 0.7027777777777777 + ], + "train_auc": [ + 0.6252745984796524, + 0.6772062542866941, + 0.6943444305126886, + 0.7079929162379973, + 0.7176040594993143, + 0.7267386741969594, + 0.7361496645661866, + 0.7418370002429127, + 0.7525480556270004, + 0.7595829939700502, + 0.7619877489854824, + 0.7670766621656379, + 0.7709038405206905, + 0.7749076842564014, + 0.774598167081047 + ], + "train_f1": [ + 0.7459775317888153, + 0.7162191949918442, + 0.7373715475003252, + 0.7421607320987119, + 0.748789764868603, + 0.7488529131676911, + 0.7551709216789269, + 0.7706865823644233, + 0.769592744695414, + 0.7746815967176682, + 0.7717643022557681, + 0.7782190903267137, + 0.7780251776997517, + 0.7833013844515442, + 0.7799674406648959 + ], + "val_loss": [ + 0.33067879155278207, + 0.31833433161179225, + 0.31625557964046797, + 0.32984760105609895, + 0.3070489232738813, + 0.30327728651463987, + 0.30142497407893337, + 0.29803119239707787, + 0.2943152385453383, + 0.29500476717948915, + 0.29081568531692026, + 0.2908581268042326, + 0.2895766315360864, + 0.28882027106980485, + 0.2892854228615761 + ], + "val_acc": [ + 0.6479166666666667, + 0.5953125, + 0.5901041666666667, + 0.7208333333333333, + 0.659375, + 0.6734375, + 0.6338541666666667, + 0.6963541666666667, + 0.7010416666666667, + 0.7197916666666667, + 0.7005208333333334, + 0.7135416666666666, + 0.703125, + 0.7010416666666667, + 0.715625 + ], + "val_auc": [ + 0.6509143518518519, + 0.6982942708333333, + 0.6995392071759259, + 0.7052047164351851, + 0.7219053819444445, + 0.7319661458333334, + 0.7466218171296297, + 0.7510192418981482, + 0.760144675925926, + 0.7611422164351851, + 0.7662398726851852, + 0.7672996238425926, + 0.7694596354166667, + 0.7714149305555555, + 0.7731358506944445 + ], + "val_f1": [ + 0.7534646243617797, + 0.6675224646983312, + 0.6594547814798788, + 0.82145236508994, + 0.7449297971918877, + 0.7593090211132437, + 0.7049937054133445, + 0.777055449330784, + 0.7797390636991558, + 0.8010355029585798, + 
0.7784200385356455, + 0.7929216867469879, + 0.7799227799227799, + 0.7761310452418096, + 0.7938066465256798 + ] + }, + "test_metrics": { + "accuracy": 0.7033333333333334, + "auc_roc": 0.7640530092592592, + "f1": 0.7838494231936854, + "confusion_matrix": [ + [ + 794, + 406 + ], + [ + 1018, + 2582 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.7133333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 344, + 856 + ] + ], + "n": 1200, + "detection_rate": 0.7133333333333334, + "pairwise_auc": 0.7618437499999999, + "pairwise_f1": 0.6953696181965882 + }, + "insight": { + "accuracy": 0.5425, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 549, + 651 + ] + ], + "n": 1200, + "detection_rate": 0.5425, + "pairwise_auc": 0.6448930555555555, + "pairwise_f1": 0.5768719539211342 + }, + "text2img": { + "accuracy": 0.8958333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 125, + 1075 + ] + ], + "n": 1200, + "detection_rate": 0.8958333333333334, + "pairwise_auc": 0.8854222222222223, + "pairwise_f1": 0.8019395747855278 + }, + "wiki": { + "accuracy": 0.6616666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 794, + 406 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.3383333333333334 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.6875, + "auc_roc": 0.7618437499999999, + "f1": 0.6953696181965882, + "confusion_matrix": [ + [ + 794, + 406 + ], + [ + 344, + 856 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.6020833333333333, + "auc_roc": 0.6448930555555555, + "f1": 0.5768719539211342, + "confusion_matrix": [ + [ + 794, + 406 + ], + [ + 549, + 651 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.77875, + "auc_roc": 
0.8854222222222223, + "f1": 0.8019395747855278, + "confusion_matrix": [ + [ + 794, + 406 + ], + [ + 125, + 1075 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.3342884210248788, + 0.3216918839900582, + 0.3155434602388629, + 0.3121900218780394, + 0.3077619354206103, + 0.30377424199272085, + 0.3018366852844203, + 0.29796070293695837, + 0.29583850431221503, + 0.2936908225218455, + 0.29134527218562584, + 0.28989972400444525, + 0.28901703153495434, + 0.2873561058607366, + 0.2871142524536009 + ], + "train_acc": [ + 0.547337962962963, + 0.6428819444444445, + 0.6480902777777777, + 0.6585648148148148, + 0.672974537037037, + 0.6743055555555556, + 0.6793402777777777, + 0.6843171296296297, + 0.692650462962963, + 0.6927662037037037, + 0.6998842592592592, + 0.7006365740740741, + 0.703587962962963, + 0.7069444444444445, + 0.7052083333333333 + ], + "train_auc": [ + 0.6321781675097164, + 0.6835533121856424, + 0.701161970950503, + 0.7100530924925698, + 0.7238381808699131, + 0.7339107242369685, + 0.737921265932213, + 0.7465948020261775, + 0.7526217421124828, + 0.7560302712048468, + 0.7635840870770462, + 0.7668943526377457, + 0.7684636934870828, + 0.7723945473251029, + 0.7729369659493599 + ], + "train_f1": [ + 0.6253472554842417, + 0.732173082765505, + 0.7353440396918658, + 0.7454043324415293, + 0.7583493692537951, + 0.757893831196765, + 0.7631140182121329, + 0.7662109458706553, + 0.7738749095244177, + 0.7739793094640044, + 0.7791876011240739, + 0.779937890841026, + 0.7829293100525513, + 0.784968152866242, + 0.7831786839192986 + ], + "val_loss": [ + 0.3260150834918022, + 0.3201495518287023, + 0.31530093053976693, + 0.3082558525105317, + 0.3041532670458158, + 0.30400137255589166, + 0.3035708008954922, + 0.29604487580557665, + 0.3090707903107007, + 0.2906061477959156, + 0.29748790661493935, + 0.28959565858046216, + 0.2878942226370176, + 0.28793462216854093, + 0.2872403415540854 + ], + "val_acc": [ 
+ 0.5880208333333333, + 0.6197916666666666, + 0.5765625, + 0.709375, + 0.6760416666666667, + 0.6859375, + 0.7046875, + 0.7036458333333333, + 0.7541666666666667, + 0.6677083333333333, + 0.7317708333333334, + 0.6739583333333333, + 0.7072916666666667, + 0.7098958333333333, + 0.6869791666666667 + ], + "val_auc": [ + 0.6657523148148148, + 0.681693431712963, + 0.7005504918981481, + 0.7232523148148147, + 0.7271144386574074, + 0.7287897858796297, + 0.7337261284722223, + 0.7499645543981481, + 0.7491666666666666, + 0.7583666087962964, + 0.7573140914351852, + 0.7632060185185184, + 0.7679701967592593, + 0.7663917824074075, + 0.7666550925925926 + ], + "val_f1": [ + 0.6730053741215378, + 0.7086991221069433, + 0.647594278283485, + 0.7966472303206997, + 0.7633181126331812, + 0.7727101394647569, + 0.7964093357271095, + 0.787448636533433, + 0.8416107382550335, + 0.7460191082802548, + 0.8175699610343606, + 0.7521773555027712, + 0.7874432677760969, + 0.7900490011307953, + 0.7658745617452279 + ] + }, + "test_metrics": { + "accuracy": 0.715, + "auc_roc": 0.778203935185185, + "f1": 0.7936028968014484, + "confusion_matrix": [ + [ + 802, + 398 + ], + [ + 970, + 2630 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.7008333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 359, + 841 + ] + ], + "n": 1200, + "detection_rate": 0.7008333333333333, + "pairwise_auc": 0.7559784722222221, + "pairwise_f1": 0.6896268962689627 + }, + "insight": { + "accuracy": 0.5933333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 488, + 712 + ] + ], + "n": 1200, + "detection_rate": 0.5933333333333334, + "pairwise_auc": 0.6878465277777777, + "pairwise_f1": 0.6164502164502165 + }, + "text2img": { + "accuracy": 0.8975, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 123, + 1077 + ] + ], + "n": 1200, + "detection_rate": 0.8975, + "pairwise_auc": 0.8907868055555557, + "pairwise_f1": 
0.8052336448598131 + }, + "wiki": { + "accuracy": 0.6683333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 802, + 398 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.33166666666666667 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.6845833333333333, + "auc_roc": 0.7559784722222221, + "f1": 0.6896268962689627, + "confusion_matrix": [ + [ + 802, + 398 + ], + [ + 359, + 841 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.6308333333333334, + "auc_roc": 0.6878465277777777, + "f1": 0.6164502164502165, + "confusion_matrix": [ + [ + 802, + 398 + ], + [ + 488, + 712 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7829166666666667, + "auc_roc": 0.8907868055555557, + "f1": 0.8052336448598131, + "confusion_matrix": [ + [ + 802, + 398 + ], + [ + 123, + 1077 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.7051666666666666, + "std": 0.011710668509715578, + "ci_95": 0.010264853533078553, + "values": [ + 0.718125, + 0.7002083333333333, + 0.6891666666666667, + 0.7033333333333334, + 0.715 + ] + }, + "auc_roc": { + "mean": 0.7660717592592594, + "std": 0.008158249046158228, + "ci_95": 0.007151020582276385, + "values": [ + 0.7698571759259261, + 0.7605444444444445, + 0.7577002314814816, + 0.7640530092592592, + 0.778203935185185 + ] + }, + "f1": { + "mean": 0.7852427888174681, + "std": 0.010962089397055749, + "ci_95": 0.009608695010360564, + "values": [ + 0.7970601469926504, + 0.7826612294215375, + 0.7690402476780186, + 0.7838494231936854, + 0.7936028968014484 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.7113333333333334, + "std": 0.02654006405418043, + "ci_95": 0.02326339184211967, + "values": [ + 0.7533333333333333, + 0.7083333333333334, + 0.6808333333333333, + 
0.7133333333333334, + 0.7008333333333333 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.7113333333333334, + "std": 0.02654006405418043, + "ci_95": 0.02326339184211967, + "values": [ + 0.7533333333333333, + 0.7083333333333334, + 0.6808333333333333, + 0.7133333333333334, + 0.7008333333333333 + ] + }, + "pairwise_auc": { + "mean": 0.7564202083333333, + "std": 0.011413461915450845, + "ci_95": 0.010004340467008795, + "values": [ + 0.7727059027777777, + 0.7455847222222223, + 0.7459881944444444, + 0.7618437499999999, + 0.7559784722222221 + ] + }, + "pairwise_f1": { + "mean": 0.6944955849325668, + "std": 0.0145845852974448, + "ci_95": 0.012783952666302379, + "values": [ + 0.7191726332537789, + 0.6854838709677419, + 0.6828249059757626, + 0.6953696181965882, + 0.6896268962689627 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.5406666666666667, + "std": 0.04219827537286857, + "ci_95": 0.03698841920865985, + "values": [ + 0.5525, + 0.5391666666666667, + 0.47583333333333333, + 0.5425, + 0.5933333333333334 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.5406666666666667, + "std": 0.04219827537286857, + "ci_95": 0.03698841920865985, + "values": [ + 0.5525, + 0.5391666666666667, + 0.47583333333333333, + 0.5425, + 0.5933333333333334 + ] + }, + "pairwise_auc": { + "mean": 0.6505604861111112, + "std": 0.021817566678805893, + "ci_95": 0.019123940381398162, + "values": [ + 0.646773263888889, + 0.6429701388888889, + 0.6303194444444444, + 0.6448930555555555, + 0.6878465277777777 + ] + }, + "pairwise_f1": { + "mean": 0.5753777522879868, + "std": 0.03038564998626015, + "ci_95": 0.02663419653263895, + "values": [ + 0.5833699956005279, + 0.5682916117698726, + 0.5319049836981835, + 0.5768719539211342, + 0.6164502164502165 + ] + } + }, + "text2img": { + 
"accuracy": { + "mean": 0.9053333333333334, + "std": 0.008135143241243842, + "ci_95": 0.007130767451294726, + "values": [ + 0.9083333333333333, + 0.9116666666666666, + 0.9133333333333333, + 0.8958333333333334, + 0.8975 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9053333333333334, + "std": 0.008135143241243842, + "ci_95": 0.007130767451294726, + "values": [ + 0.9083333333333333, + 0.9116666666666666, + 0.9133333333333333, + 0.8958333333333334, + 0.8975 + ] + }, + "pairwise_auc": { + "mean": 0.8912345833333335, + "std": 0.004170888931299613, + "ci_95": 0.0036559453413790547, + "values": [ + 0.8900923611111111, + 0.8930784722222223, + 0.8967930555555557, + 0.8854222222222223, + 0.8907868055555557 + ] + }, + "pairwise_f1": { + "mean": 0.8076340903617046, + "std": 0.007408824904852019, + "ci_95": 0.00649412135929187, + "values": [ + 0.8074074074074075, + 0.8032305433186491, + 0.8203592814371258, + 0.8019395747855278, + 0.8052336448598131 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.6633333333333333, + "std": 0.016329931618554498, + "ci_95": 0.01431381616946832, + "values": [ + 0.6583333333333333, + 0.6416666666666667, + 0.6866666666666666, + 0.6616666666666666, + 0.6683333333333333 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.3366666666666666, + "std": 0.016329931618554498, + "ci_95": 0.01431381616946832, + "values": [ + 0.3416666666666667, + 0.3583333333333333, + 0.31333333333333335, + 0.3383333333333334, + 0.33166666666666667 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.6873333333333334, + "std": 0.01134451041987561, + "ci_95": 0.009943901816356245, + "values": [ + 0.7058333333333333, + 0.675, + 0.68375, + 0.6875, + 0.6845833333333333 + ] + }, + "auc_roc": { + "mean": 
0.7564202083333333, + "std": 0.011413461915450845, + "ci_95": 0.010004340467008795, + "values": [ + 0.7727059027777777, + 0.7455847222222223, + 0.7459881944444444, + 0.7618437499999999, + 0.7559784722222221 + ] + }, + "f1": { + "mean": 0.6944955849325668, + "std": 0.0145845852974448, + "ci_95": 0.012783952666302379, + "values": [ + 0.7191726332537789, + 0.6854838709677419, + 0.6828249059757626, + 0.6953696181965882, + 0.6896268962689627 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.602, + "std": 0.018760182420340273, + "ci_95": 0.016444024919573533, + "values": [ + 0.6054166666666667, + 0.5904166666666667, + 0.58125, + 0.6020833333333333, + 0.6308333333333334 + ] + }, + "auc_roc": { + "mean": 0.6505604861111112, + "std": 0.021817566678805893, + "ci_95": 0.019123940381398162, + "values": [ + 0.646773263888889, + 0.6429701388888889, + 0.6303194444444444, + 0.6448930555555555, + 0.6878465277777777 + ] + }, + "f1": { + "mean": 0.5753777522879868, + "std": 0.03038564998626015, + "ci_95": 0.02663419653263895, + "values": [ + 0.5833699956005279, + 0.5682916117698726, + 0.5319049836981835, + 0.5768719539211342, + 0.6164502164502165 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.7843333333333333, + "std": 0.009197863580443277, + "ci_95": 0.008062282899747182, + "values": [ + 0.7833333333333333, + 0.7766666666666666, + 0.8, + 0.77875, + 0.7829166666666667 + ] + }, + "auc_roc": { + "mean": 0.8912345833333335, + "std": 0.004170888931299613, + "ci_95": 0.0036559453413790547, + "values": [ + 0.8900923611111111, + 0.8930784722222223, + 0.8967930555555557, + 0.8854222222222223, + 0.8907868055555557 + ] + }, + "f1": { + "mean": 0.8076340903617046, + "std": 0.007408824904852019, + "ci_95": 0.00649412135929187, + "values": [ + 0.8074074074074075, + 0.8032305433186491, + 0.8203592814371258, + 0.8019395747855278, + 0.8052336448598131 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + 
"early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data_cropped", + "run_name": "p2c_simplecnn_facecrop", + "backbone": "simple_cnn", + "cnn_preset": "medium", + "dropout": 0.0, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": false + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p2d_resnet18_aug.json b/classifier/outputs/logs/p2d_resnet18_aug.json new file mode 100644 index 0000000..1c1faae --- /dev/null +++ b/classifier/outputs/logs/p2d_resnet18_aug.json @@ -0,0 +1,1821 @@ +{ + "run_name": "p2d_resnet18_aug", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.2432145776434077, + 0.18624131784394934, + 0.1615376862524836, + 0.14311136932422716, + 0.12778810034333557, + 0.11735554574126447, + 0.1028537227090931, + 0.08899825522934811, + 0.08163309689369742 + ], + "train_acc": [ + 0.7476851851851852, + 0.828761574074074, + 0.8553240740740741, + 0.8760416666666667, + 0.8890625, + 0.9065972222222223, + 0.9166087962962963, + 0.9321759259259259, + 0.9360532407407407 + ], + "train_auc": [ + 0.8427512359967992, + 0.9138303487225652, + 0.9362707815357796, + 0.9505006054955417, + 0.960344569830247, + 0.9671106342163922, + 0.9747477548439643, + 0.9806416377314815, + 0.9842013085133745 + ], + "train_f1": [ + 0.8151445772916137, + 0.8789527510738392, + 0.8985554293134231, + 0.9137403350515464, + 0.923206345391179, + 0.9358403561774526, + 0.9429419916848149, + 0.9538182677910001, + 0.9566071077950128 + ], + "val_loss": [ + 0.1902425479143858, + 0.26963399226466817, + 0.18652883305524787, + 0.13493328224867582, + 0.18491155989468097, + 0.16451409317863483, + 0.17540942650909225, + 0.16768297875920932, + 0.17393040573224425 + ], + "val_acc": [ + 0.7958333333333333, + 0.696875, + 0.8723958333333334, + 0.8770833333333333, + 0.7890625, + 0.8796875, + 0.8666666666666667, + 0.88125, + 
0.8619791666666666 + ], + "val_auc": [ + 0.9145949074074075, + 0.9076374421296297, + 0.9411385995370372, + 0.9544654224537036, + 0.9407219328703703, + 0.9507740162037037, + 0.9443192997685185, + 0.9523459201388889, + 0.9515921585648148 + ], + "val_f1": [ + 0.847944142746315, + 0.7497850386930353, + 0.9143656064313177, + 0.9131714495952906, + 0.8394768133174791, + 0.9163952225841476, + 0.9062957540263543, + 0.9174511223750905, + 0.9015235971757711 + ] + }, + "test_metrics": { + "accuracy": 0.8841666666666667, + "auc_roc": 0.9598277777777778, + "f1": 0.9190212642004079, + "confusion_matrix": [ + [ + 1089, + 111 + ], + [ + 445, + 3155 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.8633333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 164, + 1036 + ] + ], + "n": 1200, + "detection_rate": 0.8633333333333333, + "pairwise_auc": 0.9568444444444445, + "pairwise_f1": 0.8828291435875586 + }, + "insight": { + "accuracy": 0.835, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 198, + 1002 + ] + ], + "n": 1200, + "detection_rate": 0.835, + "pairwise_auc": 0.9470201388888889, + "pairwise_f1": 0.8664072632944229 + }, + "text2img": { + "accuracy": 0.9308333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 83, + 1117 + ] + ], + "n": 1200, + "detection_rate": 0.9308333333333333, + "pairwise_auc": 0.97561875, + "pairwise_f1": 0.9200988467874794 + }, + "wiki": { + "accuracy": 0.9075, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1089, + 111 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.09250000000000003 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.8854166666666666, + "auc_roc": 0.9568444444444445, + "f1": 0.8828291435875586, + "confusion_matrix": [ + [ + 1089, + 111 + ], + [ + 164, + 1036 + ] + ] + }, + "wiki_vs_insight": { + 
"sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.87125, + "auc_roc": 0.9470201388888889, + "f1": 0.8664072632944229, + "confusion_matrix": [ + [ + 1089, + 111 + ], + [ + 198, + 1002 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9191666666666667, + "auc_roc": 0.97561875, + "f1": 0.9200988467874794, + "confusion_matrix": [ + [ + 1089, + 111 + ], + [ + 83, + 1117 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.24689088360302977, + 0.1842047021413843, + 0.15967476530069555, + 0.14266639583640628, + 0.12989114740587496, + 0.11649227232706767, + 0.09814681154503314, + 0.08726330978312978, + 0.07994664205083003, + 0.07109554970585223, + 0.06332696684791396, + 0.05453277301793504, + 0.05285849852414146, + 0.04519847576609916, + 0.04594721748769559 + ], + "train_acc": [ + 0.7479166666666667, + 0.8290509259259259, + 0.8592592592592593, + 0.875462962962963, + 0.8919560185185185, + 0.9060185185185186, + 0.9182291666666667, + 0.9318287037037037, + 0.9401041666666666, + 0.9473379629629629, + 0.9521412037037037, + 0.9607060185185186, + 0.9625578703703703, + 0.9675925925925926, + 0.9673611111111111 + ], + "train_auc": [ + 0.8384914676926155, + 0.9154041370170325, + 0.9379931484339277, + 0.9506273040980795, + 0.959658002186214, + 0.9682331675811614, + 0.9764297643032693, + 0.9817006655092593, + 0.9845431455761318, + 0.9879434227823503, + 0.9905176272433699, + 0.9928843467792638, + 0.9933776381029948, + 0.9949007362397119, + 0.994895226051669 + ], + "train_f1": [ + 0.8159384771402012, + 0.879023671062331, + 0.901689708141321, + 0.9133236668277751, + 0.9254898830666082, + 0.9355760076166296, + 0.9439129917040447, + 0.9536037810161481, + 0.959349593495935, + 0.9643612438317537, + 0.9676662626578567, + 0.9735251686357079, + 0.9748239231098487, + 0.9781795511221946, + 0.9780510585305106 + ], + "val_loss": [ + 
0.19462023427089056, + 0.16530078780682136, + 0.1554836075132092, + 0.1940426806608836, + 0.14365296380904813, + 0.17461532298475504, + 0.1586057751439512, + 0.15580776433149973, + 0.14082847472357873, + 0.1512137787610603, + 0.14102415824697043, + 0.14971011347758273, + 0.14048875640922537, + 0.14256967038381846, + 0.14555416881727676 + ], + "val_acc": [ + 0.7916666666666666, + 0.871875, + 0.8895833333333333, + 0.790625, + 0.8619791666666666, + 0.8328125, + 0.8567708333333334, + 0.9015625, + 0.90625, + 0.9015625, + 0.8963541666666667, + 0.8916666666666667, + 0.9020833333333333, + 0.9109375, + 0.9057291666666667 + ], + "val_auc": [ + 0.9082855902777778, + 0.9404890046296297, + 0.9493388310185185, + 0.9419618055555556, + 0.9553168402777777, + 0.9546332465277778, + 0.9602445023148148, + 0.9603103298611111, + 0.966378761574074, + 0.9639539930555556, + 0.9676135706018517, + 0.9687463831018518, + 0.9701822916666667, + 0.9702162905092593, + 0.9695348668981482 + ], + "val_f1": [ + 0.8460354118552733, + 0.9117013639626705, + 0.924822695035461, + 0.8408551068883611, + 0.9013035381750466, + 0.8766807529773338, + 0.8961872404681012, + 0.9325240985362371, + 0.9357142857142857, + 0.9318427695636495, + 0.9276100400145507, + 0.9234731420161884, + 0.9315866084425036, + 0.9385113268608414, + 0.9344440420137631 + ] + }, + "test_metrics": { + "accuracy": 0.90625, + "auc_roc": 0.9686325231481481, + "f1": 0.9355300859598854, + "confusion_matrix": [ + [ + 1085, + 115 + ], + [ + 335, + 3265 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.915, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 102, + 1098 + ] + ], + "n": 1200, + "detection_rate": 0.915, + "pairwise_auc": 0.9732482638888889, + "pairwise_f1": 0.9100704517198508 + }, + "insight": { + "accuracy": 0.8575, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 171, + 1029 + ] + ], + "n": 1200, + "detection_rate": 0.8575, + "pairwise_auc": 
0.9496430555555556, + "pairwise_f1": 0.8779863481228669 + }, + "text2img": { + "accuracy": 0.9483333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 62, + 1138 + ] + ], + "n": 1200, + "detection_rate": 0.9483333333333334, + "pairwise_auc": 0.9830062500000001, + "pairwise_f1": 0.9278434569914391 + }, + "wiki": { + "accuracy": 0.9041666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1085, + 115 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.09583333333333333 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9095833333333333, + "auc_roc": 0.9732482638888889, + "f1": 0.9100704517198508, + "confusion_matrix": [ + [ + 1085, + 115 + ], + [ + 102, + 1098 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8808333333333334, + "auc_roc": 0.9496430555555556, + "f1": 0.8779863481228669, + "confusion_matrix": [ + [ + 1085, + 115 + ], + [ + 171, + 1029 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.92625, + "auc_roc": 0.9830062500000001, + "f1": 0.9278434569914391, + "confusion_matrix": [ + [ + 1085, + 115 + ], + [ + 62, + 1138 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.25084885087002207, + 0.18861986772605666, + 0.16143610937589847, + 0.14482936942260022, + 0.13110146244160004, + 0.11945354820539554, + 0.10625538078171236, + 0.09158743754098261, + 0.0816777328081015, + 0.06940991530643294, + 0.061847320255926916 + ], + "train_acc": [ + 0.7384259259259259, + 0.8266782407407407, + 0.8543981481481482, + 0.8726851851851852, + 0.8870370370370371, + 0.9002314814814815, + 0.9131365740740741, + 0.9268518518518518, + 0.934837962962963, + 0.9470486111111112, + 0.9521990740740741 + ], + "train_auc": [ + 
0.8316352041180841, + 0.9111492180355509, + 0.9359855824188386, + 0.9491406696530635, + 0.9583204643204162, + 0.9660177862082763, + 0.9729465217049611, + 0.9793789026777551, + 0.9839431512917238, + 0.9882295506830132, + 0.9904374303412209 + ], + "train_f1": [ + 0.8077740920302798, + 0.8775702080693292, + 0.8979558728098638, + 0.9112903225806451, + 0.9219012563015123, + 0.9312709296762877, + 0.9404246874379837, + 0.9500079101408005, + 0.9556658004567289, + 0.9641499823688437, + 0.9676813522184835 + ], + "val_loss": [ + 0.1721192395935456, + 0.15591581917057434, + 0.14221578563253084, + 0.13032059934921564, + 0.17075392644231518, + 0.12318657368887216, + 0.13254480995237827, + 0.14408315840022018, + 0.14193831564237674, + 0.16232128216749211, + 0.15326399483795589 + ], + "val_acc": [ + 0.8296875, + 0.8854166666666666, + 0.8729166666666667, + 0.8796875, + 0.8359375, + 0.9057291666666667, + 0.8703125, + 0.9072916666666667, + 0.8942708333333333, + 0.8994791666666667, + 0.9072916666666667 + ], + "val_auc": [ + 0.9259215856481481, + 0.9469864004629629, + 0.9507479745370371, + 0.9603211805555556, + 0.9540726273148148, + 0.9683825231481481, + 0.9631264467592593, + 0.9650130208333333, + 0.9639561631944444, + 0.9628884548611112, + 0.963708767361111 + ], + "val_f1": [ + 0.8775739423436915, + 0.9224806201550387, + 0.9106227106227106, + 0.915785636164783, + 0.8787061994609164, + 0.9352878083661066, + 0.9072625698324023, + 0.9369688385269122, + 0.925939438161255, + 0.9304504504504505, + 0.9359712230215828 + ] + }, + "test_metrics": { + "accuracy": 0.909375, + "auc_roc": 0.9661710648148147, + "f1": 0.938567998870216, + "confusion_matrix": [ + [ + 1042, + 158 + ], + [ + 277, + 3323 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9366666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 76, + 1124 + ] + ], + "n": 1200, + "detection_rate": 0.9366666666666666, + "pairwise_auc": 0.9708569444444444, + "pairwise_f1": 
0.9057211925866236 + }, + "insight": { + "accuracy": 0.8683333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 158, + 1042 + ] + ], + "n": 1200, + "detection_rate": 0.8683333333333333, + "pairwise_auc": 0.9475027777777779, + "pairwise_f1": 0.8683333333333333 + }, + "text2img": { + "accuracy": 0.9641666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 43, + 1157 + ] + ], + "n": 1200, + "detection_rate": 0.9641666666666666, + "pairwise_auc": 0.9801534722222222, + "pairwise_f1": 0.920079522862823 + }, + "wiki": { + "accuracy": 0.8683333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1042, + 158 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.1316666666666667 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9025, + "auc_roc": 0.9708569444444444, + "f1": 0.9057211925866236, + "confusion_matrix": [ + [ + 1042, + 158 + ], + [ + 76, + 1124 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8683333333333333, + "auc_roc": 0.9475027777777779, + "f1": 0.8683333333333333, + "confusion_matrix": [ + [ + 1042, + 158 + ], + [ + 158, + 1042 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.91625, + "auc_roc": 0.9801534722222222, + "f1": 0.920079522862823, + "confusion_matrix": [ + [ + 1042, + 158 + ], + [ + 43, + 1157 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.2421666906249744, + 0.18122071538258483, + 0.15945808111379545, + 0.14149741873972946, + 0.12693481605706944, + 0.11328697106456039, + 0.10285938018760471, + 0.09215615937762238, + 0.08213630432901145, + 0.0721674160566181, + 0.06024700803078573, + 0.05493976923516365, + 0.04812634282236643, + 0.04283446180116799, + 
0.044198857838744984 + ], + "train_acc": [ + 0.7519097222222222, + 0.8376736111111112, + 0.8593171296296296, + 0.8793402777777778, + 0.8940393518518519, + 0.9099537037037037, + 0.9178240740740741, + 0.9293981481481481, + 0.9375578703703704, + 0.9460069444444444, + 0.9568865740740741, + 0.9601851851851851, + 0.9638310185185185, + 0.9694444444444444, + 0.967650462962963 + ], + "train_auc": [ + 0.8447519343707134, + 0.9199105420524691, + 0.938318115569273, + 0.9516152441986739, + 0.9610948841878143, + 0.9698494030778464, + 0.9747931312871514, + 0.9799926233139004, + 0.9839744888117283, + 0.987740840763603, + 0.991201095964792, + 0.993033845236054, + 0.994431030664152, + 0.9954667495427527, + 0.9951984917981251 + ], + "train_f1": [ + 0.8185089538969561, + 0.8854868340477648, + 0.9017261591947285, + 0.9163356205609726, + 0.9268273188666427, + 0.9383664738968549, + 0.9438557646686699, + 0.9519344417303601, + 0.9576181311127695, + 0.9634476003917728, + 0.9708927524907208, + 0.9732003739482705, + 0.9756439733447644, + 0.9794584500466853, + 0.9782583330092178 + ], + "val_loss": [ + 0.20158521762738627, + 0.20123521853238344, + 0.1367407993723949, + 0.14876992775437733, + 0.19659454523546932, + 0.16914862338453532, + 0.14609688865796974, + 0.15264005694383134, + 0.1667207621658842, + 0.16147574520048996, + 0.1583384559994253, + 0.15340277797852953, + 0.15068399023536283, + 0.15819599378931645, + 0.1549152214662172 + ], + "val_acc": [ + 0.7708333333333334, + 0.7619791666666667, + 0.8848958333333333, + 0.8421875, + 0.8854166666666666, + 0.8291666666666667, + 0.8854166666666666, + 0.8890625, + 0.8526041666666667, + 0.8635416666666667, + 0.8833333333333333, + 0.890625, + 0.8942708333333333, + 0.8973958333333333, + 0.903125 + ], + "val_auc": [ + 0.9126171875, + 0.9304144965277777, + 0.9546397569444444, + 0.9528190104166666, + 0.948021556712963, + 0.9557262731481481, + 0.9573972800925925, + 0.9590986689814813, + 0.9580721932870371, + 0.9602184606481481, + 0.9626613136574075, + 
0.9661429398148149, + 0.9663020833333332, + 0.9655041956018519, + 0.9661733217592593 + ], + "val_f1": [ + 0.8267716535433071, + 0.8153535353535354, + 0.920816911501254, + 0.8849221420432967, + 0.9235048678720446, + 0.8734567901234568, + 0.9205776173285198, + 0.9230769230769231, + 0.8932478310071671, + 0.9023117076808352, + 0.9182481751824818, + 0.9238026124818578, + 0.9264759145237232, + 0.9290601368383147, + 0.9333810888252149 + ] + }, + "test_metrics": { + "accuracy": 0.89625, + "auc_roc": 0.9677373842592593, + "f1": 0.9278679026651216, + "confusion_matrix": [ + [ + 1099, + 101 + ], + [ + 397, + 3203 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.8966666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 124, + 1076 + ] + ], + "n": 1200, + "detection_rate": 0.8966666666666666, + "pairwise_auc": 0.9690951388888888, + "pairwise_f1": 0.9053428691628103 + }, + "insight": { + "accuracy": 0.8308333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 203, + 997 + ] + ], + "n": 1200, + "detection_rate": 0.8308333333333333, + "pairwise_auc": 0.9505517361111111, + "pairwise_f1": 0.8677110530896431 + }, + "text2img": { + "accuracy": 0.9416666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 70, + 1130 + ] + ], + "n": 1200, + "detection_rate": 0.9416666666666667, + "pairwise_auc": 0.9835652777777779, + "pairwise_f1": 0.9296585767174003 + }, + "wiki": { + "accuracy": 0.9158333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1099, + 101 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.08416666666666661 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.90625, + "auc_roc": 0.9690951388888888, + "f1": 0.9053428691628103, + "confusion_matrix": [ + [ + 1099, + 101 + ], + [ + 124, + 1076 + ] + ] + }, + "wiki_vs_insight": { + 
"sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8733333333333333, + "auc_roc": 0.9505517361111111, + "f1": 0.8677110530896431, + "confusion_matrix": [ + [ + 1099, + 101 + ], + [ + 203, + 997 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.92875, + "auc_roc": 0.9835652777777779, + "f1": 0.9296585767174003, + "confusion_matrix": [ + [ + 1099, + 101 + ], + [ + 70, + 1130 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.24544570325977272, + 0.18812347099322962, + 0.16036981526348326, + 0.14614522219808013, + 0.13189721391018894, + 0.1195542064115957, + 0.10563782626693999, + 0.09264731759515901, + 0.08151098894972907, + 0.07179282589781064, + 0.06718809488746855, + 0.0589830837829073, + 0.05091542243198664, + 0.04770976847546451, + 0.04968874369816924 + ], + "train_acc": [ + 0.7505787037037037, + 0.8262731481481481, + 0.8545717592592592, + 0.8723958333333334, + 0.8863425925925926, + 0.8999421296296296, + 0.9134837962962963, + 0.9256365740740741, + 0.9347800925925925, + 0.9429398148148148, + 0.9482060185185185, + 0.9564236111111111, + 0.9633680555555556, + 0.9653356481481481, + 0.9654513888888889 + ], + "train_auc": [ + 0.8410518832876086, + 0.9122108178297896, + 0.9365969543038409, + 0.9480229677497714, + 0.9581318676411752, + 0.9660167681184271, + 0.9734348743998629, + 0.9792336194701645, + 0.9840432634602194, + 0.9876275380801325, + 0.9892473100994513, + 0.9917352448416781, + 0.9936879322416554, + 0.9943118337762917, + 0.9941567358253316 + ], + "train_f1": [ + 0.8182814739860022, + 0.8771384136858476, + 0.8980320551836072, + 0.9110133580854756, + 0.9214211410738578, + 0.9310193496908039, + 0.9408272313477142, + 0.9492155080425246, + 0.9556246800803244, + 0.9612695419907298, + 0.9649857204334729, + 0.9705893840565559, + 0.975326447086338, + 0.976642620393839, + 0.9767713318547916 + ], + "val_loss": 
[ + 0.18931125878977278, + 0.22387202978134155, + 0.1712254531060656, + 0.15280801858752965, + 0.15293544462571543, + 0.1433148423054566, + 0.1387926299435397, + 0.1622421024988095, + 0.18698418781471748, + 0.16526137704883392, + 0.17185187203188737, + 0.15588134233257733, + 0.15483337138430214, + 0.1561354076412196, + 0.1543029025197029 + ], + "val_acc": [ + 0.8484375, + 0.73125, + 0.8677083333333333, + 0.8979166666666667, + 0.8494791666666667, + 0.8953125, + 0.8848958333333333, + 0.8604166666666667, + 0.8463541666666666, + 0.8817708333333333, + 0.8614583333333333, + 0.8911458333333333, + 0.8963541666666667, + 0.8895833333333333, + 0.8963541666666667 + ], + "val_auc": [ + 0.9208955439814814, + 0.9001779513888889, + 0.9425021701388889, + 0.9574074074074073, + 0.9547460937499999, + 0.9589749710648148, + 0.9629311342592594, + 0.9570898437500001, + 0.9581785300925926, + 0.9601280381944444, + 0.9621542245370371, + 0.9644046585648149, + 0.9662514467592591, + 0.9656597222222223, + 0.9666919849537038 + ], + "val_f1": [ + 0.8959599570968895, + 0.7873042044517725, + 0.9077705156136529, + 0.9313244569025928, + 0.8909845341380611, + 0.9278794402583423, + 0.9199565374864179, + 0.8998505231689088, + 0.8877046060144652, + 0.9171835096680043, + 0.900374531835206, + 0.9246303642264695, + 0.9281847708408517, + 0.9230210602759622, + 0.9281847708408517 + ] + }, + "test_metrics": { + "accuracy": 0.9064583333333334, + "auc_roc": 0.9700229166666666, + "f1": 0.9354421279654924, + "confusion_matrix": [ + [ + 1098, + 102 + ], + [ + 347, + 3253 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9116666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 106, + 1094 + ] + ], + "n": 1200, + "detection_rate": 0.9116666666666666, + "pairwise_auc": 0.9703850694444445, + "pairwise_f1": 0.9131886477462438 + }, + "insight": { + "accuracy": 0.8608333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 
167, + 1033 + ] + ], + "n": 1200, + "detection_rate": 0.8608333333333333, + "pairwise_auc": 0.9591802083333332, + "pairwise_f1": 0.884796573875803 + }, + "text2img": { + "accuracy": 0.9383333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 74, + 1126 + ] + ], + "n": 1200, + "detection_rate": 0.9383333333333334, + "pairwise_auc": 0.9805034722222222, + "pairwise_f1": 0.9275123558484349 + }, + "wiki": { + "accuracy": 0.915, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1098, + 102 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.08499999999999996 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9133333333333333, + "auc_roc": 0.9703850694444445, + "f1": 0.9131886477462438, + "confusion_matrix": [ + [ + 1098, + 102 + ], + [ + 106, + 1094 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8879166666666667, + "auc_roc": 0.9591802083333332, + "f1": 0.884796573875803, + "confusion_matrix": [ + [ + 1098, + 102 + ], + [ + 167, + 1033 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9266666666666666, + "auc_roc": 0.9805034722222222, + "f1": 0.9275123558484349, + "confusion_matrix": [ + [ + 1098, + 102 + ], + [ + 74, + 1126 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.9005000000000001, + "std": 0.010394351096736275, + "ci_95": 0.009111050448646307, + "values": [ + 0.8841666666666667, + 0.90625, + 0.909375, + 0.89625, + 0.9064583333333334 + ] + }, + "auc_roc": { + "mean": 0.9664783333333332, + "std": 0.0039722196924161105, + "ci_95": 0.0034818040754918456, + "values": [ + 0.9598277777777778, + 0.9686325231481481, + 0.9661710648148147, + 0.9677373842592593, + 0.9700229166666666 + ] + }, + "f1": { + "mean": 0.9312858759322247, + "std": 0.00791233698716453, + "ci_95": 
0.006935469158760821, + "values": [ + 0.9190212642004079, + 0.9355300859598854, + 0.938567998870216, + 0.9278679026651216, + 0.9354421279654924 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.9046666666666667, + "std": 0.027167177909299976, + "ci_95": 0.02381308137231394, + "values": [ + 0.8633333333333333, + 0.915, + 0.9366666666666666, + 0.8966666666666666, + 0.9116666666666666 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9046666666666667, + "std": 0.027167177909299976, + "ci_95": 0.02381308137231394, + "values": [ + 0.8633333333333333, + 0.915, + 0.9366666666666666, + 0.8966666666666666, + 0.9116666666666666 + ] + }, + "pairwise_auc": { + "mean": 0.9680859722222221, + "std": 0.00646149662394386, + "ci_95": 0.005663751509509356, + "values": [ + 0.9568444444444445, + 0.9732482638888889, + 0.9708569444444444, + 0.9690951388888888, + 0.9703850694444445 + ] + }, + "pairwise_f1": { + "mean": 0.9034304609606174, + "std": 0.011964851865417849, + "ci_95": 0.010487655067821474, + "values": [ + 0.8828291435875586, + 0.9100704517198508, + 0.9057211925866236, + 0.9053428691628103, + 0.9131886477462438 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.8504999999999999, + "std": 0.016589404248897353, + "ci_95": 0.01454125395784926, + "values": [ + 0.835, + 0.8575, + 0.8683333333333333, + 0.8308333333333333, + 0.8608333333333333 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.8504999999999999, + "std": 0.016589404248897353, + "ci_95": 0.01454125395784926, + "values": [ + 0.835, + 0.8575, + 0.8683333333333333, + 0.8308333333333333, + 0.8608333333333333 + ] + }, + "pairwise_auc": { + "mean": 0.9507795833333332, + "std": 0.004918968065477247, + "ci_95": 0.004311665613634642, + "values": [ + 
0.9470201388888889, + 0.9496430555555556, + 0.9475027777777779, + 0.9505517361111111, + 0.9591802083333332 + ] + }, + "pairwise_f1": { + "mean": 0.8730469143432138, + "std": 0.008019144787617885, + "ci_95": 0.007029090323678467, + "values": [ + 0.8664072632944229, + 0.8779863481228669, + 0.8683333333333333, + 0.8677110530896431, + 0.884796573875803 + ] + } + }, + "text2img": { + "accuracy": { + "mean": 0.9446666666666668, + "std": 0.012591332998005671, + "ci_95": 0.011036789992263745, + "values": [ + 0.9308333333333333, + 0.9483333333333334, + 0.9641666666666666, + 0.9416666666666667, + 0.9383333333333334 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9446666666666668, + "std": 0.012591332998005671, + "ci_95": 0.011036789992263745, + "values": [ + 0.9308333333333333, + 0.9483333333333334, + 0.9641666666666666, + 0.9416666666666667, + 0.9383333333333334 + ] + }, + "pairwise_auc": { + "mean": 0.9805694444444445, + "std": 0.003146418574455627, + "ci_95": 0.0027579574807151, + "values": [ + 0.97561875, + 0.9830062500000001, + 0.9801534722222222, + 0.9835652777777779, + 0.9805034722222222 + ] + }, + "pairwise_f1": { + "mean": 0.9250385518415154, + "std": 0.004591418175544836, + "ci_95": 0.004024555476229355, + "values": [ + 0.9200988467874794, + 0.9278434569914391, + 0.920079522862823, + 0.9296585767174003, + 0.9275123558484349 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.9021666666666667, + "std": 0.019548728290550745, + "ci_95": 0.01713521584988713, + "values": [ + 0.9075, + 0.9041666666666667, + 0.8683333333333333, + 0.9158333333333334, + 0.915 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.09783333333333333, + "std": 0.019548728290550745, + "ci_95": 0.01713521584988713, + "values": [ + 0.09250000000000003, + 0.09583333333333333, + 
0.1316666666666667, + 0.08416666666666661, + 0.08499999999999996 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.9034166666666665, + "std": 0.010830929220421394, + "ci_95": 0.009493728046569607, + "values": [ + 0.8854166666666666, + 0.9095833333333333, + 0.9025, + 0.90625, + 0.9133333333333333 + ] + }, + "auc_roc": { + "mean": 0.9680859722222221, + "std": 0.00646149662394386, + "ci_95": 0.005663751509509356, + "values": [ + 0.9568444444444445, + 0.9732482638888889, + 0.9708569444444444, + 0.9690951388888888, + 0.9703850694444445 + ] + }, + "f1": { + "mean": 0.9034304609606174, + "std": 0.011964851865417849, + "ci_95": 0.010487655067821474, + "values": [ + 0.8828291435875586, + 0.9100704517198508, + 0.9057211925866236, + 0.9053428691628103, + 0.9131886477462438 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.8763333333333332, + "std": 0.007957133417394903, + "ci_95": 0.006974734961113443, + "values": [ + 0.87125, + 0.8808333333333334, + 0.8683333333333333, + 0.8733333333333333, + 0.8879166666666667 + ] + }, + "auc_roc": { + "mean": 0.9507795833333332, + "std": 0.004918968065477247, + "ci_95": 0.004311665613634642, + "values": [ + 0.9470201388888889, + 0.9496430555555556, + 0.9475027777777779, + 0.9505517361111111, + 0.9591802083333332 + ] + }, + "f1": { + "mean": 0.8730469143432138, + "std": 0.008019144787617885, + "ci_95": 0.007029090323678467, + "values": [ + 0.8664072632944229, + 0.8779863481228669, + 0.8683333333333333, + 0.8677110530896431, + 0.884796573875803 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.9234166666666667, + "std": 0.005395793114393207, + "ci_95": 0.0047296211969529356, + "values": [ + 0.9191666666666667, + 0.92625, + 0.91625, + 0.92875, + 0.9266666666666666 + ] + }, + "auc_roc": { + "mean": 0.9805694444444445, + "std": 0.003146418574455627, + "ci_95": 0.0027579574807151, + "values": [ + 0.97561875, + 0.9830062500000001, + 0.9801534722222222, + 
0.9835652777777779, + 0.9805034722222222 + ] + }, + "f1": { + "mean": 0.9250385518415154, + "std": 0.004591418175544836, + "ci_95": 0.004024555476229355, + "values": [ + 0.9200988467874794, + 0.9278434569914391, + 0.920079522862823, + 0.9296585767174003, + 0.9275123558484349 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data", + "run_name": "p2d_resnet18_aug", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": { + "hflip_p": 0.5, + "rotation_degrees": 10, + "brightness": 0.2, + "contrast": 0.2, + "saturation": 0.1, + "hue": 0.02, + "grayscale_p": 0.1, + "blur_p": 0.1, + "erase_p": 0.2, + "noise_p": 0.3, + "noise_std": 0.04 + } + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p2d_simplecnn_aug.json b/classifier/outputs/logs/p2d_simplecnn_aug.json new file mode 100644 index 0000000..9b22c23 --- /dev/null +++ b/classifier/outputs/logs/p2d_simplecnn_aug.json @@ -0,0 +1,1902 @@ +{ + "run_name": "p2d_simplecnn_aug", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.3432222083486893, + 0.33507634751774645, + 0.3293182541374807, + 0.3259058644650159, + 0.32449627405515424, + 0.32151402560649095, + 0.3206712271604273, + 0.31893986742253655, + 0.3183640861952746, + 0.31630628774011577, + 0.3165576588224482, + 0.31553808332041455, + 0.31364991190256897, + 0.3127118207790233, + 0.3142870426453926 + ], + "train_acc": [ + 0.47175925925925927, + 0.5883101851851852, + 0.5983217592592592, + 0.6283564814814815, + 0.6300925925925925, + 0.6409143518518519, + 0.6443287037037037, + 0.6449652777777778, + 0.6493055555555556, + 0.6523148148148148, + 0.6599537037037037, + 0.6554398148148148, + 0.6583912037037037, + 0.6597800925925926, + 
0.6587384259259259 + ], + "train_auc": [ + 0.5713025745170325, + 0.6252053951617511, + 0.6522195162465707, + 0.6678279856824417, + 0.6717118734282121, + 0.6834617466135117, + 0.6866383298325331, + 0.6901815950788752, + 0.6914628075703018, + 0.6999451749685642, + 0.7001394783093279, + 0.7021001496770691, + 0.7075707751057384, + 0.7095949074074074, + 0.7072277413408778 + ], + "train_f1": [ + 0.5276829142088378, + 0.6807861437673876, + 0.6865091910934465, + 0.718456817185445, + 0.7196245284674094, + 0.7295707125735454, + 0.7319200907266858, + 0.7327146778198929, + 0.737707756232687, + 0.7388961321164711, + 0.7473774720550301, + 0.7430519592611773, + 0.7438934444010586, + 0.7455749340026832, + 0.7448842742807701 + ], + "val_loss": [ + 0.33837437480688093, + 0.33807249913613, + 0.32194422334432604, + 0.32031315887967743, + 0.3163623419900735, + 0.31524986525376636, + 0.3115694376329581, + 0.31178189168373743, + 0.3080384400983652, + 0.31005625476439796, + 0.3096416714290778, + 0.3075092243651549, + 0.30712711364030837, + 0.30760360732674596, + 0.30752624372641246 + ], + "val_acc": [ + 0.6161458333333333, + 0.4421875, + 0.6005208333333333, + 0.6401041666666667, + 0.6114583333333333, + 0.5817708333333333, + 0.6223958333333334, + 0.665625, + 0.6479166666666667, + 0.6046875, + 0.6119791666666666, + 0.6223958333333334, + 0.6401041666666667, + 0.6291666666666667, + 0.6286458333333333 + ], + "val_auc": [ + 0.6138527199074074, + 0.6485691550925926, + 0.6915386284722222, + 0.68408203125, + 0.7003399884259259, + 0.7102575231481483, + 0.7108159722222221, + 0.7113266782407408, + 0.7213946759259259, + 0.7176294849537037, + 0.7190118634259258, + 0.7246643518518519, + 0.721712962962963, + 0.7221448206018518, + 0.7223683449074074 + ], + "val_f1": [ + 0.7271380970011107, + 0.4413145539906103, + 0.6737558485750744, + 0.7297614391865468, + 0.6873428331936295, + 0.6448474126492703, + 0.7070707070707071, + 0.7530769230769231, + 0.7308917197452229, + 0.6768837803320562, + 0.6825734980826587, 
+ 0.6980424822990421, + 0.7210335082761405, + 0.7052980132450332, + 0.7050062060405461 + ] + }, + "test_metrics": { + "accuracy": 0.6289583333333333, + "auc_roc": 0.7213978009259259, + "f1": 0.7043983402489626, + "confusion_matrix": [ + [ + 897, + 303 + ], + [ + 1478, + 2122 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.5691666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 517, + 683 + ] + ], + "n": 1200, + "detection_rate": 0.5691666666666667, + "pairwise_auc": 0.7173145833333333, + "pairwise_f1": 0.6248856358645929 + }, + "insight": { + "accuracy": 0.3883333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 734, + 466 + ] + ], + "n": 1200, + "detection_rate": 0.3883333333333333, + "pairwise_auc": 0.5887399305555556, + "pairwise_f1": 0.473336719146775 + }, + "text2img": { + "accuracy": 0.8108333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 227, + 973 + ] + ], + "n": 1200, + "detection_rate": 0.8108333333333333, + "pairwise_auc": 0.8581388888888889, + "pairwise_f1": 0.7859450726978998 + }, + "wiki": { + "accuracy": 0.7475, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 897, + 303 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.25249999999999995 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.6583333333333333, + "auc_roc": 0.7173145833333333, + "f1": 0.6248856358645929, + "confusion_matrix": [ + [ + 897, + 303 + ], + [ + 517, + 683 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.5679166666666666, + "auc_roc": 0.5887399305555556, + "f1": 0.473336719146775, + "confusion_matrix": [ + [ + 897, + 303 + ], + [ + 734, + 466 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7791666666666667, + 
"auc_roc": 0.8581388888888889, + "f1": 0.7859450726978998, + "confusion_matrix": [ + [ + 897, + 303 + ], + [ + 227, + 973 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.34135231695793294, + 0.33233658282293216, + 0.32671470636570893, + 0.3230978334667506, + 0.32052963414677865, + 0.32000469129394604, + 0.3171410999364323, + 0.31599997883593595, + 0.3153689634192873, + 0.3127501525260784, + 0.3129213980226605, + 0.3114956370934292, + 0.3121951187098468, + 0.30899017282106256, + 0.3110985143593064 + ], + "train_acc": [ + 0.5291666666666667, + 0.5928240740740741, + 0.6244212962962963, + 0.6361111111111111, + 0.6405671296296296, + 0.6410300925925926, + 0.65, + 0.6569444444444444, + 0.6519675925925926, + 0.656886574074074, + 0.6583912037037037, + 0.6601273148148148, + 0.6627893518518518, + 0.6655092592592593, + 0.6626157407407407 + ], + "train_auc": [ + 0.5842827021176269, + 0.6402050111454047, + 0.6614736493341336, + 0.6769979388145863, + 0.6854288033693415, + 0.6863612754343851, + 0.6953566172267946, + 0.7005833208304755, + 0.699118093064129, + 0.7068048053840877, + 0.7072928722993828, + 0.7110104898976908, + 0.710223479652492, + 0.7191083319044352, + 0.7136100751600366 + ], + "train_f1": [ + 0.6125345270978189, + 0.6816577685277351, + 0.7161228238999212, + 0.7248621685481754, + 0.7293563989716327, + 0.7291621184997599, + 0.7388149939540508, + 0.7439308855291576, + 0.7392246986384529, + 0.743699476937708, + 0.7450218133125999, + 0.7459884953072964, + 0.7497530599098132, + 0.7512908777969018, + 0.7481859018659295 + ], + "val_loss": [ + 0.3324682280421257, + 0.3229331818719705, + 0.3223130183915297, + 0.3171472112337748, + 0.32741391447683177, + 0.3114958000679811, + 0.31507122814655303, + 0.3176405022541682, + 0.30666357489923635, + 0.30443483479321004, + 0.3099006695051988, + 0.30612343152364097, + 0.3034300667544206, + 0.3031110850473245, + 0.3030962315698465 + ], + "val_acc": 
[ + 0.49947916666666664, + 0.6625, + 0.6666666666666666, + 0.5552083333333333, + 0.7348958333333333, + 0.6114583333333333, + 0.5604166666666667, + 0.696875, + 0.6619791666666667, + 0.6583333333333333, + 0.590625, + 0.6302083333333334, + 0.6645833333333333, + 0.6677083333333333, + 0.6526041666666667 + ], + "val_auc": [ + 0.6646889467592592, + 0.6834613715277779, + 0.6835836226851852, + 0.7176895254629629, + 0.7107602719907408, + 0.717976707175926, + 0.72703125, + 0.7082154224537037, + 0.729400318287037, + 0.7377495659722222, + 0.7337123842592592, + 0.7368851273148148, + 0.7385590277777777, + 0.740538917824074, + 0.7409114583333333 + ], + "val_f1": [ + 0.5399712781235041, + 0.757847533632287, + 0.7638376383763837, + 0.6060885608856088, + 0.8294807370184255, + 0.6841659610499576, + 0.6110599078341014, + 0.7888243831640058, + 0.7415372361608921, + 0.7384370015948963, + 0.6528268551236749, + 0.70042194092827, + 0.744241461477363, + 0.7452076677316294, + 0.7285307285307285 + ] + }, + "test_metrics": { + "accuracy": 0.6535416666666667, + "auc_roc": 0.7351962962962963, + "f1": 0.7312974632412345, + "confusion_matrix": [ + [ + 874, + 326 + ], + [ + 1337, + 2263 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.5816666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 502, + 698 + ] + ], + "n": 1200, + "detection_rate": 0.5816666666666667, + "pairwise_auc": 0.7138361111111111, + "pairwise_f1": 0.6276978417266187 + }, + "insight": { + "accuracy": 0.4625, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 645, + 555 + ] + ], + "n": 1200, + "detection_rate": 0.4625, + "pairwise_auc": 0.6235090277777777, + "pairwise_f1": 0.5333974050937049 + }, + "text2img": { + "accuracy": 0.8416666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 190, + 1010 + ] + ], + "n": 1200, + "detection_rate": 0.8416666666666667, + "pairwise_auc": 0.86824375, + 
"pairwise_f1": 0.7965299684542587 + }, + "wiki": { + "accuracy": 0.7283333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 874, + 326 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.2716666666666666 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.655, + "auc_roc": 0.7138361111111111, + "f1": 0.6276978417266187, + "confusion_matrix": [ + [ + 874, + 326 + ], + [ + 502, + 698 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.5954166666666667, + "auc_roc": 0.6235090277777777, + "f1": 0.5333974050937049, + "confusion_matrix": [ + [ + 874, + 326 + ], + [ + 645, + 555 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.785, + "auc_roc": 0.86824375, + "f1": 0.7965299684542587, + "confusion_matrix": [ + [ + 874, + 326 + ], + [ + 190, + 1010 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.3432017128776621, + 0.3361322825429616, + 0.3312958776122994, + 0.327618961284558, + 0.32551107191377215, + 0.3221943142513434, + 0.32040270804255094, + 0.3188823620478312, + 0.31841765749785633, + 0.3170403319928381, + 0.3163535770166803, + 0.3154812162673032, + 0.31375355403180477, + 0.3136919692986541, + 0.3128732605940766 + ], + "train_acc": [ + 0.6121527777777778, + 0.580787037037037, + 0.6019097222222223, + 0.6129629629629629, + 0.6235532407407407, + 0.6311342592592593, + 0.6418402777777777, + 0.6409143518518519, + 0.6494791666666667, + 0.6483796296296296, + 0.6509837962962963, + 0.6536458333333334, + 0.6546296296296297, + 0.6572337962962963, + 0.6597800925925926 + ], + "train_auc": [ + 0.5712090084876542, + 0.6187098926183128, + 0.6427650605852766, + 0.6595359207104481, + 0.6664719435871056, + 0.6794381483624827, + 0.6827242208933471, + 0.6896292902663466, + 
0.6909287569301554, + 0.6965370745456104, + 0.69892219114369, + 0.699491071173411, + 0.7047945869770234, + 0.7075826349594193, + 0.7091430005429813 + ], + "train_f1": [ + 0.7228287841191067, + 0.6733405483405484, + 0.6935720967526393, + 0.7016416845110636, + 0.7130189261922619, + 0.7198980488662331, + 0.7315316878497375, + 0.7288616998033647, + 0.7379396876216847, + 0.7361931226120181, + 0.7382037591700308, + 0.7414575143634714, + 0.7423365857870651, + 0.744290463238786, + 0.7459707038845439 + ], + "val_loss": [ + 0.3381376507381598, + 0.33743989691138265, + 0.32400355438391365, + 0.32913788755734763, + 0.31807966232299806, + 0.31570037131508194, + 0.31126136556267736, + 0.3205740148822466, + 0.31344808464248974, + 0.30750460426012677, + 0.3072973035275936, + 0.3085544841984908, + 0.30584329267342886, + 0.3057563436528047, + 0.3066906807323297 + ], + "val_acc": [ + 0.6020833333333333, + 0.44114583333333335, + 0.5770833333333333, + 0.7052083333333333, + 0.6677083333333333, + 0.6604166666666667, + 0.6364583333333333, + 0.5380208333333333, + 0.590625, + 0.6546875, + 0.6338541666666667, + 0.621875, + 0.6427083333333333, + 0.6375, + 0.6270833333333333 + ], + "val_auc": [ + 0.6092418981481482, + 0.643970630787037, + 0.6784903067129631, + 0.6768164062500001, + 0.7009657118055556, + 0.7037912326388889, + 0.7130975115740741, + 0.705298755787037, + 0.7108998842592593, + 0.7225245949074073, + 0.7233926504629629, + 0.719126880787037, + 0.7292968750000001, + 0.7291876446759259, + 0.7279484953703705 + ], + "val_f1": [ + 0.6994492525570417, + 0.43615344193378874, + 0.6469565217391304, + 0.8064295485636115, + 0.759245283018868, + 0.7478731631863882, + 0.7157980456026058, + 0.5829807240244476, + 0.6591500433651344, + 0.734906037584966, + 0.7096241222635274, + 0.6959798994974874, + 0.7162944582299421, + 0.7121588089330024, + 0.6986531986531986 + ] + }, + "test_metrics": { + "accuracy": 0.6375, + "auc_roc": 0.7324938657407407, + "f1": 0.7140039447731755, + "confusion_matrix": [ + [ + 
888, + 312 + ], + [ + 1428, + 2172 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.5583333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 530, + 670 + ] + ], + "n": 1200, + "detection_rate": 0.5583333333333333, + "pairwise_auc": 0.7142305555555555, + "pairwise_f1": 0.614115490375802 + }, + "insight": { + "accuracy": 0.4191666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 697, + 503 + ] + ], + "n": 1200, + "detection_rate": 0.4191666666666667, + "pairwise_auc": 0.6156399305555555, + "pairwise_f1": 0.4992555831265509 + }, + "text2img": { + "accuracy": 0.8325, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 201, + 999 + ] + ], + "n": 1200, + "detection_rate": 0.8325, + "pairwise_auc": 0.8676111111111111, + "pairwise_f1": 0.7956989247311828 + }, + "wiki": { + "accuracy": 0.74, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 888, + 312 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.26 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.6491666666666667, + "auc_roc": 0.7142305555555555, + "f1": 0.614115490375802, + "confusion_matrix": [ + [ + 888, + 312 + ], + [ + 530, + 670 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.5795833333333333, + "auc_roc": 0.6156399305555555, + "f1": 0.4992555831265509, + "confusion_matrix": [ + [ + 888, + 312 + ], + [ + 697, + 503 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.78625, + "auc_roc": 0.8676111111111111, + "f1": 0.7956989247311828, + "confusion_matrix": [ + [ + 888, + 312 + ], + [ + 201, + 999 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.3422969259045742, + 
0.3340135246239327, + 0.32988277044561176, + 0.3258236734403504, + 0.3224401587413417, + 0.3208613306559898, + 0.3194242352688754, + 0.3181941964560085, + 0.3169373148845302, + 0.3154708259911449, + 0.3140328030343409, + 0.3134019218661167, + 0.3126688904784344, + 0.3126100615494781, + 0.31205870856841406 + ], + "train_acc": [ + 0.6261574074074074, + 0.5974537037037037, + 0.6171875, + 0.6219907407407408, + 0.6372106481481481, + 0.6428819444444445, + 0.639988425925926, + 0.6538194444444444, + 0.651099537037037, + 0.6548611111111111, + 0.6539930555555555, + 0.655787037037037, + 0.6585069444444445, + 0.6561342592592593, + 0.6594907407407408 + ], + "train_auc": [ + 0.5766818397776634, + 0.6320873074559901, + 0.6497502018318473, + 0.6660870699016919, + 0.6798354088077274, + 0.6841931905864198, + 0.6877101641089391, + 0.6927077260516689, + 0.6958822462991541, + 0.70048828125, + 0.7035443279892548, + 0.7064093935756743, + 0.7093209787237083, + 0.7091885913208733, + 0.7111335448102424 + ], + "train_f1": [ + 0.7368849788204627, + 0.6911464345972826, + 0.7099320324490244, + 0.7117896223085068, + 0.7272094338801619, + 0.7315439161265063, + 0.7281150299375028, + 0.7429087158329035, + 0.7379720978747447, + 0.7429975006463846, + 0.7405736104482146, + 0.7421536327379921, + 0.745020092468565, + 0.7423690600069373, + 0.7452813852813853 + ], + "val_loss": [ + 0.33704863985379535, + 0.3310091565052668, + 0.3267402487496535, + 0.32072843462228773, + 0.31991002758344017, + 0.3109689460446437, + 0.31181026846170423, + 0.307723306491971, + 0.3066505029797554, + 0.30859205350279806, + 0.3080542877316475, + 0.30592135017116867, + 0.30444662968317665, + 0.3036535983284315, + 0.30318543675045173 + ], + "val_acc": [ + 0.6494791666666667, + 0.4942708333333333, + 0.5135416666666667, + 0.5625, + 0.69375, + 0.6572916666666667, + 0.671875, + 0.6864583333333333, + 0.6260416666666667, + 0.5895833333333333, + 0.5911458333333334, + 0.6109375, + 0.6213541666666667, + 0.6270833333333333, + 0.6328125 + 
], + "val_auc": [ + 0.6172135416666666, + 0.6680613425925925, + 0.6827640335648149, + 0.6948546006944445, + 0.6994994212962964, + 0.7136349826388888, + 0.7139076967592592, + 0.7269574652777777, + 0.7284584780092592, + 0.7292078993055556, + 0.7312145543981481, + 0.7319089988425925, + 0.7353385416666667, + 0.7363563368055555, + 0.7372330729166666 + ], + "val_f1": [ + 0.7548269581056466, + 0.5233186057928326, + 0.5513928914505284, + 0.6229802513464991, + 0.7890961262553802, + 0.7419607843137255, + 0.7595419847328244, + 0.774024024024024, + 0.6988255033557047, + 0.6510186005314438, + 0.6521931767833408, + 0.6809055958991884, + 0.6938947368421052, + 0.7001675041876047, + 0.7078325735598839 + ] + }, + "test_metrics": { + "accuracy": 0.6372916666666667, + "auc_roc": 0.7464637731481482, + "f1": 0.7104606685514718, + "confusion_matrix": [ + [ + 923, + 277 + ], + [ + 1464, + 2136 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.5308333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 563, + 637 + ] + ], + "n": 1200, + "detection_rate": 0.5308333333333334, + "pairwise_auc": 0.7140843750000001, + "pairwise_f1": 0.6026490066225165 + }, + "insight": { + "accuracy": 0.42, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 696, + 504 + ] + ], + "n": 1200, + "detection_rate": 0.42, + "pairwise_auc": 0.6437659722222222, + "pairwise_f1": 0.508833922261484 + }, + "text2img": { + "accuracy": 0.8291666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 205, + 995 + ] + ], + "n": 1200, + "detection_rate": 0.8291666666666667, + "pairwise_auc": 0.8815409722222224, + "pairwise_f1": 0.8050161812297735 + }, + "wiki": { + "accuracy": 0.7691666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 923, + 277 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.23083333333333333 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + 
"sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.65, + "auc_roc": 0.7140843750000001, + "f1": 0.6026490066225165, + "confusion_matrix": [ + [ + 923, + 277 + ], + [ + 563, + 637 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.5945833333333334, + "auc_roc": 0.6437659722222222, + "f1": 0.508833922261484, + "confusion_matrix": [ + [ + 923, + 277 + ], + [ + 696, + 504 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7991666666666667, + "auc_roc": 0.8815409722222224, + "f1": 0.8050161812297735, + "confusion_matrix": [ + [ + 923, + 277 + ], + [ + 205, + 995 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.34190545937529315, + 0.33423557830629524, + 0.3278784419651385, + 0.3254611318586049, + 0.32319095537618353, + 0.3206246926828667, + 0.32062207385897634, + 0.318214114010334, + 0.3172161552089232, + 0.3168039749893877, + 0.31658775938881767, + 0.31374884711371526, + 0.3145395788605566, + 0.31365797674214396, + 0.3130338887925501 + ], + "train_acc": [ + 0.5179976851851852, + 0.6002893518518518, + 0.6147569444444444, + 0.6287615740740741, + 0.6365740740740741, + 0.6424189814814815, + 0.6464699074074074, + 0.6509259259259259, + 0.6591435185185185, + 0.6520833333333333, + 0.6596643518518519, + 0.6600115740740741, + 0.6618055555555555, + 0.6641782407407407, + 0.6588541666666666 + ], + "train_auc": [ + 0.580956290009145, + 0.6321801411751258, + 0.6597076385316644, + 0.6695928712277093, + 0.675423060985368, + 0.687096345236054, + 0.6861537012031321, + 0.693255342292524, + 0.6985244841678098, + 0.6981359221393462, + 0.6995920138888889, + 0.7069281907293097, + 0.7061520133173297, + 0.7091995759745084, + 0.7090486754115226 + ], + "train_f1": [ + 0.5996250540787387, + 0.693226737730402, + 0.7046190708612504, + 0.7195628415300547, + 0.7272885183255168, + 
0.7303277615327544, + 0.7357813243371827, + 0.7395734392539505, + 0.7449112169770463, + 0.740032863443743, + 0.7467596779055247, + 0.7460557596714934, + 0.7487100103199175, + 0.7499892292447546, + 0.7455651948724589 + ], + "val_loss": [ + 0.3369367082913717, + 0.3295597155888875, + 0.32405452529589335, + 0.31579154878854754, + 0.3234446888168653, + 0.31575660382707915, + 0.31100493085881076, + 0.30960125252604487, + 0.30768936971823374, + 0.3080344771345456, + 0.3091481186449528, + 0.3133338193098704, + 0.30498845701416333, + 0.30529720534880955, + 0.3054157773653666 + ], + "val_acc": [ + 0.5088541666666667, + 0.49166666666666664, + 0.5260416666666666, + 0.634375, + 0.5057291666666667, + 0.6807291666666667, + 0.684375, + 0.5989583333333334, + 0.6802083333333333, + 0.6005208333333333, + 0.5953125, + 0.5515625, + 0.6322916666666667, + 0.6296875, + 0.6286458333333333 + ], + "val_auc": [ + 0.625418113425926, + 0.675193142361111, + 0.6908600983796297, + 0.6994191261574073, + 0.7080750868055555, + 0.7012608506944443, + 0.7157660590277778, + 0.7237608506944445, + 0.7218901909722222, + 0.7242021122685185, + 0.7233080150462963, + 0.7265342881944444, + 0.7289518229166667, + 0.7290747974537036, + 0.7287854456018519 + ], + "val_f1": [ + 0.5692096847875743, + 0.528957528957529, + 0.5719661335841957, + 0.720763723150358, + 0.5345757724374693, + 0.7722036417688591, + 0.772351615326822, + 0.6666666666666666, + 0.7667173252279635, + 0.6701075268817205, + 0.6611426079372001, + 0.6015733456732995, + 0.708505367464905, + 0.7031315240083508, + 0.7015487651737129 + ] + }, + "test_metrics": { + "accuracy": 0.6358333333333334, + "auc_roc": 0.7374539351851852, + "f1": 0.7076923076923077, + "confusion_matrix": [ + [ + 936, + 264 + ], + [ + 1484, + 2116 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.5383333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 554, + 646 + ] + ], + "n": 1200, + "detection_rate": 0.5383333333333333, + 
"pairwise_auc": 0.7096381944444444, + "pairwise_f1": 0.6123222748815166 + }, + "insight": { + "accuracy": 0.4166666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 700, + 500 + ] + ], + "n": 1200, + "detection_rate": 0.4166666666666667, + "pairwise_auc": 0.6287520833333333, + "pairwise_f1": 0.5091649694501018 + }, + "text2img": { + "accuracy": 0.8083333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 230, + 970 + ] + ], + "n": 1200, + "detection_rate": 0.8083333333333333, + "pairwise_auc": 0.873971527777778, + "pairwise_f1": 0.7970419063270336 + }, + "wiki": { + "accuracy": 0.78, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 936, + 264 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.21999999999999997 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.6591666666666667, + "auc_roc": 0.7096381944444444, + "f1": 0.6123222748815166, + "confusion_matrix": [ + [ + 936, + 264 + ], + [ + 554, + 646 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.5983333333333334, + "auc_roc": 0.6287520833333333, + "f1": 0.5091649694501018, + "confusion_matrix": [ + [ + 936, + 264 + ], + [ + 700, + 500 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7941666666666667, + "auc_roc": 0.873971527777778, + "f1": 0.7970419063270336, + "confusion_matrix": [ + [ + 936, + 264 + ], + [ + 230, + 970 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.638625, + "std": 0.009038641967193494, + "ci_95": 0.007922719002267414, + "values": [ + 0.6289583333333333, + 0.6535416666666667, + 0.6375, + 0.6372916666666667, + 0.6358333333333334 + ] + }, + "auc_roc": { + "mean": 0.7346011342592592, + "std": 0.009055157150786027, + "ci_95": 0.00793719519895154, + "values": [ + 
0.7213978009259259, + 0.7351962962962963, + 0.7324938657407407, + 0.7464637731481482, + 0.7374539351851852 + ] + }, + "f1": { + "mean": 0.7135705449014305, + "std": 0.010521238176426851, + "ci_95": 0.009222271877822893, + "values": [ + 0.7043983402489626, + 0.7312974632412345, + 0.7140039447731755, + 0.7104606685514718, + 0.7076923076923077 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.5556666666666666, + "std": 0.021109831832584545, + "ci_95": 0.018503583436729216, + "values": [ + 0.5691666666666667, + 0.5816666666666667, + 0.5583333333333333, + 0.5308333333333334, + 0.5383333333333333 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.5556666666666666, + "std": 0.021109831832584545, + "ci_95": 0.018503583436729216, + "values": [ + 0.5691666666666667, + 0.5816666666666667, + 0.5583333333333333, + 0.5308333333333334, + 0.5383333333333333 + ] + }, + "pairwise_auc": { + "mean": 0.7138207638888889, + "std": 0.002735795909474015, + "ci_95": 0.002398030845450705, + "values": [ + 0.7173145833333333, + 0.7138361111111111, + 0.7142305555555555, + 0.7140843750000001, + 0.7096381944444444 + ] + }, + "pairwise_f1": { + "mean": 0.6163340498942093, + "std": 0.010131166540143349, + "ci_95": 0.008880359013451697, + "values": [ + 0.6248856358645929, + 0.6276978417266187, + 0.614115490375802, + 0.6026490066225165, + 0.6123222748815166 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.4213333333333333, + "std": 0.026513885251149293, + "ci_95": 0.023240445109525967, + "values": [ + 0.3883333333333333, + 0.4625, + 0.4191666666666667, + 0.42, + 0.4166666666666667 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.4213333333333333, + "std": 0.026513885251149293, + "ci_95": 0.023240445109525967, + "values": [ + 
0.3883333333333333, + 0.4625, + 0.4191666666666667, + 0.42, + 0.4166666666666667 + ] + }, + "pairwise_auc": { + "mean": 0.620081388888889, + "std": 0.020309497167347312, + "ci_95": 0.01780205917197098, + "values": [ + 0.5887399305555556, + 0.6235090277777777, + 0.6156399305555555, + 0.6437659722222222, + 0.6287520833333333 + ] + }, + "pairwise_f1": { + "mean": 0.5047977198157233, + "std": 0.021643796107226543, + "ci_95": 0.01897162375966632, + "values": [ + 0.473336719146775, + 0.5333974050937049, + 0.4992555831265509, + 0.508833922261484, + 0.5091649694501018 + ] + } + }, + "text2img": { + "accuracy": { + "mean": 0.8244999999999999, + "std": 0.014392802985443043, + "ci_95": 0.012615848057987322, + "values": [ + 0.8108333333333333, + 0.8416666666666667, + 0.8325, + 0.8291666666666667, + 0.8083333333333333 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.8244999999999999, + "std": 0.014392802985443043, + "ci_95": 0.012615848057987322, + "values": [ + 0.8108333333333333, + 0.8416666666666667, + 0.8325, + 0.8291666666666667, + 0.8083333333333333 + ] + }, + "pairwise_auc": { + "mean": 0.8699012500000002, + "std": 0.00863706367765329, + "ci_95": 0.007570720111617017, + "values": [ + 0.8581388888888889, + 0.86824375, + 0.8676111111111111, + 0.8815409722222224, + 0.873971527777778 + ] + }, + "pairwise_f1": { + "mean": 0.7960464106880297, + "std": 0.006779369420033433, + "ci_95": 0.005942379300169007, + "values": [ + 0.7859450726978998, + 0.7965299684542587, + 0.7956989247311828, + 0.8050161812297735, + 0.7970419063270336 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.7529999999999999, + "std": 0.021201742590855322, + "ci_95": 0.018584146768445164, + "values": [ + 0.7475, + 0.7283333333333334, + 0.74, + 0.7691666666666667, + 0.78 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, 
+ "false_alarm_rate": { + "mean": 0.24699999999999997, + "std": 0.02120174259085532, + "ci_95": 0.01858414676844516, + "values": [ + 0.25249999999999995, + 0.2716666666666666, + 0.26, + 0.23083333333333333, + 0.21999999999999997 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.6543333333333334, + "std": 0.004617298392398352, + "ci_95": 0.004047240486498859, + "values": [ + 0.6583333333333333, + 0.655, + 0.6491666666666667, + 0.65, + 0.6591666666666667 + ] + }, + "auc_roc": { + "mean": 0.7138207638888889, + "std": 0.002735795909474015, + "ci_95": 0.002398030845450705, + "values": [ + 0.7173145833333333, + 0.7138361111111111, + 0.7142305555555555, + 0.7140843750000001, + 0.7096381944444444 + ] + }, + "f1": { + "mean": 0.6163340498942093, + "std": 0.010131166540143349, + "ci_95": 0.008880359013451697, + "values": [ + 0.6248856358645929, + 0.6276978417266187, + 0.614115490375802, + 0.6026490066225165, + 0.6123222748815166 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.5871666666666666, + "std": 0.012998397337107918, + "ci_95": 0.011393597617375616, + "values": [ + 0.5679166666666666, + 0.5954166666666667, + 0.5795833333333333, + 0.5945833333333334, + 0.5983333333333334 + ] + }, + "auc_roc": { + "mean": 0.620081388888889, + "std": 0.020309497167347312, + "ci_95": 0.01780205917197098, + "values": [ + 0.5887399305555556, + 0.6235090277777777, + 0.6156399305555555, + 0.6437659722222222, + 0.6287520833333333 + ] + }, + "f1": { + "mean": 0.5047977198157233, + "std": 0.021643796107226543, + "ci_95": 0.01897162375966632, + "values": [ + 0.473336719146775, + 0.5333974050937049, + 0.4992555831265509, + 0.508833922261484, + 0.5091649694501018 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.7887500000000001, + "std": 0.007905694150420953, + "ci_95": 0.006929646455628169, + "values": [ + 0.7791666666666667, + 0.785, + 0.78625, + 0.7991666666666667, + 0.7941666666666667 + ] + }, + "auc_roc": { + 
"mean": 0.8699012500000002, + "std": 0.00863706367765329, + "ci_95": 0.007570720111617017, + "values": [ + 0.8581388888888889, + 0.86824375, + 0.8676111111111111, + 0.8815409722222224, + 0.873971527777778 + ] + }, + "f1": { + "mean": 0.7960464106880297, + "std": 0.006779369420033433, + "ci_95": 0.005942379300169007, + "values": [ + 0.7859450726978998, + 0.7965299684542587, + 0.7956989247311828, + 0.8050161812297735, + 0.7970419063270336 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data", + "run_name": "p2d_simplecnn_aug", + "backbone": "simple_cnn", + "cnn_preset": "medium", + "dropout": 0.0, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": { + "hflip_p": 0.5, + "rotation_degrees": 10, + "brightness": 0.2, + "contrast": 0.2, + "saturation": 0.1, + "hue": 0.02, + "grayscale_p": 0.1, + "blur_p": 0.1, + "erase_p": 0.2, + "noise_p": 0.3, + "noise_std": 0.04 + } + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p2e_resnet18_facecrop_aug.json b/classifier/outputs/logs/p2e_resnet18_facecrop_aug.json new file mode 100644 index 0000000..a9ac325 --- /dev/null +++ b/classifier/outputs/logs/p2e_resnet18_facecrop_aug.json @@ -0,0 +1,1901 @@ +{ + "run_name": "p2e_resnet18_facecrop_aug", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.2371339483945458, + 0.17965929021162017, + 0.15553104198641246, + 0.13898775200187055, + 0.1248128803205435, + 0.10731902387062156, + 0.09722404294060888, + 0.08543110447159658, + 0.07318314593371555, + 0.06297332528305964, + 0.05319999221591624, + 0.0490316048954372, + 0.04252813166736073, + 0.03890426906312954, + 0.0383156930814342 + ], + "train_acc": [ + 0.760011574074074, + 0.8401041666666667, + 0.8654513888888888, + 0.8833333333333333, + 
0.9017361111111111, + 0.913599537037037, + 0.9214699074074074, + 0.9371527777777777, + 0.9464699074074074, + 0.9534722222222223, + 0.9603587962962963, + 0.9658564814814815, + 0.970949074074074, + 0.9711805555555556, + 0.9736111111111111 + ], + "train_auc": [ + 0.8536383405492685, + 0.9208042641889573, + 0.9413705007573159, + 0.9538272765917925, + 0.9635684227823502, + 0.9724931144975995, + 0.9774411026091678, + 0.982780215835048, + 0.9872993112711477, + 0.9905945466106538, + 0.993267478995199, + 0.9941282382401692, + 0.9957442951245999, + 0.9963484600051441, + 0.9964488847450845 + ], + "train_f1": [ + 0.8246437481500275, + 0.8874954191945926, + 0.9062764542266296, + 0.9194373401534527, + 0.932479720057261, + 0.9409297725024728, + 0.9463403060619242, + 0.9573649497487438, + 0.9638120574312429, + 0.9685839324788996, + 0.9733057947858618, + 0.9770445879698078, + 0.9805093958689237, + 0.980637636080871, + 0.9822981366459628 + ], + "val_loss": [ + 0.2203964632858212, + 0.17454956515381734, + 0.21384386121644638, + 0.14011552351682136, + 0.16310725559790928, + 0.14783345006095866, + 0.13699214116980632, + 0.16235242078376663, + 0.150105018048392, + 0.15297084855070958, + 0.15486972434834267, + 0.15296626980028424, + 0.15368005093381118, + 0.1526549880237629, + 0.15181722667378683 + ], + "val_acc": [ + 0.8609375, + 0.8166666666666667, + 0.8885416666666667, + 0.8796875, + 0.8427083333333333, + 0.9109375, + 0.9015625, + 0.9140625, + 0.8989583333333333, + 0.9026041666666667, + 0.8989583333333333, + 0.9166666666666666, + 0.9036458333333334, + 0.9119791666666667, + 0.9036458333333334 + ], + "val_auc": [ + 0.9226692708333334, + 0.9380533854166667, + 0.949118923611111, + 0.9588577835648148, + 0.9569683159722222, + 0.9646079282407407, + 0.9632609953703706, + 0.963666087962963, + 0.9659418402777779, + 0.9669249131944445, + 0.9672974537037038, + 0.9701721643518517, + 0.9702835648148148, + 0.9693366608796296, + 0.9704723668981481 + ], + "val_f1": [ + 0.9085929476206779, + 
0.8646153846153846, + 0.9271613342409802, + 0.9160915365056302, + 0.8856926570779712, + 0.9395118500176866, + 0.93194094346417, + 0.9426086956521739, + 0.9305157593123209, + 0.933522929257021, + 0.9296081277213353, + 0.9435825105782792, + 0.9332852506310855, + 0.9397933737085857, + 0.9332852506310855 + ] + }, + "test_metrics": { + "accuracy": 0.9116666666666666, + "auc_roc": 0.9709605324074074, + "f1": 0.9392724147808651, + "confusion_matrix": [ + [ + 1097, + 103 + ], + [ + 321, + 3279 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9291666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 85, + 1115 + ] + ], + "n": 1200, + "detection_rate": 0.9291666666666667, + "pairwise_auc": 0.9761385416666668, + "pairwise_f1": 0.9222497932175352 + }, + "insight": { + "accuracy": 0.8683333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 158, + 1042 + ] + ], + "n": 1200, + "detection_rate": 0.8683333333333333, + "pairwise_auc": 0.9591041666666666, + "pairwise_f1": 0.8886993603411514 + }, + "text2img": { + "accuracy": 0.935, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 78, + 1122 + ] + ], + "n": 1200, + "detection_rate": 0.935, + "pairwise_auc": 0.9776388888888888, + "pairwise_f1": 0.925360824742268 + }, + "wiki": { + "accuracy": 0.9141666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1097, + 103 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.08583333333333332 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9216666666666666, + "auc_roc": 0.9761385416666668, + "f1": 0.9222497932175352, + "confusion_matrix": [ + [ + 1097, + 103 + ], + [ + 85, + 1115 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.89125, + "auc_roc": 0.9591041666666666, + "f1": 0.8886993603411514, 
+ "confusion_matrix": [ + [ + 1097, + 103 + ], + [ + 158, + 1042 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9245833333333333, + "auc_roc": 0.9776388888888888, + "f1": 0.925360824742268, + "confusion_matrix": [ + [ + 1097, + 103 + ], + [ + 78, + 1122 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.2391183308429188, + 0.17975796078918158, + 0.1562604979370479, + 0.13741093776016325, + 0.1239349705270595, + 0.11139587184276294, + 0.09401690095180162, + 0.08375884603571009, + 0.07211474131585824, + 0.06346744239951173, + 0.05479857998563804, + 0.047455945289780005, + 0.04211521086248535, + 0.03309281969085094, + 0.0358316077704162 + ], + "train_acc": [ + 0.7630208333333334, + 0.835300925925926, + 0.866087962962963, + 0.8814814814814815, + 0.8994791666666667, + 0.9103009259259259, + 0.9276620370370371, + 0.936863425925926, + 0.9476851851851852, + 0.9544560185185185, + 0.960474537037037, + 0.9675347222222223, + 0.9707175925925926, + 0.9768518518518519, + 0.9752314814814815 + ], + "train_auc": [ + 0.852203378629401, + 0.9198489476165981, + 0.940851507130201, + 0.9543257923239598, + 0.9632597182927527, + 0.9705720146747828, + 0.9788218467792639, + 0.9837907682470279, + 0.9876344414437586, + 0.9905915637860082, + 0.992993014474737, + 0.9944546610653864, + 0.9956297778778006, + 0.9972157207361683, + 0.9967796389174668 + ], + "train_f1": [ + 0.8284673061617727, + 0.8838746531744737, + 0.9068587989051683, + 0.9178302038196117, + 0.9309974973185556, + 0.938623584382672, + 0.9508067689885872, + 0.9572274277649273, + 0.9646515992805192, + 0.9693189349343105, + 0.9734127447545642, + 0.9781601588352085, + 0.9803311824613232, + 0.9844636059970481, + 0.9834005584858827 + ], + "val_loss": [ + 0.19780549878875414, + 0.19643723852932454, + 0.14839341659098865, + 0.16046621485923726, + 0.16073516129205626, + 0.15357613482822974, + 
0.14441466063726693, + 0.16628367458470167, + 0.152093356110466, + 0.147853947225182, + 0.15383020790371424, + 0.1582224626695582, + 0.15213602410318952, + 0.15260190927268316, + 0.15733170692498485 + ], + "val_acc": [ + 0.765625, + 0.8666666666666667, + 0.8536458333333333, + 0.9052083333333333, + 0.8328125, + 0.8463541666666666, + 0.8885416666666667, + 0.9046875, + 0.91875, + 0.9036458333333334, + 0.9151041666666667, + 0.9083333333333333, + 0.9135416666666667, + 0.915625, + 0.91875 + ], + "val_auc": [ + 0.9194936342592592, + 0.9333340567129629, + 0.9528884548611112, + 0.9567274305555555, + 0.9518489583333333, + 0.9612811053240742, + 0.9607298900462963, + 0.9619111689814815, + 0.9689366319444443, + 0.9675282118055556, + 0.9690921585648148, + 0.968629195601852, + 0.9695789930555556, + 0.9700296585648147, + 0.97015625 + ], + "val_f1": [ + 0.8195669607056937, + 0.9096045197740112, + 0.8950317519611506, + 0.9367176634214186, + 0.8776210446054137, + 0.888130451270383, + 0.9226319595083152, + 0.9354041651959054, + 0.9451862262825017, + 0.9339521599428775, + 0.9426258359732489, + 0.937721160651097, + 0.9410092395167022, + 0.942756183745583, + 0.9450704225352112 + ] + }, + "test_metrics": { + "accuracy": 0.9229166666666667, + "auc_roc": 0.9746640046296297, + "f1": 0.9477843635337285, + "confusion_matrix": [ + [ + 1072, + 128 + ], + [ + 242, + 3358 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9408333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 71, + 1129 + ] + ], + "n": 1200, + "detection_rate": 0.9408333333333333, + "pairwise_auc": 0.9778364583333332, + "pairwise_f1": 0.919006919006919 + }, + "insight": { + "accuracy": 0.8808333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 143, + 1057 + ] + ], + "n": 1200, + "detection_rate": 0.8808333333333334, + "pairwise_auc": 0.9577305555555554, + "pairwise_f1": 0.8863731656184486 + }, + "text2img": { + "accuracy": 
0.9766666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 28, + 1172 + ] + ], + "n": 1200, + "detection_rate": 0.9766666666666667, + "pairwise_auc": 0.988425, + "pairwise_f1": 0.9376 + }, + "wiki": { + "accuracy": 0.8933333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1072, + 128 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.10666666666666669 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9170833333333334, + "auc_roc": 0.9778364583333332, + "f1": 0.919006919006919, + "confusion_matrix": [ + [ + 1072, + 128 + ], + [ + 71, + 1129 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8870833333333333, + "auc_roc": 0.9577305555555554, + "f1": 0.8863731656184486, + "confusion_matrix": [ + [ + 1072, + 128 + ], + [ + 143, + 1057 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.935, + "auc_roc": 0.988425, + "f1": 0.9376, + "confusion_matrix": [ + [ + 1072, + 128 + ], + [ + 28, + 1172 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.241080354043731, + 0.18757241626304608, + 0.16247374398840797, + 0.14176422256838392, + 0.12742253698891512, + 0.11497999549222489, + 0.10160268458917185, + 0.08682891820143494, + 0.07648260152070886, + 0.06777689960414406, + 0.05718743406727703, + 0.05286531300258098, + 0.0479770600606032, + 0.04381440916762653, + 0.038372389554408276 + ], + "train_acc": [ + 0.7534722222222222, + 0.8302083333333333, + 0.860474537037037, + 0.8782407407407408, + 0.8947337962962963, + 0.9088541666666666, + 0.9184606481481481, + 0.9329282407407408, + 0.9407407407407408, + 0.9491898148148148, + 0.9596064814814815, + 0.9638888888888889, + 0.965625, + 0.9722800925925926, + 0.9731481481481481 + ], + 
"train_auc": [ + 0.8482708279749658, + 0.9132211737682899, + 0.9359448588248742, + 0.9514543681412894, + 0.9614638435213763, + 0.9690645361796982, + 0.9755183506229994, + 0.9820020468964334, + 0.9861288026548926, + 0.9890546499914266, + 0.9922430573416781, + 0.9933878815157751, + 0.9945645433241883, + 0.9954342063900321, + 0.9963013778149292 + ], + "train_f1": [ + 0.8193843805647418, + 0.8801568499305612, + 0.9025740493797227, + 0.9156510583707504, + 0.927503885855486, + 0.9375470875133828, + 0.9442488030704704, + 0.9544364508393285, + 0.9598462865657595, + 0.9656655717190678, + 0.9727981293842557, + 0.9757236227824463, + 0.9769141080450836, + 0.9814161008729388, + 0.9819903741655023 + ], + "val_loss": [ + 0.18238736015434068, + 0.15811058304583034, + 0.1319792718393728, + 0.22681371789706947, + 0.13507361768667275, + 0.12749196560277293, + 0.1358266538474709, + 0.13207181595110645, + 0.13650758892763407, + 0.12482405715078736, + 0.1842558211152209, + 0.13220274263876491, + 0.13752564690075814, + 0.13722982438048348, + 0.137083099995895 + ], + "val_acc": [ + 0.8119791666666667, + 0.8895833333333333, + 0.8916666666666667, + 0.8885416666666667, + 0.8875, + 0.8869791666666667, + 0.8864583333333333, + 0.8989583333333333, + 0.9151041666666667, + 0.91875, + 0.9192708333333334, + 0.9213541666666667, + 0.9109375, + 0.9197916666666667, + 0.9203125 + ], + "val_auc": [ + 0.924947193287037, + 0.9467513020833334, + 0.9583492476851853, + 0.9346137152777776, + 0.9612058738425926, + 0.967642505787037, + 0.9648336226851852, + 0.968669704861111, + 0.9696961805555556, + 0.9740458622685184, + 0.9646144386574074, + 0.9754036458333333, + 0.9747142650462963, + 0.9735807291666667, + 0.973878761574074 + ], + "val_f1": [ + 0.8609934539853678, + 0.925035360678925, + 0.9250180245133381, + 0.9267624914442163, + 0.9211678832116789, + 0.9206581352833638, + 0.9202049780380673, + 0.9303661162957645, + 0.9421780773323873, + 0.9444444444444444, + 0.9460869565217391, + 0.9461291473421334, + 
0.9382002168413445, + 0.9451566951566952, + 0.9455709711846318 + ] + }, + "test_metrics": { + "accuracy": 0.9154166666666667, + "auc_roc": 0.9729494212962962, + "f1": 0.9420827389443652, + "confusion_matrix": [ + [ + 1092, + 108 + ], + [ + 298, + 3302 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9358333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 77, + 1123 + ] + ], + "n": 1200, + "detection_rate": 0.9358333333333333, + "pairwise_auc": 0.9761270833333333, + "pairwise_f1": 0.9238996297819827 + }, + "insight": { + "accuracy": 0.8591666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 169, + 1031 + ] + ], + "n": 1200, + "detection_rate": 0.8591666666666666, + "pairwise_auc": 0.9557927083333334, + "pairwise_f1": 0.8815733219324497 + }, + "text2img": { + "accuracy": 0.9566666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 52, + 1148 + ] + ], + "n": 1200, + "detection_rate": 0.9566666666666667, + "pairwise_auc": 0.9869284722222222, + "pairwise_f1": 0.9348534201954397 + }, + "wiki": { + "accuracy": 0.91, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1092, + 108 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.08999999999999997 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9229166666666667, + "auc_roc": 0.9761270833333333, + "f1": 0.9238996297819827, + "confusion_matrix": [ + [ + 1092, + 108 + ], + [ + 77, + 1123 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8845833333333334, + "auc_roc": 0.9557927083333334, + "f1": 0.8815733219324497, + "confusion_matrix": [ + [ + 1092, + 108 + ], + [ + 169, + 1031 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9333333333333333, + "auc_roc": 
0.9869284722222222, + "f1": 0.9348534201954397, + "confusion_matrix": [ + [ + 1092, + 108 + ], + [ + 52, + 1148 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.23917138579267042, + 0.17964389969905217, + 0.1583930908322886, + 0.13833872029488836, + 0.12386842469719272, + 0.11030089819320926, + 0.10064527483105108, + 0.08629975835189085, + 0.0727624413928155, + 0.06635727010305142, + 0.0563277630618325, + 0.04666085629379032, + 0.04453519208279128, + 0.036597319391202525, + 0.03935435012961669 + ], + "train_acc": [ + 0.7579282407407407, + 0.8363425925925926, + 0.8634259259259259, + 0.8835069444444444, + 0.898900462962963, + 0.9122106481481481, + 0.9226273148148149, + 0.9330439814814815, + 0.9443287037037037, + 0.950925925925926, + 0.960300925925926, + 0.9663194444444444, + 0.9679398148148148, + 0.9741319444444444, + 0.9736689814814815 + ], + "train_auc": [ + 0.8512407210933928, + 0.9202943976480338, + 0.9394323702560586, + 0.9535609299982853, + 0.9637053201446045, + 0.970969257258802, + 0.9764405346221994, + 0.9822673128858025, + 0.9869508834162095, + 0.9898530110453817, + 0.9924085505258345, + 0.9946581629372429, + 0.995199706361454, + 0.9965422364397576, + 0.9961478516518061 + ], + "train_f1": [ + 0.8228368133497099, + 0.8848440426744849, + 0.9047695908320555, + 0.9192377131394183, + 0.9304842624646851, + 0.9399374430850853, + 0.9474098257483381, + 0.9545793585364896, + 0.9622685911515532, + 0.9669240970434512, + 0.9732824427480916, + 0.9773417425835085, + 0.9784502878481407, + 0.9826063270944395, + 0.9823403842421891 + ], + "val_loss": [ + 0.2241378645102183, + 0.13984972628143927, + 0.14124439058747765, + 0.17640043124944593, + 0.1428463432394589, + 0.14185509137654057, + 0.13818038610431055, + 0.17534095707039038, + 0.17639769293988744, + 0.14742097199584048, + 0.1480519696798486, + 0.15402708195227507, + 0.14837629409351696, + 0.1516194027673919, + 0.1507140000835837 + 
], + "val_acc": [ + 0.7421875, + 0.8651041666666667, + 0.8947916666666667, + 0.909375, + 0.8989583333333333, + 0.8854166666666666, + 0.9015625, + 0.8546875, + 0.871875, + 0.9046875, + 0.9171875, + 0.921875, + 0.9145833333333333, + 0.9145833333333333, + 0.9119791666666667 + ], + "val_auc": [ + 0.9230924479166667, + 0.9523017939814814, + 0.9572497106481482, + 0.9584809027777778, + 0.9615393518518518, + 0.961529224537037, + 0.9649168113425926, + 0.9660575810185185, + 0.9574211516203703, + 0.9681322337962963, + 0.9668706597222222, + 0.9679224537037038, + 0.9684201388888889, + 0.969301215277778, + 0.969660011574074 + ], + "val_f1": [ + 0.7952006619776583, + 0.9049541284403669, + 0.9285208775654635, + 0.9401650618982118, + 0.9308131241084165, + 0.9193548387096774, + 0.9313476207773338, + 0.8939566704675028, + 0.9094922737306843, + 0.9341963322545846, + 0.94315337861995, + 0.9465431218816821, + 0.9411764705882353, + 0.9409647228221742, + 0.9390551749008295 + ] + }, + "test_metrics": { + "accuracy": 0.9116666666666666, + "auc_roc": 0.9726403935185185, + "f1": 0.9389225007202535, + "confusion_matrix": [ + [ + 1117, + 83 + ], + [ + 341, + 3259 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9091666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 109, + 1091 + ] + ], + "n": 1200, + "detection_rate": 0.9091666666666667, + "pairwise_auc": 0.9723756944444445, + "pairwise_f1": 0.9191238416175231 + }, + "insight": { + "accuracy": 0.8658333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 161, + 1039 + ] + ], + "n": 1200, + "detection_rate": 0.8658333333333333, + "pairwise_auc": 0.9651961805555554, + "pairwise_f1": 0.8949181739879414 + }, + "text2img": { + "accuracy": 0.9408333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 71, + 1129 + ] + ], + "n": 1200, + "detection_rate": 0.9408333333333333, + "pairwise_auc": 0.9803493055555554, + 
"pairwise_f1": 0.9361525704809287 + }, + "wiki": { + "accuracy": 0.9308333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1117, + 83 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.06916666666666671 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.92, + "auc_roc": 0.9723756944444445, + "f1": 0.9191238416175231, + "confusion_matrix": [ + [ + 1117, + 83 + ], + [ + 109, + 1091 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.8983333333333333, + "auc_roc": 0.9651961805555554, + "f1": 0.8949181739879414, + "confusion_matrix": [ + [ + 1117, + 83 + ], + [ + 161, + 1039 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9358333333333333, + "auc_roc": 0.9803493055555554, + "f1": 0.9361525704809287, + "confusion_matrix": [ + [ + 1117, + 83 + ], + [ + 71, + 1129 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.24166752225546925, + 0.18522660414239875, + 0.16268459580562733, + 0.14325540715276644, + 0.12862643143507066, + 0.11178907558011512, + 0.09732602106890192, + 0.08520252432898377, + 0.0752523358505978, + 0.06626564090709305, + 0.054519248216550936, + 0.051981810135853096, + 0.045029509474773445, + 0.04335176301613467, + 0.03864068022467782 + ], + "train_acc": [ + 0.7587384259259259, + 0.8346643518518518, + 0.8579861111111111, + 0.880150462962963, + 0.8976851851851851, + 0.9119791666666667, + 0.9217013888888889, + 0.9351273148148148, + 0.944212962962963, + 0.9523148148148148, + 0.9605324074074074, + 0.9649884259259259, + 0.9700810185185185, + 0.9697337962962963, + 0.9737847222222222 + ], + "train_auc": [ + 0.847336542995542, + 0.9154143625685871, + 0.9352137720764746, + 0.950587795067444, + 0.960722683041838, + 0.9701250732310243, + 
0.9772534079218107, + 0.9825693819301554, + 0.9864663440929355, + 0.9895381265717879, + 0.9929674014774805, + 0.9934308109710791, + 0.9949776913294468, + 0.995536444044353, + 0.9963720636145406 + ], + "train_f1": [ + 0.8245739532926573, + 0.8834210633696495, + 0.9006397279131914, + 0.9169107321965898, + 0.9296906068559612, + 0.9397170147834014, + 0.946650368676314, + 0.9558887183724866, + 0.9622316251371258, + 0.9677974050336096, + 0.9734402990887141, + 0.9764508972013546, + 0.9799043806118086, + 0.9796727428193867, + 0.9824029833352756 + ], + "val_loss": [ + 0.25112481862306596, + 0.14030918213538826, + 0.12420670604333281, + 0.1263726758149763, + 0.14944143609609456, + 0.20810104956229528, + 0.11866681970035037, + 0.11524346641575296, + 0.11781146025750786, + 0.12625389968355497, + 0.11159159503877163, + 0.1271476188674569, + 0.13850631505871813, + 0.12152798459089051, + 0.12106094028567896 + ], + "val_acc": [ + 0.7151041666666667, + 0.8859375, + 0.8770833333333333, + 0.875, + 0.884375, + 0.790625, + 0.89375, + 0.9, + 0.9125, + 0.9010416666666666, + 0.9276041666666667, + 0.9083333333333333, + 0.9015625, + 0.9276041666666667, + 0.9234375 + ], + "val_auc": [ + 0.8857732928240741, + 0.9526909722222222, + 0.964263599537037, + 0.9617975983796296, + 0.9546021412037036, + 0.9632544849537038, + 0.9700629340277779, + 0.9747837094907407, + 0.9750636574074074, + 0.9736841724537038, + 0.9787311921296294, + 0.9767194733796296, + 0.9738274016203703, + 0.9782602719907407, + 0.9777828414351852 + ], + "val_f1": [ + 0.7731231854002488, + 0.9209100758396533, + 0.9126572908956329, + 0.9118942731277533, + 0.9206576125804146, + 0.8393285371702638, + 0.925601750547046, + 0.9299781181619255, + 0.9399571122230165, + 0.9308084486525856, + 0.9506567270145545, + 0.9363241678726484, + 0.9311976701856571, + 0.95069173465768, + 0.9475187433059622 + ] + }, + "test_metrics": { + "accuracy": 0.9254166666666667, + "auc_roc": 0.9772423611111111, + "f1": 0.9490753911806543, + "confusion_matrix": [ + 
[ + 1106, + 94 + ], + [ + 264, + 3336 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.9191666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 97, + 1103 + ] + ], + "n": 1200, + "detection_rate": 0.9191666666666667, + "pairwise_auc": 0.9749111111111111, + "pairwise_f1": 0.9203170629954109 + }, + "insight": { + "accuracy": 0.8916666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 130, + 1070 + ] + ], + "n": 1200, + "detection_rate": 0.8916666666666667, + "pairwise_auc": 0.9674319444444444, + "pairwise_f1": 0.9052453468697124 + }, + "text2img": { + "accuracy": 0.9691666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 37, + 1163 + ] + ], + "n": 1200, + "detection_rate": 0.9691666666666666, + "pairwise_auc": 0.9893840277777778, + "pairwise_f1": 0.9466829466829467 + }, + "wiki": { + "accuracy": 0.9216666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 1106, + 94 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.07833333333333337 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.9204166666666667, + "auc_roc": 0.9749111111111111, + "f1": 0.9203170629954109, + "confusion_matrix": [ + [ + 1106, + 94 + ], + [ + 97, + 1103 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.9066666666666666, + "auc_roc": 0.9674319444444444, + "f1": 0.9052453468697124, + "confusion_matrix": [ + [ + 1106, + 94 + ], + [ + 130, + 1070 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.9454166666666667, + "auc_roc": 0.9893840277777778, + "f1": 0.9466829466829467, + "confusion_matrix": [ + [ + 1106, + 94 + ], + [ + 37, + 1163 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.9174166666666667, + 
"std": 0.006410440702479072, + "ci_95": 0.005618999021178086, + "values": [ + 0.9116666666666666, + 0.9229166666666667, + 0.9154166666666667, + 0.9116666666666666, + 0.9254166666666667 + ] + }, + "auc_roc": { + "mean": 0.9736913425925925, + "std": 0.0023805497994612776, + "ci_95": 0.0020866439007642124, + "values": [ + 0.9709605324074074, + 0.9746640046296297, + 0.9729494212962962, + 0.9726403935185185, + 0.9772423611111111 + ] + }, + "f1": { + "mean": 0.9434274818319734, + "std": 0.004749975490040382, + "ci_95": 0.004163537090177762, + "values": [ + 0.9392724147808651, + 0.9477843635337285, + 0.9420827389443652, + 0.9389225007202535, + 0.9490753911806543 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.9268333333333333, + "std": 0.012780193008453854, + "ci_95": 0.011202333090328382, + "values": [ + 0.9291666666666667, + 0.9408333333333333, + 0.9358333333333333, + 0.9091666666666667, + 0.9191666666666667 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9268333333333333, + "std": 0.012780193008453854, + "ci_95": 0.011202333090328382, + "values": [ + 0.9291666666666667, + 0.9408333333333333, + 0.9358333333333333, + 0.9091666666666667, + 0.9191666666666667 + ] + }, + "pairwise_auc": { + "mean": 0.9754777777777779, + "std": 0.0020227217247155895, + "ci_95": 0.0017729937642036325, + "values": [ + 0.9761385416666668, + 0.9778364583333332, + 0.9761270833333333, + 0.9723756944444445, + 0.9749111111111111 + ] + }, + "pairwise_f1": { + "mean": 0.920919449323874, + "std": 0.0021151914201433647, + "ci_95": 0.0018540470259390336, + "values": [ + 0.9222497932175352, + 0.919006919006919, + 0.9238996297819827, + 0.9191238416175231, + 0.9203170629954109 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.8731666666666668, + "std": 0.012982359826574964, + "ci_95": 0.011379540119588928, + "values": [ + 0.8683333333333333, + 
0.8808333333333334, + 0.8591666666666666, + 0.8658333333333333, + 0.8916666666666667 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.8731666666666668, + "std": 0.012982359826574964, + "ci_95": 0.011379540119588928, + "values": [ + 0.8683333333333333, + 0.8808333333333334, + 0.8591666666666666, + 0.8658333333333333, + 0.8916666666666667 + ] + }, + "pairwise_auc": { + "mean": 0.9610511111111112, + "std": 0.005009094637947759, + "ci_95": 0.004390665037542907, + "values": [ + 0.9591041666666666, + 0.9577305555555554, + 0.9557927083333334, + 0.9651961805555554, + 0.9674319444444444 + ] + }, + "pairwise_f1": { + "mean": 0.8913618737499409, + "std": 0.009126746824678685, + "ci_95": 0.007999946314857462, + "values": [ + 0.8886993603411514, + 0.8863731656184486, + 0.8815733219324497, + 0.8949181739879414, + 0.9052453468697124 + ] + } + }, + "text2img": { + "accuracy": { + "mean": 0.9556666666666667, + "std": 0.017828270309321142, + "ci_95": 0.01562716793849024, + "values": [ + 0.935, + 0.9766666666666667, + 0.9566666666666667, + 0.9408333333333333, + 0.9691666666666666 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.9556666666666667, + "std": 0.017828270309321142, + "ci_95": 0.01562716793849024, + "values": [ + 0.935, + 0.9766666666666667, + 0.9566666666666667, + 0.9408333333333333, + 0.9691666666666666 + ] + }, + "pairwise_auc": { + "mean": 0.9845451388888888, + "std": 0.005230911778120413, + "ci_95": 0.004585096333511164, + "values": [ + 0.9776388888888888, + 0.988425, + 0.9869284722222222, + 0.9803493055555554, + 0.9893840277777778 + ] + }, + "pairwise_f1": { + "mean": 0.9361299524203165, + "std": 0.007601494669431217, + "ci_95": 0.006663003854088594, + "values": [ + 0.925360824742268, + 0.9376, + 0.9348534201954397, + 0.9361525704809287, + 
0.9466829466829467 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.914, + "std": 0.014011404085402865, + "ci_95": 0.012281537182110197, + "values": [ + 0.9141666666666667, + 0.8933333333333333, + 0.91, + 0.9308333333333333, + 0.9216666666666666 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.08600000000000001, + "std": 0.014011404085402865, + "ci_95": 0.012281537182110197, + "values": [ + 0.08583333333333332, + 0.10666666666666669, + 0.08999999999999997, + 0.06916666666666671, + 0.07833333333333337 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.9204166666666665, + "std": 0.0021850184336878135, + "ci_95": 0.0019152531019278984, + "values": [ + 0.9216666666666666, + 0.9170833333333334, + 0.9229166666666667, + 0.92, + 0.9204166666666667 + ] + }, + "auc_roc": { + "mean": 0.9754777777777779, + "std": 0.0020227217247155895, + "ci_95": 0.0017729937642036325, + "values": [ + 0.9761385416666668, + 0.9778364583333332, + 0.9761270833333333, + 0.9723756944444445, + 0.9749111111111111 + ] + }, + "f1": { + "mean": 0.920919449323874, + "std": 0.0021151914201433647, + "ci_95": 0.0018540470259390336, + "values": [ + 0.9222497932175352, + 0.919006919006919, + 0.9238996297819827, + 0.9191238416175231, + 0.9203170629954109 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.8935833333333333, + "std": 0.008978175699377257, + "ci_95": 0.00786971798167575, + "values": [ + 0.89125, + 0.8870833333333333, + 0.8845833333333334, + 0.8983333333333333, + 0.9066666666666666 + ] + }, + "auc_roc": { + "mean": 0.9610511111111112, + "std": 0.005009094637947759, + "ci_95": 0.004390665037542907, + "values": [ + 0.9591041666666666, + 0.9577305555555554, + 0.9557927083333334, + 0.9651961805555554, + 0.9674319444444444 + ] + }, + "f1": { + "mean": 0.8913618737499409, + "std": 0.009126746824678685, + "ci_95": 
0.007999946314857462, + "values": [ + 0.8886993603411514, + 0.8863731656184486, + 0.8815733219324497, + 0.8949181739879414, + 0.9052453468697124 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.9348333333333333, + "std": 0.00742204860908063, + "ci_95": 0.006505712447107124, + "values": [ + 0.9245833333333333, + 0.935, + 0.9333333333333333, + 0.9358333333333333, + 0.9454166666666667 + ] + }, + "auc_roc": { + "mean": 0.9845451388888888, + "std": 0.005230911778120413, + "ci_95": 0.004585096333511164, + "values": [ + 0.9776388888888888, + 0.988425, + 0.9869284722222222, + 0.9803493055555554, + 0.9893840277777778 + ] + }, + "f1": { + "mean": 0.9361299524203165, + "std": 0.007601494669431217, + "ci_95": 0.006663003854088594, + "values": [ + 0.925360824742268, + 0.9376, + 0.9348534201954397, + 0.9361525704809287, + 0.9466829466829467 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data_cropped", + "run_name": "p2e_resnet18_facecrop_aug", + "backbone": "resnet18", + "pretrained": true, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": { + "hflip_p": 0.5, + "rotation_degrees": 10, + "brightness": 0.2, + "contrast": 0.2, + "saturation": 0.1, + "hue": 0.02, + "grayscale_p": 0.1, + "blur_p": 0.1, + "erase_p": 0.2, + "noise_p": 0.3, + "noise_std": 0.04 + } + } +} \ No newline at end of file diff --git a/classifier/outputs/logs/p2e_simplecnn_facecrop_aug.json b/classifier/outputs/logs/p2e_simplecnn_facecrop_aug.json new file mode 100644 index 0000000..c2ccbd7 --- /dev/null +++ b/classifier/outputs/logs/p2e_simplecnn_facecrop_aug.json @@ -0,0 +1,1902 @@ +{ + "run_name": "p2e_simplecnn_facecrop_aug", + "n_folds": 5, + "fold_results": [ + { + "fold": 0, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.34250954982859116, + 
0.3376636618817294, + 0.3330855401025878, + 0.32984588631877193, + 0.3280518215563562, + 0.32636189808448157, + 0.326084821616058, + 0.32358511391061323, + 0.32303755029484077, + 0.32166281822654935, + 0.32160404658427943, + 0.3209127681950728, + 0.3192480807503065, + 0.3193571959932645, + 0.3194699074383135 + ], + "train_acc": [ + 0.48258101851851853, + 0.5648726851851852, + 0.569212962962963, + 0.595949074074074, + 0.595949074074074, + 0.6107060185185185, + 0.5993055555555555, + 0.6072337962962963, + 0.619386574074074, + 0.6201388888888889, + 0.6282407407407408, + 0.624537037037037, + 0.6241319444444444, + 0.6253472222222223, + 0.626099537037037 + ], + "train_auc": [ + 0.576252911379744, + 0.6063105674154092, + 0.6321536351165981, + 0.6460552858510518, + 0.6532327567729768, + 0.6600235410951074, + 0.6595649184099222, + 0.6684782861082533, + 0.6711645161751256, + 0.6771737111339735, + 0.676314568329904, + 0.6792555887774347, + 0.6846313443072702, + 0.6853407921810699, + 0.6830309517175355 + ], + "train_f1": [ + 0.5428702898921213, + 0.6559912156288603, + 0.6548910523875754, + 0.6847570886761785, + 0.6830109870153455, + 0.6995936230071897, + 0.687008407919718, + 0.6937965260545905, + 0.7078836331334666, + 0.7070166041778254, + 0.7164798305234354, + 0.7124623293742244, + 0.7110636594154545, + 0.7111626661907736, + 0.7137477293872668 + ], + "val_loss": [ + 0.3373192215959231, + 0.32989710122346877, + 0.3270208664238453, + 0.32336600745717686, + 0.3233827282985052, + 0.3197261206805706, + 0.32033544008930526, + 0.31920718351999916, + 0.3153636800746123, + 0.31680572082599007, + 0.3169862660268942, + 0.31672322303056716, + 0.3138315826654434, + 0.3151903450489044, + 0.31521817247072853 + ], + "val_acc": [ + 0.621875, + 0.6098958333333333, + 0.6223958333333334, + 0.6197916666666666, + 0.5713541666666667, + 0.5817708333333333, + 0.5510416666666667, + 0.5973958333333333, + 0.6411458333333333, + 0.5807291666666666, + 0.5869791666666667, + 0.5895833333333333, + 
0.6208333333333333, + 0.6046875, + 0.6072916666666667 + ], + "val_auc": [ + 0.6182016782407407, + 0.6552770543981481, + 0.6664547164351853, + 0.6742621527777778, + 0.675212673611111, + 0.6837651909722222, + 0.6946636284722223, + 0.6847149884259259, + 0.6989597800925925, + 0.6965928819444445, + 0.6948271122685186, + 0.6962565104166667, + 0.70158203125, + 0.6985951967592593, + 0.6990458622685185 + ], + "val_f1": [ + 0.7207692307692307, + 0.6951566951566952, + 0.7128712871287128, + 0.7061191626409018, + 0.6447993094518775, + 0.6587335316617084, + 0.6038602941176471, + 0.6767043078209954, + 0.7301214257735997, + 0.6513642269380684, + 0.6586310804993543, + 0.6626712328767124, + 0.7016393442622951, + 0.6814939152328997, + 0.6839899413243923 + ] + }, + "test_metrics": { + "accuracy": 0.6302083333333334, + "auc_roc": 0.7064768518518518, + "f1": 0.7105820968530898, + "confusion_matrix": [ + [ + 846, + 354 + ], + [ + 1421, + 2179 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.5833333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 500, + 700 + ] + ], + "n": 1200, + "detection_rate": 0.5833333333333334, + "pairwise_auc": 0.6924965277777778, + "pairwise_f1": 0.6211180124223602 + }, + "insight": { + "accuracy": 0.4075, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 711, + 489 + ] + ], + "n": 1200, + "detection_rate": 0.4075, + "pairwise_auc": 0.57799375, + "pairwise_f1": 0.4787077826725404 + }, + "text2img": { + "accuracy": 0.825, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 210, + 990 + ] + ], + "n": 1200, + "detection_rate": 0.825, + "pairwise_auc": 0.8489402777777777, + "pairwise_f1": 0.7783018867924528 + }, + "wiki": { + "accuracy": 0.705, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 846, + 354 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.29500000000000004 + } + }, + "pair_metrics": { + 
"wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.6441666666666667, + "auc_roc": 0.6924965277777778, + "f1": 0.6211180124223602, + "confusion_matrix": [ + [ + 846, + 354 + ], + [ + 500, + 700 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.55625, + "auc_roc": 0.57799375, + "f1": 0.4787077826725404, + "confusion_matrix": [ + [ + 846, + 354 + ], + [ + 711, + 489 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.765, + "auc_roc": 0.8489402777777777, + "f1": 0.7783018867924528, + "confusion_matrix": [ + [ + 846, + 354 + ], + [ + 210, + 990 + ] + ] + } + } + }, + { + "fold": 1, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.3412294729164353, + 0.33524748757481576, + 0.33143752046205377, + 0.32817126962321774, + 0.3260459143806387, + 0.3247155515959969, + 0.3228809499906169, + 0.3218548322993296, + 0.3198599721270579, + 0.3191719652050071, + 0.31699595376849177, + 0.3166507471766737, + 0.3161918653106248, + 0.31708175101213987, + 0.3164502556677218 + ], + "train_acc": [ + 0.5309606481481481, + 0.5645254629629629, + 0.5969328703703703, + 0.6009837962962963, + 0.6148726851851852, + 0.6166087962962963, + 0.6202546296296296, + 0.6245949074074074, + 0.6263310185185185, + 0.6325810185185186, + 0.6331018518518519, + 0.6351273148148148, + 0.6360532407407408, + 0.641261574074074, + 0.640625 + ], + "train_auc": [ + 0.5839247095764746, + 0.6213177744198675, + 0.6413356177840649, + 0.65409596300583, + 0.6611723126000228, + 0.666099385216621, + 0.671488527734911, + 0.6752198359625057, + 0.6810925800897348, + 0.6843713920324646, + 0.6921578592964106, + 0.6917148026691815, + 0.6941662915809328, + 0.6911473872599452, + 0.6933547310813901 + ], + "train_f1": [ + 0.6158221548087406, + 0.6520553012438156, + 0.6871490814355657, + 0.6894004234424974, + 0.7033917190355217, + 
0.7058301141157142, + 0.7093630968199132, + 0.7131296157077787, + 0.7144057676146667, + 0.7209108092663413, + 0.720162429378531, + 0.7218668666460806, + 0.7231832386988863, + 0.7287920549503435, + 0.7266965936097175 + ], + "val_loss": [ + 0.3356058232486248, + 0.33023042157292365, + 0.3237359471619129, + 0.3368230534096559, + 0.32140354240934055, + 0.3242058935264746, + 0.3183568445344766, + 0.3202626677850882, + 0.3121804813543955, + 0.31201686561107633, + 0.31169315924247104, + 0.31493208358685176, + 0.308826203395923, + 0.3088033119837443, + 0.3088999457657337 + ], + "val_acc": [ + 0.5760416666666667, + 0.6645833333333333, + 0.61875, + 0.45, + 0.6078125, + 0.5057291666666667, + 0.5583333333333333, + 0.5276041666666667, + 0.6161458333333333, + 0.5947916666666667, + 0.5927083333333333, + 0.559375, + 0.6135416666666667, + 0.5994791666666667, + 0.5921875 + ], + "val_auc": [ + 0.6347902199074074, + 0.6543974247685185, + 0.6806083622685186, + 0.6611588541666666, + 0.6805432581018519, + 0.6970847800925926, + 0.7040125868055556, + 0.7057168692129631, + 0.7132371238425927, + 0.717824074074074, + 0.7138585069444444, + 0.7133239293981481, + 0.7227611400462963, + 0.722876880787037, + 0.7238534432870372 + ], + "val_f1": [ + 0.6619601328903655, + 0.766158315177923, + 0.7026807473598701, + 0.4623217922606925, + 0.6889714993804213, + 0.5399903053805138, + 0.6148955495004541, + 0.5711583924349882, + 0.6894226717235566, + 0.6620330147697654, + 0.6614718614718614, + 0.6147540983606558, + 0.6845238095238095, + 0.6686772942697113, + 0.6597131681877445 + ] + }, + "test_metrics": { + "accuracy": 0.601875, + "auc_roc": 0.7163495370370371, + "f1": 0.6754966887417219, + "confusion_matrix": [ + [ + 900, + 300 + ], + [ + 1611, + 1989 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.49833333333333335, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 602, + 598 + ] + ], + "n": 1200, + "detection_rate": 0.49833333333333335, + "pairwise_auc": 
0.687120138888889, + "pairwise_f1": 0.5700667302192565 + }, + "insight": { + "accuracy": 0.37666666666666665, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 748, + 452 + ] + ], + "n": 1200, + "detection_rate": 0.37666666666666665, + "pairwise_auc": 0.6108368055555555, + "pairwise_f1": 0.46311475409836067 + }, + "text2img": { + "accuracy": 0.7825, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 261, + 939 + ] + ], + "n": 1200, + "detection_rate": 0.7825, + "pairwise_auc": 0.8510916666666667, + "pairwise_f1": 0.7699876998769988 + }, + "wiki": { + "accuracy": 0.75, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 900, + 300 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.25 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.6241666666666666, + "auc_roc": 0.687120138888889, + "f1": 0.5700667302192565, + "confusion_matrix": [ + [ + 900, + 300 + ], + [ + 602, + 598 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.5633333333333334, + "auc_roc": 0.6108368055555555, + "f1": 0.46311475409836067, + "confusion_matrix": [ + [ + 900, + 300 + ], + [ + 748, + 452 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.76625, + "auc_roc": 0.8510916666666667, + "f1": 0.7699876998769988, + "confusion_matrix": [ + [ + 900, + 300 + ], + [ + 261, + 939 + ] + ] + } + } + }, + { + "fold": 2, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.3428048950654489, + 0.3364038331365144, + 0.33301248649756116, + 0.33060640320181844, + 0.32781746550290675, + 0.32700540197116357, + 0.3247033803275338, + 0.3239840049158644, + 0.32147704183503434, + 0.32147702795487865, + 0.31971182058806774, + 0.3201450396743086, + 0.31812678009823514, + 0.31741044584799694, + 
0.3170434526547238 + ], + "train_acc": [ + 0.6049189814814815, + 0.5714699074074074, + 0.585300925925926, + 0.5928240740740741, + 0.602025462962963, + 0.601099537037037, + 0.6172453703703704, + 0.6179398148148149, + 0.6263888888888889, + 0.6273726851851852, + 0.6274305555555556, + 0.6285879629629629, + 0.6353587962962963, + 0.6391782407407407, + 0.6419560185185185 + ], + "train_auc": [ + 0.5752403049268404, + 0.6169889278263603, + 0.6324047014317558, + 0.6435954646776405, + 0.655721450617284, + 0.6569104009487883, + 0.6655561181841564, + 0.6684251132401692, + 0.6779166130829904, + 0.6753714241826703, + 0.682090522476566, + 0.6820929337419983, + 0.6889940379229538, + 0.6927793674268404, + 0.6938219539466164 + ], + "train_f1": [ + 0.7128496319663512, + 0.6623193032058006, + 0.6755410667391107, + 0.6811961939284096, + 0.6900437192950827, + 0.6893225762834092, + 0.7064358632933866, + 0.7063689734922611, + 0.7138805176387165, + 0.7161059917993033, + 0.7152839200424553, + 0.7179892784954741, + 0.7228014605604681, + 0.7252214534396897, + 0.7295300546448087 + ], + "val_loss": [ + 0.3408234044909477, + 0.3308235712349415, + 0.3284244440495968, + 0.3261048227548599, + 0.3263484557469686, + 0.3201168750723203, + 0.3191030167043209, + 0.3180046024421851, + 0.3167616233229637, + 0.31409477355579535, + 0.3121136459211508, + 0.3125723938147227, + 0.31143277063965796, + 0.3118800289928913, + 0.3120772957801819 + ], + "val_acc": [ + 0.6494791666666667, + 0.4973958333333333, + 0.5734375, + 0.5869791666666667, + 0.5458333333333333, + 0.6354166666666666, + 0.5671875, + 0.5473958333333333, + 0.5588541666666667, + 0.6848958333333334, + 0.5963541666666666, + 0.5963541666666666, + 0.6088541666666667, + 0.6010416666666667, + 0.5932291666666667 + ], + "val_auc": [ + 0.6018995949074074, + 0.6708622685185184, + 0.6592180266203704, + 0.6655295138888888, + 0.6767860243055557, + 0.6874037905092593, + 0.6909114583333333, + 0.6967375578703704, + 0.7030490451388889, + 0.7084816261574074, + 
0.7108716724537038, + 0.7109700520833333, + 0.7125651041666667, + 0.7128096064814815, + 0.7133203124999999 + ], + "val_f1": [ + 0.7590404582885786, + 0.5294978059483179, + 0.6492505353319058, + 0.6672261854804867, + 0.604355716878403, + 0.7231012658227848, + 0.6340819022457067, + 0.6062528318985048, + 0.618983355825461, + 0.7745061498322773, + 0.6683782627299957, + 0.6683782627299957, + 0.6864300626304801, + 0.6754237288135593, + 0.665524625267666 + ] + }, + "test_metrics": { + "accuracy": 0.60375, + "auc_roc": 0.7129336805555555, + "f1": 0.676530612244898, + "confusion_matrix": [ + [ + 909, + 291 + ], + [ + 1611, + 1989 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.5041666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 595, + 605 + ] + ], + "n": 1200, + "detection_rate": 0.5041666666666667, + "pairwise_auc": 0.6873760416666667, + "pairwise_f1": 0.5772900763358778 + }, + "insight": { + "accuracy": 0.37666666666666665, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 748, + 452 + ] + ], + "n": 1200, + "detection_rate": 0.37666666666666665, + "pairwise_auc": 0.6129118055555555, + "pairwise_f1": 0.465259907359753 + }, + "text2img": { + "accuracy": 0.7766666666666666, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 268, + 932 + ] + ], + "n": 1200, + "detection_rate": 0.7766666666666666, + "pairwise_auc": 0.8385131944444445, + "pairwise_f1": 0.7692942633099463 + }, + "wiki": { + "accuracy": 0.7575, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 909, + 291 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.24250000000000005 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.6308333333333334, + "auc_roc": 0.6873760416666667, + "f1": 0.5772900763358778, + "confusion_matrix": [ + [ + 909, + 291 + ], + [ + 595, + 605 + ] + ] + }, + 
"wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.5670833333333334, + "auc_roc": 0.6129118055555555, + "f1": 0.465259907359753, + "confusion_matrix": [ + [ + 909, + 291 + ], + [ + 748, + 452 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7670833333333333, + "auc_roc": 0.8385131944444445, + "f1": 0.7692942633099463, + "confusion_matrix": [ + [ + 909, + 291 + ], + [ + 268, + 932 + ] + ] + } + } + }, + { + "fold": 3, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.3414186562652941, + 0.3347257332117469, + 0.3306057039786268, + 0.3286680884383343, + 0.3260611911338788, + 0.3245939609905084, + 0.32393084116004134, + 0.3218083945137483, + 0.3209881587160958, + 0.3199780968880212, + 0.31905122070952696, + 0.3175611142207075, + 0.318304718396178, + 0.31691962596442963, + 0.3174062580422119 + ], + "train_acc": [ + 0.6168402777777777, + 0.5803819444444445, + 0.6042824074074075, + 0.5981481481481481, + 0.6126157407407408, + 0.6152199074074074, + 0.6154513888888888, + 0.6321180555555556, + 0.6203125, + 0.6320023148148148, + 0.6344907407407407, + 0.6374421296296297, + 0.6326388888888889, + 0.6371527777777778, + 0.633912037037037 + ], + "train_auc": [ + 0.5838262585019434, + 0.6244880883487653, + 0.6438454414580476, + 0.6499886581218565, + 0.6613168099136946, + 0.6647539884116369, + 0.667136238283036, + 0.6745214531178555, + 0.6766778388631687, + 0.6812602344821674, + 0.6850776338163008, + 0.6904468699988568, + 0.6860041848851166, + 0.6929258473365341, + 0.6894789880544124 + ], + "train_f1": [ + 0.7252355064945843, + 0.670603734157089, + 0.6961699102461566, + 0.6862745098039216, + 0.7011073405965351, + 0.7056792527997875, + 0.703652499665522, + 0.7226076711611468, + 0.7095231770487449, + 0.720323701455777, + 0.7223736263736263, + 0.7249418272819071, + 0.7201304999559122, + 0.7243713733075435, + 0.7214442976662263 + ], + 
"val_loss": [ + 0.34170921295881274, + 0.32730790972709656, + 0.3251084687809149, + 0.3289539257685343, + 0.31907327647010486, + 0.3195965270201365, + 0.31829335962732636, + 0.31370966633160907, + 0.3131824423869451, + 0.3141396964589755, + 0.31391477808356283, + 0.31248470867673556, + 0.3123863366742929, + 0.31234221855799355, + 0.3114614337682724 + ], + "val_acc": [ + 0.6473958333333333, + 0.56875, + 0.5375, + 0.6276041666666666, + 0.6244791666666667, + 0.559375, + 0.5614583333333333, + 0.6515625, + 0.5994791666666667, + 0.6067708333333334, + 0.6119791666666666, + 0.615625, + 0.6020833333333333, + 0.5989583333333334, + 0.6135416666666667 + ], + "val_auc": [ + 0.6078537326388889, + 0.6618149594907408, + 0.6744625289351852, + 0.6531937210648149, + 0.682349537037037, + 0.6897627314814815, + 0.6956286168981481, + 0.7025361689814815, + 0.7063020833333334, + 0.7027734375, + 0.7046556712962962, + 0.7073256655092592, + 0.7095449942129629, + 0.7100730613425925, + 0.7102039930555556 + ], + "val_f1": [ + 0.7588172426077663, + 0.642795513373598, + 0.590027700831025, + 0.726786396637371, + 0.7098591549295775, + 0.6178861788617886, + 0.6210621062106211, + 0.7395873880887505, + 0.6703814830690099, + 0.6821052631578948, + 0.6836518046709129, + 0.6904362416107382, + 0.6692640692640692, + 0.6649260226283725, + 0.6853265479219678 + ] + }, + "test_metrics": { + "accuracy": 0.6058333333333333, + "auc_roc": 0.7125254629629629, + "f1": 0.6773533424283765, + "confusion_matrix": [ + [ + 922, + 278 + ], + [ + 1614, + 1986 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.52, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 576, + 624 + ] + ], + "n": 1200, + "detection_rate": 0.52, + "pairwise_auc": 0.6924263888888889, + "pairwise_f1": 0.5937202664129401 + }, + "insight": { + "accuracy": 0.3591666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 769, + 431 + ] + ], + "n": 1200, + "detection_rate": 
0.3591666666666667, + "pairwise_auc": 0.5975333333333332, + "pairwise_f1": 0.45154531168150863 + }, + "text2img": { + "accuracy": 0.7758333333333334, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 269, + 931 + ] + ], + "n": 1200, + "detection_rate": 0.7758333333333334, + "pairwise_auc": 0.8476166666666666, + "pairwise_f1": 0.7729348277293483 + }, + "wiki": { + "accuracy": 0.7683333333333333, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 922, + 278 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.2316666666666667 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.6441666666666667, + "auc_roc": 0.6924263888888889, + "f1": 0.5937202664129401, + "confusion_matrix": [ + [ + 922, + 278 + ], + [ + 576, + 624 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.56375, + "auc_roc": 0.5975333333333332, + "f1": 0.45154531168150863, + "confusion_matrix": [ + [ + 922, + 278 + ], + [ + 769, + 431 + ] + ] + }, + "wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7720833333333333, + "auc_roc": 0.8476166666666666, + "f1": 0.7729348277293483, + "confusion_matrix": [ + [ + 922, + 278 + ], + [ + 269, + 931 + ] + ] + } + } + }, + { + "fold": 4, + "train_size": 17280, + "val_size": 1920, + "test_size": 4800, + "history": { + "train_loss": [ + 0.3423150075254617, + 0.33536940708204555, + 0.33069259171132687, + 0.32756170426253917, + 0.3250589860810174, + 0.3227024991479185, + 0.3215038531356388, + 0.320797758963373, + 0.3207396794800405, + 0.3196071598816801, + 0.31734457018750684, + 0.3172116314371427, + 0.3159799447490109, + 0.3158282593168594, + 0.31625425983910205 + ], + "train_acc": [ + 0.5155092592592593, + 0.5817129629629629, + 0.5889467592592592, + 0.5991898148148148, + 0.617824074074074, + 0.6166087962962963, + 0.6214699074074074, + 
0.6221643518518518, + 0.6228587962962963, + 0.627199074074074, + 0.6394675925925926, + 0.633275462962963, + 0.6366898148148148, + 0.6401041666666667, + 0.6359375 + ], + "train_auc": [ + 0.5777596129115227, + 0.6250315607853224, + 0.6444195369655922, + 0.6550420899777092, + 0.6667966517346822, + 0.6740149087648605, + 0.6781764403292181, + 0.6812684327846366, + 0.6819964206104252, + 0.6828543577817787, + 0.6929046371313443, + 0.6926175447245084, + 0.6971132062328531, + 0.6967731731538638, + 0.6955447763060127 + ], + "train_f1": [ + 0.5945762711864406, + 0.671842368110415, + 0.6753507929978518, + 0.687002892263196, + 0.7064627966930394, + 0.7034334571825059, + 0.7083426227315289, + 0.708383581222922, + 0.7093349984389635, + 0.7151574106827026, + 0.7254539044597215, + 0.7192912513842746, + 0.7221632147282705, + 0.7258299166776881, + 0.722655733368602 + ], + "val_loss": [ + 0.33888801087935766, + 0.33369783982634543, + 0.3261004028220971, + 0.32097681413094203, + 0.31939894606669744, + 0.31984718119104705, + 0.31838227684299153, + 0.31674387902021406, + 0.3140927150845528, + 0.3150302181641261, + 0.31202618380387626, + 0.31279333755373956, + 0.3109373077750206, + 0.31124774167935054, + 0.31136900608738266 + ], + "val_acc": [ + 0.6583333333333333, + 0.46979166666666666, + 0.6239583333333333, + 0.6494791666666667, + 0.5833333333333334, + 0.5713541666666667, + 0.525, + 0.5692708333333333, + 0.5786458333333333, + 0.5453125, + 0.6026041666666667, + 0.5677083333333334, + 0.5770833333333333, + 0.58125, + 0.5796875 + ], + "val_auc": [ + 0.6135778356481482, + 0.6564084201388889, + 0.6611682581018519, + 0.6818113425925926, + 0.6846795428240741, + 0.6832913773148147, + 0.6975173611111111, + 0.6930577256944443, + 0.6995167824074076, + 0.7053754340277779, + 0.7037883391203703, + 0.7097287326388889, + 0.7119610821759259, + 0.7098343460648149, + 0.7089539930555555 + ], + "val_f1": [ + 0.7675407512402551, + 0.491, + 0.716640502354788, + 0.7398531117124082, + 0.6630160067396799, + 
0.646932646932647, + 0.5750232991612302, + 0.6387068588903452, + 0.6481078729882558, + 0.6047985513807153, + 0.6806195060694852, + 0.6314387211367674, + 0.6472632493483927, + 0.6507384882710686, + 0.6508005192557335 + ] + }, + "test_metrics": { + "accuracy": 0.6029166666666667, + "auc_roc": 0.7196997685185185, + "f1": 0.6727335164835165, + "confusion_matrix": [ + [ + 935, + 265 + ], + [ + 1641, + 1959 + ] + ] + }, + "source_metrics": { + "inpainting": { + "accuracy": 0.4841666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 619, + 581 + ] + ], + "n": 1200, + "detection_rate": 0.4841666666666667, + "pairwise_auc": 0.681788888888889, + "pairwise_f1": 0.5679374389051809 + }, + "insight": { + "accuracy": 0.36833333333333335, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 758, + 442 + ] + ], + "n": 1200, + "detection_rate": 0.36833333333333335, + "pairwise_auc": 0.620317361111111, + "pairwise_f1": 0.4635553224960671 + }, + "text2img": { + "accuracy": 0.78, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 0, + 0 + ], + [ + 264, + 936 + ] + ], + "n": 1200, + "detection_rate": 0.78, + "pairwise_auc": 0.8569930555555555, + "pairwise_f1": 0.7796751353602666 + }, + "wiki": { + "accuracy": 0.7791666666666667, + "auc_roc": null, + "f1": null, + "confusion_matrix": [ + [ + 935, + 265 + ], + [ + 0, + 0 + ] + ], + "n": 1200, + "false_alarm_rate": 0.22083333333333333 + } + }, + "pair_metrics": { + "wiki_vs_inpainting": { + "sources": [ + "wiki", + "inpainting" + ], + "n": 2400, + "accuracy": 0.6316666666666667, + "auc_roc": 0.681788888888889, + "f1": 0.5679374389051809, + "confusion_matrix": [ + [ + 935, + 265 + ], + [ + 619, + 581 + ] + ] + }, + "wiki_vs_insight": { + "sources": [ + "wiki", + "insight" + ], + "n": 2400, + "accuracy": 0.57375, + "auc_roc": 0.620317361111111, + "f1": 0.4635553224960671, + "confusion_matrix": [ + [ + 935, + 265 + ], + [ + 758, + 442 + ] + ] + }, + 
"wiki_vs_text2img": { + "sources": [ + "wiki", + "text2img" + ], + "n": 2400, + "accuracy": 0.7795833333333333, + "auc_roc": 0.8569930555555555, + "f1": 0.7796751353602666, + "confusion_matrix": [ + [ + 935, + 265 + ], + [ + 264, + 936 + ] + ] + } + } + } + ], + "aggregated_metrics": { + "accuracy": { + "mean": 0.6089166666666668, + "std": 0.011990845756010903, + "ci_95": 0.010510439717516853, + "values": [ + 0.6302083333333334, + 0.601875, + 0.60375, + 0.6058333333333333, + 0.6029166666666667 + ] + }, + "auc_roc": { + "mean": 0.7135970601851851, + "std": 0.004927091882425876, + "ci_95": 0.00431878645315273, + "values": [ + 0.7064768518518518, + 0.7163495370370371, + 0.7129336805555555, + 0.7125254629629629, + 0.7196997685185185 + ] + }, + "f1": { + "mean": 0.6825392513503206, + "std": 0.0157729888061219, + "ci_95": 0.013825634270102072, + "values": [ + 0.7105820968530898, + 0.6754966887417219, + 0.676530612244898, + 0.6773533424283765, + 0.6727335164835165 + ] + } + }, + "aggregated_per_source": { + "inpainting": { + "accuracy": { + "mean": 0.518, + "std": 0.03871458955541754, + "ci_95": 0.033934833955031454, + "values": [ + 0.5833333333333334, + 0.49833333333333335, + 0.5041666666666667, + 0.52, + 0.4841666666666667 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.518, + "std": 0.03871458955541754, + "ci_95": 0.033934833955031454, + "values": [ + 0.5833333333333334, + 0.49833333333333335, + 0.5041666666666667, + 0.52, + 0.4841666666666667 + ] + }, + "pairwise_auc": { + "mean": 0.6882415972222222, + "std": 0.004451438401635665, + "ci_95": 0.00390185779457446, + "values": [ + 0.6924965277777778, + 0.687120138888889, + 0.6873760416666667, + 0.6924263888888889, + 0.681788888888889 + ] + }, + "pairwise_f1": { + "mean": 0.5860265048591231, + "std": 0.022072957249148185, + "ci_95": 0.0193478000864285, + "values": [ + 0.6211180124223602, + 0.5700667302192565, 
+ 0.5772900763358778, + 0.5937202664129401, + 0.5679374389051809 + ] + } + }, + "insight": { + "accuracy": { + "mean": 0.3776666666666667, + "std": 0.01817545658909898, + "ci_95": 0.01593149013048614, + "values": [ + 0.4075, + 0.37666666666666665, + 0.37666666666666665, + 0.3591666666666667, + 0.36833333333333335 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.3776666666666667, + "std": 0.01817545658909898, + "ci_95": 0.01593149013048614, + "values": [ + 0.4075, + 0.37666666666666665, + 0.37666666666666665, + 0.3591666666666667, + 0.36833333333333335 + ] + }, + "pairwise_auc": { + "mean": 0.603918611111111, + "std": 0.016662293946055343, + "ci_95": 0.014605144594389479, + "values": [ + 0.57799375, + 0.6108368055555555, + 0.6129118055555555, + 0.5975333333333332, + 0.620317361111111 + ] + }, + "pairwise_f1": { + "mean": 0.464436615661646, + "std": 0.009657299454525379, + "ci_95": 0.00846499619928103, + "values": [ + 0.4787077826725404, + 0.46311475409836067, + 0.465259907359753, + 0.45154531168150863, + 0.4635553224960671 + ] + } + }, + "text2img": { + "accuracy": { + "mean": 0.7879999999999999, + "std": 0.020854988745035438, + "ci_95": 0.018280203621525767, + "values": [ + 0.825, + 0.7825, + 0.7766666666666666, + 0.7758333333333334, + 0.78 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "detection_rate": { + "mean": 0.7879999999999999, + "std": 0.020854988745035438, + "ci_95": 0.018280203621525767, + "values": [ + 0.825, + 0.7825, + 0.7766666666666666, + 0.7758333333333334, + 0.78 + ] + }, + "pairwise_auc": { + "mean": 0.8486309722222222, + "std": 0.006698383530183817, + "ci_95": 0.005871392037839563, + "values": [ + 0.8489402777777777, + 0.8510916666666667, + 0.8385131944444445, + 0.8476166666666666, + 0.8569930555555555 + ] + }, + "pairwise_f1": { + "mean": 
0.7740387626138026, + "std": 0.004745602719112771, + "ci_95": 0.004159704187464446, + "values": [ + 0.7783018867924528, + 0.7699876998769988, + 0.7692942633099463, + 0.7729348277293483, + 0.7796751353602666 + ] + } + }, + "wiki": { + "accuracy": { + "mean": 0.752, + "std": 0.028495857398732356, + "ci_95": 0.024977720294516705, + "values": [ + 0.705, + 0.75, + 0.7575, + 0.7683333333333333, + 0.7791666666666667 + ] + }, + "n": { + "mean": 1200.0, + "std": 0.0, + "ci_95": 0.0, + "values": [ + 1200, + 1200, + 1200, + 1200, + 1200 + ] + }, + "false_alarm_rate": { + "mean": 0.24800000000000005, + "std": 0.028495857398732356, + "ci_95": 0.024977720294516705, + "values": [ + 0.29500000000000004, + 0.25, + 0.24250000000000005, + 0.2316666666666667, + 0.22083333333333333 + ] + } + } + }, + "aggregated_pairwise": { + "wiki_vs_inpainting": { + "accuracy": { + "mean": 0.635, + "std": 0.008858454843945538, + "ci_95": 0.00776477802501641, + "values": [ + 0.6441666666666667, + 0.6241666666666666, + 0.6308333333333334, + 0.6441666666666667, + 0.6316666666666667 + ] + }, + "auc_roc": { + "mean": 0.6882415972222222, + "std": 0.004451438401635665, + "ci_95": 0.00390185779457446, + "values": [ + 0.6924965277777778, + 0.687120138888889, + 0.6873760416666667, + 0.6924263888888889, + 0.681788888888889 + ] + }, + "f1": { + "mean": 0.5860265048591231, + "std": 0.022072957249148185, + "ci_95": 0.0193478000864285, + "values": [ + 0.6211180124223602, + 0.5700667302192565, + 0.5772900763358778, + 0.5937202664129401, + 0.5679374389051809 + ] + } + }, + "wiki_vs_insight": { + "accuracy": { + "mean": 0.5648333333333333, + "std": 0.0063574103742535405, + "ci_95": 0.005572515889015771, + "values": [ + 0.55625, + 0.5633333333333334, + 0.5670833333333334, + 0.56375, + 0.57375 + ] + }, + "auc_roc": { + "mean": 0.603918611111111, + "std": 0.016662293946055343, + "ci_95": 0.014605144594389479, + "values": [ + 0.57799375, + 0.6108368055555555, + 0.6129118055555555, + 0.5975333333333332, + 
0.620317361111111 + ] + }, + "f1": { + "mean": 0.464436615661646, + "std": 0.009657299454525379, + "ci_95": 0.00846499619928103, + "values": [ + 0.4787077826725404, + 0.46311475409836067, + 0.465259907359753, + 0.45154531168150863, + 0.4635553224960671 + ] + } + }, + "wiki_vs_text2img": { + "accuracy": { + "mean": 0.7699999999999999, + "std": 0.0059947894041408835, + "ci_95": 0.005254664594434154, + "values": [ + 0.765, + 0.76625, + 0.7670833333333333, + 0.7720833333333333, + 0.7795833333333333 + ] + }, + "auc_roc": { + "mean": 0.8486309722222222, + "std": 0.006698383530183817, + "ci_95": 0.005871392037839563, + "values": [ + 0.8489402777777777, + 0.8510916666666667, + 0.8385131944444445, + 0.8476166666666666, + 0.8569930555555555 + ] + }, + "f1": { + "mean": 0.7740387626138026, + "std": 0.004745602719112771, + "ci_95": 0.004159704187464446, + "values": [ + 0.7783018867924528, + 0.7699876998769988, + 0.7692942633099463, + 0.7729348277293483, + 0.7796751353602666 + ] + } + } + }, + "config": { + "seed": 42, + "cv_folds": 5, + "batch_size": 32, + "num_workers": 4, + "early_stopping_patience": 5, + "lr": 0.0001, + "weight_decay": 0.0001, + "T_max": 15, + "data_dir": "data_cropped", + "run_name": "p2e_simplecnn_facecrop_aug", + "backbone": "simple_cnn", + "cnn_preset": "medium", + "dropout": 0.0, + "epochs": 15, + "image_size": 224, + "subsample": 0.2, + "augment": { + "hflip_p": 0.5, + "rotation_degrees": 10, + "brightness": 0.2, + "contrast": 0.2, + "saturation": 0.1, + "hue": 0.02, + "grayscale_p": 0.1, + "blur_p": 0.1, + "erase_p": 0.2, + "noise_p": 0.3, + "noise_std": 0.04 + } + } +} \ No newline at end of file diff --git a/classifier/outputs/models/.gitkeep b/classifier/outputs/models/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/classifier/outputs/models/p1_resnet18_baseline_fold0_best.pt b/classifier/outputs/models/p1_resnet18_baseline_fold0_best.pt new file mode 100644 index 0000000..0357eaa --- /dev/null +++ 
b/classifier/outputs/models/p1_resnet18_baseline_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78ee1c88a6695b859f3a975d5ce052e8fb7906910f4d23e5578693c4ac72d4ec +size 44790030 diff --git a/classifier/outputs/models/p1_resnet18_baseline_fold0_final.pt b/classifier/outputs/models/p1_resnet18_baseline_fold0_final.pt new file mode 100644 index 0000000..027a615 --- /dev/null +++ b/classifier/outputs/models/p1_resnet18_baseline_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b385422d8aed090c6bc9ec033641d5292e60cf55e810efbaaab151c6609d49 +size 44790156 diff --git a/classifier/outputs/models/p1_resnet18_baseline_fold1_best.pt b/classifier/outputs/models/p1_resnet18_baseline_fold1_best.pt new file mode 100644 index 0000000..9627db9 --- /dev/null +++ b/classifier/outputs/models/p1_resnet18_baseline_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbbf0b95f5ba8b0f9cd0d1cc08040e085e98367c5dac0df35838495fec19ad6d +size 44790030 diff --git a/classifier/outputs/models/p1_resnet18_baseline_fold1_final.pt b/classifier/outputs/models/p1_resnet18_baseline_fold1_final.pt new file mode 100644 index 0000000..6660793 --- /dev/null +++ b/classifier/outputs/models/p1_resnet18_baseline_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:621af84b9a84167f0c68fae0a199206bd4589a894975b6a9203a0378074e396f +size 44790156 diff --git a/classifier/outputs/models/p1_resnet18_baseline_fold2_best.pt b/classifier/outputs/models/p1_resnet18_baseline_fold2_best.pt new file mode 100644 index 0000000..2200e06 --- /dev/null +++ b/classifier/outputs/models/p1_resnet18_baseline_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a7eb58d2f252a86c8694eb624a59b3094b5817d87149bfb0e3391a3af8aa517 +size 44790030 diff --git a/classifier/outputs/models/p1_resnet18_baseline_fold2_final.pt 
b/classifier/outputs/models/p1_resnet18_baseline_fold2_final.pt new file mode 100644 index 0000000..5ec3a9f --- /dev/null +++ b/classifier/outputs/models/p1_resnet18_baseline_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2df1d73eaa23e4bd69375a13328c4e1fa5852851fc2ade738685a16d691eb612 +size 44790156 diff --git a/classifier/outputs/models/p1_resnet18_baseline_fold3_best.pt b/classifier/outputs/models/p1_resnet18_baseline_fold3_best.pt new file mode 100644 index 0000000..6678feb --- /dev/null +++ b/classifier/outputs/models/p1_resnet18_baseline_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a6b2380e69ffef66ea529810b6938a9b2409c006f0704b4633a0e31790fc478 +size 44790030 diff --git a/classifier/outputs/models/p1_resnet18_baseline_fold3_final.pt b/classifier/outputs/models/p1_resnet18_baseline_fold3_final.pt new file mode 100644 index 0000000..a9dfd2b --- /dev/null +++ b/classifier/outputs/models/p1_resnet18_baseline_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6094eeec7b7e6a543fe9f8743feb4a2b7b9f4a8c9d44c8a27053fa0f49b3fdcd +size 44790156 diff --git a/classifier/outputs/models/p1_resnet18_baseline_fold4_best.pt b/classifier/outputs/models/p1_resnet18_baseline_fold4_best.pt new file mode 100644 index 0000000..16f901f --- /dev/null +++ b/classifier/outputs/models/p1_resnet18_baseline_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65d24765285392ed15b8dfd82ac5b46c87c53f3052b742172b6f8eee8aaf25a3 +size 44790030 diff --git a/classifier/outputs/models/p1_resnet18_baseline_fold4_final.pt b/classifier/outputs/models/p1_resnet18_baseline_fold4_final.pt new file mode 100644 index 0000000..d724780 --- /dev/null +++ b/classifier/outputs/models/p1_resnet18_baseline_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ba51251743d997c242b0ebde6cc2a171663c63703beee56d0e2fe35d6698578 +size 
44790156 diff --git a/classifier/outputs/models/p1_simplecnn_baseline_fold0_best.pt b/classifier/outputs/models/p1_simplecnn_baseline_fold0_best.pt new file mode 100644 index 0000000..fb43be2 --- /dev/null +++ b/classifier/outputs/models/p1_simplecnn_baseline_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45d5a19568f8ce196e6c8fe2bec695adba2f86090c600fb54a75c1c47e47d050 +size 256078 diff --git a/classifier/outputs/models/p1_simplecnn_baseline_fold0_final.pt b/classifier/outputs/models/p1_simplecnn_baseline_fold0_final.pt new file mode 100644 index 0000000..452825a --- /dev/null +++ b/classifier/outputs/models/p1_simplecnn_baseline_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1007ce27e8362ca38e08e038f089c3c7f94be7d5656a1a48abd0e20b5f5e6f37 +size 256112 diff --git a/classifier/outputs/models/p1_simplecnn_baseline_fold1_best.pt b/classifier/outputs/models/p1_simplecnn_baseline_fold1_best.pt new file mode 100644 index 0000000..8f81eaa --- /dev/null +++ b/classifier/outputs/models/p1_simplecnn_baseline_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdb5b10836dacea355e426402d20788d76ac38e45d14315b3616a076e8b257c8 +size 256078 diff --git a/classifier/outputs/models/p1_simplecnn_baseline_fold1_final.pt b/classifier/outputs/models/p1_simplecnn_baseline_fold1_final.pt new file mode 100644 index 0000000..2392346 --- /dev/null +++ b/classifier/outputs/models/p1_simplecnn_baseline_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:442bc71972a45297b35fe4b20fed651e289494e4beaddc426a44f330cfe899cb +size 256112 diff --git a/classifier/outputs/models/p1_simplecnn_baseline_fold2_best.pt b/classifier/outputs/models/p1_simplecnn_baseline_fold2_best.pt new file mode 100644 index 0000000..80ba869 --- /dev/null +++ b/classifier/outputs/models/p1_simplecnn_baseline_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:271728ad1bd0e4a274db072ca8ec7dd56ac135519b94e48a7233ae1a8997cf64 +size 256078 diff --git a/classifier/outputs/models/p1_simplecnn_baseline_fold2_final.pt b/classifier/outputs/models/p1_simplecnn_baseline_fold2_final.pt new file mode 100644 index 0000000..9589dd2 --- /dev/null +++ b/classifier/outputs/models/p1_simplecnn_baseline_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:790aeb90b8177dcd65d71c84ebfcafbf037d97bf75db01cd4331d5e3c5a55ecf +size 256112 diff --git a/classifier/outputs/models/p1_simplecnn_baseline_fold3_best.pt b/classifier/outputs/models/p1_simplecnn_baseline_fold3_best.pt new file mode 100644 index 0000000..0036ee5 --- /dev/null +++ b/classifier/outputs/models/p1_simplecnn_baseline_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3a4a01561dd5e9651df65e4e47f5575955f9231172b80711b904d3e2375bd09 +size 256078 diff --git a/classifier/outputs/models/p1_simplecnn_baseline_fold3_final.pt b/classifier/outputs/models/p1_simplecnn_baseline_fold3_final.pt new file mode 100644 index 0000000..670a8ab --- /dev/null +++ b/classifier/outputs/models/p1_simplecnn_baseline_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c41eb0023a38e610863fec52a37d1972849f7d47b39ec4d1d8a2611bbf2ac46 +size 256112 diff --git a/classifier/outputs/models/p1_simplecnn_baseline_fold4_best.pt b/classifier/outputs/models/p1_simplecnn_baseline_fold4_best.pt new file mode 100644 index 0000000..2778768 --- /dev/null +++ b/classifier/outputs/models/p1_simplecnn_baseline_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8faa6535ae5d094cbc85cff62e5da8cfdd35f2a4a8b5f986b92fa29610b98758 +size 256078 diff --git a/classifier/outputs/models/p1_simplecnn_baseline_fold4_final.pt b/classifier/outputs/models/p1_simplecnn_baseline_fold4_final.pt new file mode 100644 index 0000000..2c09c2a --- /dev/null +++ 
b/classifier/outputs/models/p1_simplecnn_baseline_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f97756db3766d3c147da8b02af86bd8d3d655489d11df6999adf2f80013b8a4c +size 256112 diff --git a/classifier/outputs/models/p2a_t1_original_fold0_best.pt b/classifier/outputs/models/p2a_t1_original_fold0_best.pt new file mode 100644 index 0000000..cb7e116 --- /dev/null +++ b/classifier/outputs/models/p2a_t1_original_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06495120c9a5dd88a2a4873e9266be5f476efc5b8fdc55d70b95b514e22c49bd +size 44789400 diff --git a/classifier/outputs/models/p2a_t1_original_fold0_final.pt b/classifier/outputs/models/p2a_t1_original_fold0_final.pt new file mode 100644 index 0000000..af6172f --- /dev/null +++ b/classifier/outputs/models/p2a_t1_original_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:447b376e9a166a1178016bbd8f16c49ccffa59e99f1ba30275324466268662aa +size 44789526 diff --git a/classifier/outputs/models/p2a_t1_original_fold1_best.pt b/classifier/outputs/models/p2a_t1_original_fold1_best.pt new file mode 100644 index 0000000..1244f5f --- /dev/null +++ b/classifier/outputs/models/p2a_t1_original_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ef5b7fe8d213ce50059951dbcd6e0939f976b2c3575965d405cda0e060a3c33 +size 44789400 diff --git a/classifier/outputs/models/p2a_t1_original_fold1_final.pt b/classifier/outputs/models/p2a_t1_original_fold1_final.pt new file mode 100644 index 0000000..ef8521e --- /dev/null +++ b/classifier/outputs/models/p2a_t1_original_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d725f466029b29d46be45532e4b65c1384a45bf8304fe0f3df30eb83b60652d +size 44789526 diff --git a/classifier/outputs/models/p2a_t1_original_fold2_best.pt b/classifier/outputs/models/p2a_t1_original_fold2_best.pt new file mode 100644 index 0000000..9928f50 --- 
/dev/null +++ b/classifier/outputs/models/p2a_t1_original_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4393dcec4543dea50a30db94297698602a09a28228dde5c279831a810c442971 +size 44789400 diff --git a/classifier/outputs/models/p2a_t1_original_fold2_final.pt b/classifier/outputs/models/p2a_t1_original_fold2_final.pt new file mode 100644 index 0000000..e7f6956 --- /dev/null +++ b/classifier/outputs/models/p2a_t1_original_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da98d4c2213d08695ce6d51a6e7a89a49595aa4754a593dee85eeb0c6f77c87c +size 44789526 diff --git a/classifier/outputs/models/p2a_t1_original_fold3_best.pt b/classifier/outputs/models/p2a_t1_original_fold3_best.pt new file mode 100644 index 0000000..dde8d00 --- /dev/null +++ b/classifier/outputs/models/p2a_t1_original_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:427262853484e8059ae64e16b22b4bc81867ce14d71ff28777e8bda17eaec505 +size 44789400 diff --git a/classifier/outputs/models/p2a_t1_original_fold3_final.pt b/classifier/outputs/models/p2a_t1_original_fold3_final.pt new file mode 100644 index 0000000..989dac5 --- /dev/null +++ b/classifier/outputs/models/p2a_t1_original_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff5eef349a297b30c1b088e97dbe56761eca83fc8b216527d51bf1628bf6ea2 +size 44789526 diff --git a/classifier/outputs/models/p2a_t1_original_fold4_best.pt b/classifier/outputs/models/p2a_t1_original_fold4_best.pt new file mode 100644 index 0000000..63b1b08 --- /dev/null +++ b/classifier/outputs/models/p2a_t1_original_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e5a4f03cdbfa1015e8943d5da392b9e4081602f0a9ca91123d2adfcfd6181b1 +size 44789400 diff --git a/classifier/outputs/models/p2a_t1_original_fold4_final.pt b/classifier/outputs/models/p2a_t1_original_fold4_final.pt new file mode 100644 index 0000000..7a554c9 
--- /dev/null +++ b/classifier/outputs/models/p2a_t1_original_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e2b92f7dfc86714035338c8b7b78605ddc80b995b8aa0547e69dd927cbc2215 +size 44789526 diff --git a/classifier/outputs/models/p2a_t2_real_norm_fold0_best.pt b/classifier/outputs/models/p2a_t2_real_norm_fold0_best.pt new file mode 100644 index 0000000..fc7eddf --- /dev/null +++ b/classifier/outputs/models/p2a_t2_real_norm_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e847e646675b7f20fd4c1cd6719d7903beee91c384a3428df9cf227d94f7aff +size 44789526 diff --git a/classifier/outputs/models/p2a_t2_real_norm_fold0_final.pt b/classifier/outputs/models/p2a_t2_real_norm_fold0_final.pt new file mode 100644 index 0000000..e4b1519 --- /dev/null +++ b/classifier/outputs/models/p2a_t2_real_norm_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c82f7e3a763bc864f853cdbcc9b7325e2cf8dcaf4619495b690cf276f541dfa8 +size 44789652 diff --git a/classifier/outputs/models/p2a_t2_real_norm_fold1_best.pt b/classifier/outputs/models/p2a_t2_real_norm_fold1_best.pt new file mode 100644 index 0000000..c1769e3 --- /dev/null +++ b/classifier/outputs/models/p2a_t2_real_norm_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b267d20de656e35006cfe9daf00b251feeeab14146258e565b51cc3c230b83 +size 44789526 diff --git a/classifier/outputs/models/p2a_t2_real_norm_fold1_final.pt b/classifier/outputs/models/p2a_t2_real_norm_fold1_final.pt new file mode 100644 index 0000000..eea1ac8 --- /dev/null +++ b/classifier/outputs/models/p2a_t2_real_norm_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79901d9cdd71e845a132e521a18e4ccc6f554978e67cdcb481ccdc7b6f332e6f +size 44789652 diff --git a/classifier/outputs/models/p2a_t2_real_norm_fold2_best.pt b/classifier/outputs/models/p2a_t2_real_norm_fold2_best.pt new file mode 100644 index 
0000000..e2c230e --- /dev/null +++ b/classifier/outputs/models/p2a_t2_real_norm_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:add2b131a0f47c4f52675509504e8a803c415d26f53ed02ad7bbdeb4ebbfd40b +size 44789526 diff --git a/classifier/outputs/models/p2a_t2_real_norm_fold2_final.pt b/classifier/outputs/models/p2a_t2_real_norm_fold2_final.pt new file mode 100644 index 0000000..77dd9b7 --- /dev/null +++ b/classifier/outputs/models/p2a_t2_real_norm_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a55671428d8cbf1cc53ace3b9e9fb041db648ed71403d5c9b1d68ed99ba69679 +size 44789652 diff --git a/classifier/outputs/models/p2a_t2_real_norm_fold3_best.pt b/classifier/outputs/models/p2a_t2_real_norm_fold3_best.pt new file mode 100644 index 0000000..ca2f371 --- /dev/null +++ b/classifier/outputs/models/p2a_t2_real_norm_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4ea34fe32775e6572b1d3f5d94edd4ea318eb9a4c72ec8cd8e0577eba26430e +size 44789526 diff --git a/classifier/outputs/models/p2a_t2_real_norm_fold3_final.pt b/classifier/outputs/models/p2a_t2_real_norm_fold3_final.pt new file mode 100644 index 0000000..da9e827 --- /dev/null +++ b/classifier/outputs/models/p2a_t2_real_norm_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00aafe513a44be7390f52ef2f8abf8b701bb8611e3e5f860476d7d50b9cdbac1 +size 44789652 diff --git a/classifier/outputs/models/p2a_t2_real_norm_fold4_best.pt b/classifier/outputs/models/p2a_t2_real_norm_fold4_best.pt new file mode 100644 index 0000000..cbf1736 --- /dev/null +++ b/classifier/outputs/models/p2a_t2_real_norm_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8020e0d3edd653588e981f22b8c3640dbf2e96bae7749e9807567cb46d6852cc +size 44789526 diff --git a/classifier/outputs/models/p2a_t2_real_norm_fold4_final.pt b/classifier/outputs/models/p2a_t2_real_norm_fold4_final.pt new file 
mode 100644 index 0000000..2509ac0 --- /dev/null +++ b/classifier/outputs/models/p2a_t2_real_norm_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adc9a889a92181c588d85bbcc0043fc4dceed3baa91cea665339e899cf4288a9 +size 44789652 diff --git a/classifier/outputs/models/p2a_t3_holdout_inpainting_fold0_best.pt b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold0_best.pt new file mode 100644 index 0000000..248d37d --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb6ac4e08c637f92152e837e3bec1de35ef622fd2e88fed84aff704e7dbd3b7 +size 44790660 diff --git a/classifier/outputs/models/p2a_t3_holdout_inpainting_fold0_final.pt b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold0_final.pt new file mode 100644 index 0000000..34ef53e --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99fe6e867f9c0a8473a2770d76cc75f31a0640f45c6656f84cc48bd509b42e0b +size 44790786 diff --git a/classifier/outputs/models/p2a_t3_holdout_inpainting_fold1_best.pt b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold1_best.pt new file mode 100644 index 0000000..85bbc5d --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10bd7430de8bce226209f79899af2fa10e6aa9d18414607e0d34b07973976f3a +size 44790660 diff --git a/classifier/outputs/models/p2a_t3_holdout_inpainting_fold1_final.pt b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold1_final.pt new file mode 100644 index 0000000..1450f46 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1674078db9d0b86336bcc33da3d598c6207bf793a482c9b9df2ec16c5dc83452 +size 44790786 diff --git 
a/classifier/outputs/models/p2a_t3_holdout_inpainting_fold2_best.pt b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold2_best.pt new file mode 100644 index 0000000..798dfb8 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12c7218475c1ec1e5385ef4ca7a5a0d774b93fe7fcb78ca6711e5aa88878de9c +size 44790660 diff --git a/classifier/outputs/models/p2a_t3_holdout_inpainting_fold2_final.pt b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold2_final.pt new file mode 100644 index 0000000..7038544 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a43bed4daf711461b8bad60c767a6973d3c0c9415d57c752ad3baf2a1bb596bb +size 44790786 diff --git a/classifier/outputs/models/p2a_t3_holdout_inpainting_fold3_best.pt b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold3_best.pt new file mode 100644 index 0000000..87749c7 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e8728b11e34be9b21914528829c70e3ed5b9e1fb1c23240283244b1c39577ba +size 44790660 diff --git a/classifier/outputs/models/p2a_t3_holdout_inpainting_fold3_final.pt b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold3_final.pt new file mode 100644 index 0000000..6b9a50d --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:433c18a4ad361c922402d41d617c31e479ecef4bca9b360d05a8844b9860071d +size 44790786 diff --git a/classifier/outputs/models/p2a_t3_holdout_inpainting_fold4_best.pt b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold4_best.pt new file mode 100644 index 0000000..8ee6d52 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold4_best.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f62f2f954fbf9aac2a099c8e80695425d04e616fc39bdbb84d483971e0e6b333 +size 44790660 diff --git a/classifier/outputs/models/p2a_t3_holdout_inpainting_fold4_final.pt b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold4_final.pt new file mode 100644 index 0000000..4b38293 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_inpainting_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:111afb239ddcd86627cd652df99bfc26eccb6d28203d3c53169a79ce5f05d9a7 +size 44790786 diff --git a/classifier/outputs/models/p2a_t3_holdout_insight_fold0_best.pt b/classifier/outputs/models/p2a_t3_holdout_insight_fold0_best.pt new file mode 100644 index 0000000..2e4c2e5 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_insight_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2359296f1ca7ff5b876cd2c0d6f8f01b6c49d526989541095312db4e2211cb5 +size 44790282 diff --git a/classifier/outputs/models/p2a_t3_holdout_insight_fold0_final.pt b/classifier/outputs/models/p2a_t3_holdout_insight_fold0_final.pt new file mode 100644 index 0000000..0854f80 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_insight_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed7cf9fac0e49bd824e0c39431773fbb8d658aea0c049b2f48663387994900eb +size 44790408 diff --git a/classifier/outputs/models/p2a_t3_holdout_insight_fold1_best.pt b/classifier/outputs/models/p2a_t3_holdout_insight_fold1_best.pt new file mode 100644 index 0000000..914f5ac --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_insight_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e7a5612764a56be07129c7599ae6f5a9b022eb22ac22a019585421667abf2bf +size 44790282 diff --git a/classifier/outputs/models/p2a_t3_holdout_insight_fold1_final.pt b/classifier/outputs/models/p2a_t3_holdout_insight_fold1_final.pt new file mode 
100644 index 0000000..e884a46 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_insight_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56fdc30d0104ecd5137aebff49d0077f402f10deaa81c5e7216a196e70ed9f5a +size 44790408 diff --git a/classifier/outputs/models/p2a_t3_holdout_insight_fold2_best.pt b/classifier/outputs/models/p2a_t3_holdout_insight_fold2_best.pt new file mode 100644 index 0000000..c657a8c --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_insight_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0250a8a85951f6058ae2419cf6d2d00748caff9d4f37647b8523d4611a0f5b8c +size 44790282 diff --git a/classifier/outputs/models/p2a_t3_holdout_insight_fold2_final.pt b/classifier/outputs/models/p2a_t3_holdout_insight_fold2_final.pt new file mode 100644 index 0000000..515f94a --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_insight_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e572458393b42f5eecdc2733be7c2a9aab6e74e83e105b72c1d569274723251c +size 44790408 diff --git a/classifier/outputs/models/p2a_t3_holdout_insight_fold3_best.pt b/classifier/outputs/models/p2a_t3_holdout_insight_fold3_best.pt new file mode 100644 index 0000000..da7a0a1 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_insight_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7dfdc40e9cbb543416cb7ccd0775580e76f3b35e1dbac5a57f65a6fc4cb93a4 +size 44790282 diff --git a/classifier/outputs/models/p2a_t3_holdout_insight_fold3_final.pt b/classifier/outputs/models/p2a_t3_holdout_insight_fold3_final.pt new file mode 100644 index 0000000..19d1499 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_insight_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9623ec2fba74618cb1570d32f7cab7b69081e52d2d964b87805468a26a801dd5 +size 44790408 diff --git 
a/classifier/outputs/models/p2a_t3_holdout_insight_fold4_best.pt b/classifier/outputs/models/p2a_t3_holdout_insight_fold4_best.pt new file mode 100644 index 0000000..5b108e5 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_insight_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5610d24937d6c6bf81fa9e6a2765de85e153467b52616b70514a7609eb3b2e67 +size 44790282 diff --git a/classifier/outputs/models/p2a_t3_holdout_insight_fold4_final.pt b/classifier/outputs/models/p2a_t3_holdout_insight_fold4_final.pt new file mode 100644 index 0000000..c64c208 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_insight_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c93446827aaaa4c43ecf5c2a88fab41d9ab5e30d8698565d604c6fde24c21427 +size 44790408 diff --git a/classifier/outputs/models/p2a_t3_holdout_text2img_fold0_best.pt b/classifier/outputs/models/p2a_t3_holdout_text2img_fold0_best.pt new file mode 100644 index 0000000..6bf2c6f --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_text2img_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03cea07d1508fa9ac1bb889cee1f231a6f0980d7da13409ea063e723b809b99 +size 44790408 diff --git a/classifier/outputs/models/p2a_t3_holdout_text2img_fold0_final.pt b/classifier/outputs/models/p2a_t3_holdout_text2img_fold0_final.pt new file mode 100644 index 0000000..96f8735 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_text2img_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa74d215ffc044f336d70344e8bce30618b276551aada10e507aafb2bff0332 +size 44790534 diff --git a/classifier/outputs/models/p2a_t3_holdout_text2img_fold1_best.pt b/classifier/outputs/models/p2a_t3_holdout_text2img_fold1_best.pt new file mode 100644 index 0000000..f050d58 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_text2img_fold1_best.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:bb6b4b14dcad106e22b26457702573e69b963c53f8bc979a12c20a8ab72edc12 +size 44790408 diff --git a/classifier/outputs/models/p2a_t3_holdout_text2img_fold1_final.pt b/classifier/outputs/models/p2a_t3_holdout_text2img_fold1_final.pt new file mode 100644 index 0000000..8f7328c --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_text2img_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5ddfc6f6cde2c659c106b0758bfab07755dbeb4a7d6e2f0fb0fc26fa489a479 +size 44790534 diff --git a/classifier/outputs/models/p2a_t3_holdout_text2img_fold2_best.pt b/classifier/outputs/models/p2a_t3_holdout_text2img_fold2_best.pt new file mode 100644 index 0000000..10ee1c5 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_text2img_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5375d9031fa1632f06fc344227471e94199d7731325c1e69c436e0fad7e36741 +size 44790408 diff --git a/classifier/outputs/models/p2a_t3_holdout_text2img_fold2_final.pt b/classifier/outputs/models/p2a_t3_holdout_text2img_fold2_final.pt new file mode 100644 index 0000000..5d18d3d --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_text2img_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea68c0cf026668fbd2893814caed13e406d313c6c418796aeba6ebde67125d0c +size 44790534 diff --git a/classifier/outputs/models/p2a_t3_holdout_text2img_fold3_best.pt b/classifier/outputs/models/p2a_t3_holdout_text2img_fold3_best.pt new file mode 100644 index 0000000..1932da1 --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_text2img_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cb7d5ee0f647df1736d32c7119774ff4cd24e579a18f3554f2949b7275f4e3e +size 44790408 diff --git a/classifier/outputs/models/p2a_t3_holdout_text2img_fold3_final.pt b/classifier/outputs/models/p2a_t3_holdout_text2img_fold3_final.pt new file mode 100644 index 
0000000..b3f868f --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_text2img_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1380b8f92e21abf9c5af2df51b454c832e342f5d530465434849075058fc910e +size 44790534 diff --git a/classifier/outputs/models/p2a_t3_holdout_text2img_fold4_best.pt b/classifier/outputs/models/p2a_t3_holdout_text2img_fold4_best.pt new file mode 100644 index 0000000..07ce65d --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_text2img_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b307b8e44b8260fc7be96487696d8c7695a435015fe89dbcdc432e650fdfd5ae +size 44790408 diff --git a/classifier/outputs/models/p2a_t3_holdout_text2img_fold4_final.pt b/classifier/outputs/models/p2a_t3_holdout_text2img_fold4_final.pt new file mode 100644 index 0000000..cd4648e --- /dev/null +++ b/classifier/outputs/models/p2a_t3_holdout_text2img_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7442de0b3592fb3c763c1233a67199dda33ca8836430fbf281f9acf083df1b5 +size 44790534 diff --git a/classifier/outputs/models/p2b_resnet18_224_fold0_best.pt b/classifier/outputs/models/p2b_resnet18_224_fold0_best.pt new file mode 100644 index 0000000..f389b57 --- /dev/null +++ b/classifier/outputs/models/p2b_resnet18_224_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae4104184da467eed8a9dcd64233c26746ab3a01e816ca1b5d4ed5f2cb82c361 +size 44789526 diff --git a/classifier/outputs/models/p2b_resnet18_224_fold0_final.pt b/classifier/outputs/models/p2b_resnet18_224_fold0_final.pt new file mode 100644 index 0000000..a4df601 --- /dev/null +++ b/classifier/outputs/models/p2b_resnet18_224_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41e6da95dfe024e5c99d5c017aa2ec044a88344ae3fa260825a70799cbbee1c1 +size 44789652 diff --git a/classifier/outputs/models/p2b_resnet18_224_fold1_best.pt 
b/classifier/outputs/models/p2b_resnet18_224_fold1_best.pt new file mode 100644 index 0000000..2d83705 --- /dev/null +++ b/classifier/outputs/models/p2b_resnet18_224_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:059b3124231acdfc5c48086ebe042e69f562c57ccea59d32d7adf4af48988fab +size 44789526 diff --git a/classifier/outputs/models/p2b_resnet18_224_fold1_final.pt b/classifier/outputs/models/p2b_resnet18_224_fold1_final.pt new file mode 100644 index 0000000..be3c357 --- /dev/null +++ b/classifier/outputs/models/p2b_resnet18_224_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b2ab306ec680b6883dc80c3ae6933d3a789b49f4f3defbdeb0835b9ef4e59a9 +size 44789652 diff --git a/classifier/outputs/models/p2b_resnet18_224_fold2_best.pt b/classifier/outputs/models/p2b_resnet18_224_fold2_best.pt new file mode 100644 index 0000000..bec189f --- /dev/null +++ b/classifier/outputs/models/p2b_resnet18_224_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ec687bc31af92124d2edba3ad7d0d0aa502e4a679972621f59b22ef64bc47ad +size 44789526 diff --git a/classifier/outputs/models/p2b_resnet18_224_fold2_final.pt b/classifier/outputs/models/p2b_resnet18_224_fold2_final.pt new file mode 100644 index 0000000..7a756fd --- /dev/null +++ b/classifier/outputs/models/p2b_resnet18_224_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2177a203a72694a07d9fed3fced83072c3dd01516666d8e46d37db7d05c8fd87 +size 44789652 diff --git a/classifier/outputs/models/p2b_resnet18_224_fold3_best.pt b/classifier/outputs/models/p2b_resnet18_224_fold3_best.pt new file mode 100644 index 0000000..68c25ce --- /dev/null +++ b/classifier/outputs/models/p2b_resnet18_224_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0bcf7d9c9ecd3a9249bf1cdfd8e5de3dd7af1d36ffd0dc54ad6a4ee026eeec2 +size 44789526 diff --git 
a/classifier/outputs/models/p2b_resnet18_224_fold3_final.pt b/classifier/outputs/models/p2b_resnet18_224_fold3_final.pt new file mode 100644 index 0000000..07a92c0 --- /dev/null +++ b/classifier/outputs/models/p2b_resnet18_224_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:544ab220e44e4433a3b4df05479761c1f77c51569540263499064d9cfaf36e50 +size 44789652 diff --git a/classifier/outputs/models/p2b_resnet18_224_fold4_best.pt b/classifier/outputs/models/p2b_resnet18_224_fold4_best.pt new file mode 100644 index 0000000..fefd416 --- /dev/null +++ b/classifier/outputs/models/p2b_resnet18_224_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:537666c92c242b7807e5eb7832b774c1e4c72aa314da8da70f90ea666fb92814 +size 44789526 diff --git a/classifier/outputs/models/p2b_resnet18_224_fold4_final.pt b/classifier/outputs/models/p2b_resnet18_224_fold4_final.pt new file mode 100644 index 0000000..2cb6dbf --- /dev/null +++ b/classifier/outputs/models/p2b_resnet18_224_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:677095ea3519bdc8f453a9e586260fac596b195ca9a985c42ef170242db5f669 +size 44789652 diff --git a/classifier/outputs/models/p2b_simplecnn_224_fold0_best.pt b/classifier/outputs/models/p2b_simplecnn_224_fold0_best.pt new file mode 100644 index 0000000..600f5e5 --- /dev/null +++ b/classifier/outputs/models/p2b_simplecnn_224_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f004970fc306fdf171d73930a2567a1a825bfa355ba8033c7e36e28f07c49d +size 255942 diff --git a/classifier/outputs/models/p2b_simplecnn_224_fold0_final.pt b/classifier/outputs/models/p2b_simplecnn_224_fold0_final.pt new file mode 100644 index 0000000..d934796 --- /dev/null +++ b/classifier/outputs/models/p2b_simplecnn_224_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8526067196024f1f27752455c0691e1243067d008f7adcaa4e598a7d2d0cead3 
+size 255976 diff --git a/classifier/outputs/models/p2b_simplecnn_224_fold1_best.pt b/classifier/outputs/models/p2b_simplecnn_224_fold1_best.pt new file mode 100644 index 0000000..4db48fc --- /dev/null +++ b/classifier/outputs/models/p2b_simplecnn_224_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b53fb40c23f6d75090bfaf3c1add96b342ceff95a3a625a27743607ba4d944 +size 255942 diff --git a/classifier/outputs/models/p2b_simplecnn_224_fold1_final.pt b/classifier/outputs/models/p2b_simplecnn_224_fold1_final.pt new file mode 100644 index 0000000..bf2df1d --- /dev/null +++ b/classifier/outputs/models/p2b_simplecnn_224_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:393f25174c5e700922510c0ae1cfdd444db4d9a52fcd3ea7ec0934c8d097dd29 +size 255976 diff --git a/classifier/outputs/models/p2b_simplecnn_224_fold2_best.pt b/classifier/outputs/models/p2b_simplecnn_224_fold2_best.pt new file mode 100644 index 0000000..715ce59 --- /dev/null +++ b/classifier/outputs/models/p2b_simplecnn_224_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9888b07ee842c3f82c5c209ca22eba404f6d378082c86428b9da13f6b934ef05 +size 255942 diff --git a/classifier/outputs/models/p2b_simplecnn_224_fold2_final.pt b/classifier/outputs/models/p2b_simplecnn_224_fold2_final.pt new file mode 100644 index 0000000..30681ca --- /dev/null +++ b/classifier/outputs/models/p2b_simplecnn_224_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0583f6e8921cad35a3fe4f2c5e02ae0afbd50e7b5a04066ada56dc73efc4594 +size 255976 diff --git a/classifier/outputs/models/p2b_simplecnn_224_fold3_best.pt b/classifier/outputs/models/p2b_simplecnn_224_fold3_best.pt new file mode 100644 index 0000000..4fe5a78 --- /dev/null +++ b/classifier/outputs/models/p2b_simplecnn_224_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:6e2373e6da83b26e1ff8ec48255c99166411537d2d9674efb7553bf40c75fe85 +size 255942 diff --git a/classifier/outputs/models/p2b_simplecnn_224_fold3_final.pt b/classifier/outputs/models/p2b_simplecnn_224_fold3_final.pt new file mode 100644 index 0000000..6d89177 --- /dev/null +++ b/classifier/outputs/models/p2b_simplecnn_224_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99edd5e58da0e81df6705e52812cac16e1b0b0fab32e94deb1186e182f0a3701 +size 255976 diff --git a/classifier/outputs/models/p2b_simplecnn_224_fold4_best.pt b/classifier/outputs/models/p2b_simplecnn_224_fold4_best.pt new file mode 100644 index 0000000..77463ab --- /dev/null +++ b/classifier/outputs/models/p2b_simplecnn_224_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b11573d6b43c3272ece0a0f58bcdb0496cb570268e612728f059d582e04be4 +size 255942 diff --git a/classifier/outputs/models/p2b_simplecnn_224_fold4_final.pt b/classifier/outputs/models/p2b_simplecnn_224_fold4_final.pt new file mode 100644 index 0000000..bb3d4d2 --- /dev/null +++ b/classifier/outputs/models/p2b_simplecnn_224_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea18ed50ba6c9bb7711c615fe545e56a5f775809c1688657d4b57d9796eead2c +size 255976 diff --git a/classifier/outputs/models/p2c_resnet18_facecrop_fold0_best.pt b/classifier/outputs/models/p2c_resnet18_facecrop_fold0_best.pt new file mode 100644 index 0000000..082a605 --- /dev/null +++ b/classifier/outputs/models/p2c_resnet18_facecrop_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60a66a733b7bb2c112448149b9f9d6e8c1cda11ba276ccfe30ebacc985568283 +size 44790156 diff --git a/classifier/outputs/models/p2c_resnet18_facecrop_fold0_final.pt b/classifier/outputs/models/p2c_resnet18_facecrop_fold0_final.pt new file mode 100644 index 0000000..696934a --- /dev/null +++ b/classifier/outputs/models/p2c_resnet18_facecrop_fold0_final.pt @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:016429ce922e21e0010a050f4f6d35b51dcb33c83b7204aca0a2f0c5c3ae6cc5 +size 44790282 diff --git a/classifier/outputs/models/p2c_resnet18_facecrop_fold1_best.pt b/classifier/outputs/models/p2c_resnet18_facecrop_fold1_best.pt new file mode 100644 index 0000000..1741ef4 --- /dev/null +++ b/classifier/outputs/models/p2c_resnet18_facecrop_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6144195967f34043e3fc10e03663faf479e2785356bd640d37b671c5167a1408 +size 44790156 diff --git a/classifier/outputs/models/p2c_resnet18_facecrop_fold1_final.pt b/classifier/outputs/models/p2c_resnet18_facecrop_fold1_final.pt new file mode 100644 index 0000000..9894de2 --- /dev/null +++ b/classifier/outputs/models/p2c_resnet18_facecrop_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e8d1538df32ad00571742acb7cde881bf0378fe6c568c78da449b60878f837d +size 44790282 diff --git a/classifier/outputs/models/p2c_resnet18_facecrop_fold2_best.pt b/classifier/outputs/models/p2c_resnet18_facecrop_fold2_best.pt new file mode 100644 index 0000000..8672d19 --- /dev/null +++ b/classifier/outputs/models/p2c_resnet18_facecrop_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac4c7163a4152c8d8292d98e85cc14cc5b45e72fea32b6e1a03d2a31515267c +size 44790156 diff --git a/classifier/outputs/models/p2c_resnet18_facecrop_fold2_final.pt b/classifier/outputs/models/p2c_resnet18_facecrop_fold2_final.pt new file mode 100644 index 0000000..3752426 --- /dev/null +++ b/classifier/outputs/models/p2c_resnet18_facecrop_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:943d187e0a34f06bcb17dfc888a3981c811dabcc970c67298c362cb2d095d8d6 +size 44790282 diff --git a/classifier/outputs/models/p2c_resnet18_facecrop_fold3_best.pt b/classifier/outputs/models/p2c_resnet18_facecrop_fold3_best.pt new file mode 100644 index 0000000..4683833 --- 
/dev/null +++ b/classifier/outputs/models/p2c_resnet18_facecrop_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f46bb1be9583a78e5240f9e8fe84894df82b110c0853c63b06ce53c08173455 +size 44790156 diff --git a/classifier/outputs/models/p2c_resnet18_facecrop_fold3_final.pt b/classifier/outputs/models/p2c_resnet18_facecrop_fold3_final.pt new file mode 100644 index 0000000..2f5d95d --- /dev/null +++ b/classifier/outputs/models/p2c_resnet18_facecrop_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:487e7e3e83d445c54b1384dbfebcae2fa0c5042f82ab8f936356591c923d8a11 +size 44790282 diff --git a/classifier/outputs/models/p2c_resnet18_facecrop_fold4_best.pt b/classifier/outputs/models/p2c_resnet18_facecrop_fold4_best.pt new file mode 100644 index 0000000..92a9327 --- /dev/null +++ b/classifier/outputs/models/p2c_resnet18_facecrop_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb69837ee9aa14b6f9276178baeb3a0a3db7399bd2b36ae163c8c0dcd6a9cd29 +size 44790156 diff --git a/classifier/outputs/models/p2c_resnet18_facecrop_fold4_final.pt b/classifier/outputs/models/p2c_resnet18_facecrop_fold4_final.pt new file mode 100644 index 0000000..9d93a2b --- /dev/null +++ b/classifier/outputs/models/p2c_resnet18_facecrop_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd936e8bbe29385d987d6d1852d7e950ac275e6c860bf764eebbe2a72960f034 +size 44790282 diff --git a/classifier/outputs/models/p2c_simplecnn_facecrop_fold0_best.pt b/classifier/outputs/models/p2c_simplecnn_facecrop_fold0_best.pt new file mode 100644 index 0000000..9f58201 --- /dev/null +++ b/classifier/outputs/models/p2c_simplecnn_facecrop_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c849a6f10dbc608cfefc52ee19c64eb61cb05ce430365fcd0aa9650ccb024915 +size 256112 diff --git a/classifier/outputs/models/p2c_simplecnn_facecrop_fold0_final.pt 
b/classifier/outputs/models/p2c_simplecnn_facecrop_fold0_final.pt new file mode 100644 index 0000000..e0d1c04 --- /dev/null +++ b/classifier/outputs/models/p2c_simplecnn_facecrop_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:540f6cda3df41eed20fa4528ead0d80e613ad689961d3f73f5f4ada67867cadf +size 256146 diff --git a/classifier/outputs/models/p2c_simplecnn_facecrop_fold1_best.pt b/classifier/outputs/models/p2c_simplecnn_facecrop_fold1_best.pt new file mode 100644 index 0000000..1c86bb6 --- /dev/null +++ b/classifier/outputs/models/p2c_simplecnn_facecrop_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:497507f094310be932a9aa541a3144a4490a1c71e5045bb8dc87ed6d0b579c69 +size 256112 diff --git a/classifier/outputs/models/p2c_simplecnn_facecrop_fold1_final.pt b/classifier/outputs/models/p2c_simplecnn_facecrop_fold1_final.pt new file mode 100644 index 0000000..2a195c6 --- /dev/null +++ b/classifier/outputs/models/p2c_simplecnn_facecrop_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c02e4b0d9601153d8a666cbb380208306ae98069070e04148d6578e2ff78fdb1 +size 256146 diff --git a/classifier/outputs/models/p2c_simplecnn_facecrop_fold2_best.pt b/classifier/outputs/models/p2c_simplecnn_facecrop_fold2_best.pt new file mode 100644 index 0000000..0f34862 --- /dev/null +++ b/classifier/outputs/models/p2c_simplecnn_facecrop_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ee452eef651213be1180da057f2249d7a33d3663faf0b829f463ded220679b5 +size 256112 diff --git a/classifier/outputs/models/p2c_simplecnn_facecrop_fold2_final.pt b/classifier/outputs/models/p2c_simplecnn_facecrop_fold2_final.pt new file mode 100644 index 0000000..ac47621 --- /dev/null +++ b/classifier/outputs/models/p2c_simplecnn_facecrop_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:57026c0641354530f35b792b20f89207ca2b06952e2bf8aa3c11ac6b3799272a +size 256146 diff --git a/classifier/outputs/models/p2c_simplecnn_facecrop_fold3_best.pt b/classifier/outputs/models/p2c_simplecnn_facecrop_fold3_best.pt new file mode 100644 index 0000000..1d5b1db --- /dev/null +++ b/classifier/outputs/models/p2c_simplecnn_facecrop_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6666b15fde3010ca2fe70b2e20ae15a5b7cb1f8803422044c843acaf8513e62e +size 256112 diff --git a/classifier/outputs/models/p2c_simplecnn_facecrop_fold3_final.pt b/classifier/outputs/models/p2c_simplecnn_facecrop_fold3_final.pt new file mode 100644 index 0000000..355c642 --- /dev/null +++ b/classifier/outputs/models/p2c_simplecnn_facecrop_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ddc112a11bf00b68c9064c79a97c009510351916bac143e35a7c523118edc74 +size 256146 diff --git a/classifier/outputs/models/p2c_simplecnn_facecrop_fold4_best.pt b/classifier/outputs/models/p2c_simplecnn_facecrop_fold4_best.pt new file mode 100644 index 0000000..761eaa5 --- /dev/null +++ b/classifier/outputs/models/p2c_simplecnn_facecrop_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b076d7aa1f82921b7b8c5cad00087d341ff16ca741498252eca9eabea254d69a +size 256112 diff --git a/classifier/outputs/models/p2c_simplecnn_facecrop_fold4_final.pt b/classifier/outputs/models/p2c_simplecnn_facecrop_fold4_final.pt new file mode 100644 index 0000000..f963e81 --- /dev/null +++ b/classifier/outputs/models/p2c_simplecnn_facecrop_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0df2e1ac4a8378715e5da4933166120cd8c1abc385249482d64235e00a30509a +size 256146 diff --git a/classifier/outputs/models/p2d_resnet18_aug_fold0_best.pt b/classifier/outputs/models/p2d_resnet18_aug_fold0_best.pt new file mode 100644 index 0000000..cb09e6a --- /dev/null +++ 
b/classifier/outputs/models/p2d_resnet18_aug_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ada51e3daa262427fc6520c5082cba53228eee83457816f23d1baae55be2d1 +size 44789526 diff --git a/classifier/outputs/models/p2d_resnet18_aug_fold0_final.pt b/classifier/outputs/models/p2d_resnet18_aug_fold0_final.pt new file mode 100644 index 0000000..3e980cf --- /dev/null +++ b/classifier/outputs/models/p2d_resnet18_aug_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7beeafd54858069acc9e82f9afff2576baa637c60188a4a9152b767f548848bc +size 44789652 diff --git a/classifier/outputs/models/p2d_resnet18_aug_fold1_best.pt b/classifier/outputs/models/p2d_resnet18_aug_fold1_best.pt new file mode 100644 index 0000000..b477ee0 --- /dev/null +++ b/classifier/outputs/models/p2d_resnet18_aug_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2386b92c2ddedabe2385409eaf5c6826bdabbedf60e69431949262e5eee62ac8 +size 44789526 diff --git a/classifier/outputs/models/p2d_resnet18_aug_fold1_final.pt b/classifier/outputs/models/p2d_resnet18_aug_fold1_final.pt new file mode 100644 index 0000000..04145b8 --- /dev/null +++ b/classifier/outputs/models/p2d_resnet18_aug_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e47ac473a32cdafd1afb8983c2c1c12bb048b9a6f4a9c93795ceeaa6b6f22a2 +size 44789652 diff --git a/classifier/outputs/models/p2d_resnet18_aug_fold2_best.pt b/classifier/outputs/models/p2d_resnet18_aug_fold2_best.pt new file mode 100644 index 0000000..a5546bf --- /dev/null +++ b/classifier/outputs/models/p2d_resnet18_aug_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe12bd48a51a8fc8155f48871beba94151249c610e030b1ae6f9730e12627154 +size 44789526 diff --git a/classifier/outputs/models/p2d_resnet18_aug_fold2_final.pt b/classifier/outputs/models/p2d_resnet18_aug_fold2_final.pt new file mode 100644 index 0000000..f9c03d1 
--- /dev/null +++ b/classifier/outputs/models/p2d_resnet18_aug_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bed65488f4b7f3d12d912cedc14106e6f38ecb3ed2466feac77b52066d77e9cd +size 44789652 diff --git a/classifier/outputs/models/p2d_resnet18_aug_fold3_best.pt b/classifier/outputs/models/p2d_resnet18_aug_fold3_best.pt new file mode 100644 index 0000000..aaf54e8 --- /dev/null +++ b/classifier/outputs/models/p2d_resnet18_aug_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:134c2a527e7b90928f78982bc9c05f83bbe8e9ed2ca38930c70c8828e53b97c7 +size 44789526 diff --git a/classifier/outputs/models/p2d_resnet18_aug_fold3_final.pt b/classifier/outputs/models/p2d_resnet18_aug_fold3_final.pt new file mode 100644 index 0000000..0b6ebde --- /dev/null +++ b/classifier/outputs/models/p2d_resnet18_aug_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:572d37742ebb4e3efbda18dc36d2407ddc6f102fd2e47cfad318a804873818ff +size 44789652 diff --git a/classifier/outputs/models/p2d_resnet18_aug_fold4_best.pt b/classifier/outputs/models/p2d_resnet18_aug_fold4_best.pt new file mode 100644 index 0000000..f360396 --- /dev/null +++ b/classifier/outputs/models/p2d_resnet18_aug_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ae1f2519d08250737d59fd9f5bf9dc0b07c96cc039bbc746d5101924611178 +size 44789526 diff --git a/classifier/outputs/models/p2d_resnet18_aug_fold4_final.pt b/classifier/outputs/models/p2d_resnet18_aug_fold4_final.pt new file mode 100644 index 0000000..bc8f8b5 --- /dev/null +++ b/classifier/outputs/models/p2d_resnet18_aug_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63a428f203839d6956d926dea7bb8569e088e4a56dd4ee45179bb60142f498e2 +size 44789652 diff --git a/classifier/outputs/models/p2d_simplecnn_aug_fold0_best.pt b/classifier/outputs/models/p2d_simplecnn_aug_fold0_best.pt new file mode 100644 
index 0000000..a01b634 --- /dev/null +++ b/classifier/outputs/models/p2d_simplecnn_aug_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccb4e02011bceaa4d1be2ad90502d96e20b276c87c15dffb8d65ed656a73f237 +size 255942 diff --git a/classifier/outputs/models/p2d_simplecnn_aug_fold0_final.pt b/classifier/outputs/models/p2d_simplecnn_aug_fold0_final.pt new file mode 100644 index 0000000..010329a --- /dev/null +++ b/classifier/outputs/models/p2d_simplecnn_aug_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b4cc7e137782b66e2eb5c5a6075e30404329515bf5c0ebe3364b4a10a4a4939 +size 255976 diff --git a/classifier/outputs/models/p2d_simplecnn_aug_fold1_best.pt b/classifier/outputs/models/p2d_simplecnn_aug_fold1_best.pt new file mode 100644 index 0000000..34a6cc6 --- /dev/null +++ b/classifier/outputs/models/p2d_simplecnn_aug_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1abae27d1f82ff3b781fe8174e1bc03e150900a4df2608cd491e1aa60c5bf0 +size 255942 diff --git a/classifier/outputs/models/p2d_simplecnn_aug_fold1_final.pt b/classifier/outputs/models/p2d_simplecnn_aug_fold1_final.pt new file mode 100644 index 0000000..34f493c --- /dev/null +++ b/classifier/outputs/models/p2d_simplecnn_aug_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96f2bd92ad52f28e42ef1f1827e98c7b8473d6c7e7c3b5d7e3afdfaf535718b9 +size 255976 diff --git a/classifier/outputs/models/p2d_simplecnn_aug_fold2_best.pt b/classifier/outputs/models/p2d_simplecnn_aug_fold2_best.pt new file mode 100644 index 0000000..d6befd5 --- /dev/null +++ b/classifier/outputs/models/p2d_simplecnn_aug_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:136b050cc1144bf3c2a2701e3989ca579f82373bcf84d318020cdaf2464f504e +size 255942 diff --git a/classifier/outputs/models/p2d_simplecnn_aug_fold2_final.pt 
b/classifier/outputs/models/p2d_simplecnn_aug_fold2_final.pt new file mode 100644 index 0000000..2de7283 --- /dev/null +++ b/classifier/outputs/models/p2d_simplecnn_aug_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:522b03c3e99e63eda01305c16fc0bfef8ec1f5ae92eb269d2bcb43cb77667b43 +size 255976 diff --git a/classifier/outputs/models/p2d_simplecnn_aug_fold3_best.pt b/classifier/outputs/models/p2d_simplecnn_aug_fold3_best.pt new file mode 100644 index 0000000..2050ecc --- /dev/null +++ b/classifier/outputs/models/p2d_simplecnn_aug_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cda4d608072941879f3e786156c294096fbfc2c8a42f96c54af3a3a05c0a2af +size 255942 diff --git a/classifier/outputs/models/p2d_simplecnn_aug_fold3_final.pt b/classifier/outputs/models/p2d_simplecnn_aug_fold3_final.pt new file mode 100644 index 0000000..11104cd --- /dev/null +++ b/classifier/outputs/models/p2d_simplecnn_aug_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13fc6b2fe849b29d5c9ca7b9b63976336005e94dcb4ad0cb0f9f7058de72e110 +size 255976 diff --git a/classifier/outputs/models/p2d_simplecnn_aug_fold4_best.pt b/classifier/outputs/models/p2d_simplecnn_aug_fold4_best.pt new file mode 100644 index 0000000..cdaa4df --- /dev/null +++ b/classifier/outputs/models/p2d_simplecnn_aug_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daa28a1381cfc0968a9cfe282f19b1823fbfd267bf053d4e557eea4c60d6a778 +size 255942 diff --git a/classifier/outputs/models/p2d_simplecnn_aug_fold4_final.pt b/classifier/outputs/models/p2d_simplecnn_aug_fold4_final.pt new file mode 100644 index 0000000..f205ff2 --- /dev/null +++ b/classifier/outputs/models/p2d_simplecnn_aug_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:681225710eb713b65ce2fc9620c1e03e4540a6423fa147c002b6eb0f331ffd36 +size 255976 diff --git 
a/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold0_best.pt b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold0_best.pt new file mode 100644 index 0000000..c83902d --- /dev/null +++ b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5fc1bf1eed2a269c456ba27e1b1e47815e8f240a8db1c68c06409a4149e2c9a +size 44790660 diff --git a/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold0_final.pt b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold0_final.pt new file mode 100644 index 0000000..6190a43 --- /dev/null +++ b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94438ddd1adadbff47e4065ea6533fc4a81347b9231d3b64ec8956b18b97e66c +size 44790786 diff --git a/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold1_best.pt b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold1_best.pt new file mode 100644 index 0000000..be51e13 --- /dev/null +++ b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8aed252d0c33ee8e17bbd0b7fe44a6f0af473e68789c233488c0d5118806547 +size 44790660 diff --git a/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold1_final.pt b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold1_final.pt new file mode 100644 index 0000000..486c3d2 --- /dev/null +++ b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b68b1a91b45e91e68e619bb25fd2fd6dd4bd43a5a7d217d7d1ded861b8d69c +size 44790786 diff --git a/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold2_best.pt b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold2_best.pt new file mode 100644 index 0000000..f9a7d97 --- /dev/null +++ b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold2_best.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1def3abb9ee3aa019bd0d6983136eda1100808acdc1e153c674e2ab8f7880507 +size 44790660 diff --git a/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold2_final.pt b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold2_final.pt new file mode 100644 index 0000000..6c24831 --- /dev/null +++ b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0643ba2c7586033ac9dd7577d83fceb0d7e4395c6c9a239664e10ac8c5235198 +size 44790786 diff --git a/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold3_best.pt b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold3_best.pt new file mode 100644 index 0000000..dad03c8 --- /dev/null +++ b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:760f6724c6ad041ad37f48028453145421d986a721769a44bab32c448feaa7e4 +size 44790660 diff --git a/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold3_final.pt b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold3_final.pt new file mode 100644 index 0000000..9a88cc0 --- /dev/null +++ b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eff2fabeb940bc80a1e2fc07ea1700b2b5acdf9b3c7826e109830c5b18a402ed +size 44790786 diff --git a/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold4_best.pt b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold4_best.pt new file mode 100644 index 0000000..98556fd --- /dev/null +++ b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286e1ed4800a12f19205eb6cf3afeacec3841aa59c1c0930fcca29dd25546b03 +size 44790660 diff --git a/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold4_final.pt 
b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold4_final.pt new file mode 100644 index 0000000..fcc03bc --- /dev/null +++ b/classifier/outputs/models/p2e_resnet18_facecrop_aug_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43c45579f698bfb5d23fb24fd2aa27b76bdd0d12a5462f0de315d7460361daa2 +size 44790786 diff --git a/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold0_best.pt b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold0_best.pt new file mode 100644 index 0000000..5c21127 --- /dev/null +++ b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold0_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f3a3e378123e70ed1e35186849441f8ae8cf79d4a0e5143232712542eb2a347 +size 256248 diff --git a/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold0_final.pt b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold0_final.pt new file mode 100644 index 0000000..9ec1d6f --- /dev/null +++ b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold0_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd078a4d36b4985350549b88f1193917baefca59ff3471be8f8c747dcc2f9de +size 256282 diff --git a/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold1_best.pt b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold1_best.pt new file mode 100644 index 0000000..0c1fe25 --- /dev/null +++ b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold1_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:723a47d04dbb2369da6b580251ab0a9167fafd45b74a578f6a6f670ff4b95769 +size 256248 diff --git a/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold1_final.pt b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold1_final.pt new file mode 100644 index 0000000..394619b --- /dev/null +++ b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold1_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:33c40e73a3bad0757f6c281fdcddd87a89f3665ba8cf8cb8151a9b20cef19ddd +size 256282 diff --git a/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold2_best.pt b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold2_best.pt new file mode 100644 index 0000000..657fb93 --- /dev/null +++ b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold2_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:580173475a5c8e57cf2a03b8afdef4ed93ebbf8a95d5d6e5be538e5dbef0524e +size 256248 diff --git a/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold2_final.pt b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold2_final.pt new file mode 100644 index 0000000..062325c --- /dev/null +++ b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold2_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6364bfc5c9bd2814d83010ce15378fb19fcf59541be4747a98afa6219ec41783 +size 256282 diff --git a/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold3_best.pt b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold3_best.pt new file mode 100644 index 0000000..1e4a887 --- /dev/null +++ b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold3_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5587bb7dbe692279b93fa0dfe1aabca3353cf359d247d5ac8cf30e07e0a69162 +size 256248 diff --git a/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold3_final.pt b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold3_final.pt new file mode 100644 index 0000000..028ffd9 --- /dev/null +++ b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold3_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf2cc821b33ef7ba47cf7f6f4734ffebcc71f836106f0441d4e747c6768c84b0 +size 256282 diff --git a/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold4_best.pt b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold4_best.pt new file mode 100644 index 
0000000..235bd5e --- /dev/null +++ b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold4_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d0fd8da39558618c928d73ff0163d7e3f683cd4227a2b576239f47b90cd0ff9 +size 256248 diff --git a/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold4_final.pt b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold4_final.pt new file mode 100644 index 0000000..d08ac81 --- /dev/null +++ b/classifier/outputs/models/p2e_simplecnn_facecrop_aug_fold4_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:725b2d15ac3b49ce46c5bd818534ab0a2d3da1e6f2da0c3806e7d458ee21cfc4 +size 256282 diff --git a/classifier/outputs/pipeline/.gitkeep b/classifier/outputs/pipeline/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/classifier/outputs/pipeline/20260428T225925.003648+0000.json b/classifier/outputs/pipeline/20260428T225925.003648+0000.json new file mode 100644 index 0000000..de7a1d4 --- /dev/null +++ b/classifier/outputs/pipeline/20260428T225925.003648+0000.json @@ -0,0 +1,24 @@ +{ + "created_at": "2026-04-28T22:59:25.003648+00:00", + "config_paths": [ + "classifier/configs/phase2/p2a_t1_original.json", + "classifier/configs/phase2/p2a_t2_real_norm.json", + "classifier/configs/phase2/p2a_t3_holdout_inpainting.json", + "classifier/configs/phase2/p2a_t3_holdout_insight.json", + "classifier/configs/phase2/p2a_t3_holdout_text2img.json", + "classifier/configs/phase2/p2b_resnet18_224.json", + "classifier/configs/phase2/p2b_simplecnn_224.json", + "classifier/configs/phase2/p2c_resnet18_facecrop.json", + "classifier/configs/phase2/p2c_simplecnn_facecrop.json", + "classifier/configs/phase2/p2d_resnet18_aug.json", + "classifier/configs/phase2/p2d_simplecnn_aug.json", + "classifier/configs/phase2/p2e_resnet18_facecrop_aug.json", + "classifier/configs/phase2/p2e_simplecnn_facecrop_aug.json" + ], + "instance_id": 35781518, + "offer_id": 35775446, + "ssh_host": 
"ssh1.vast.ai", + "ssh_port": 21518, + "status": "cancelled", + "remote_workspace": "/workspace/DRL_PROJ" +} \ No newline at end of file diff --git a/classifier/outputs/pipeline/20260429T080055.565968+0000.json b/classifier/outputs/pipeline/20260429T080055.565968+0000.json new file mode 100644 index 0000000..cd88264 --- /dev/null +++ b/classifier/outputs/pipeline/20260429T080055.565968+0000.json @@ -0,0 +1,12 @@ +{ + "created_at": "2026-04-29T08:00:55.565968+00:00", + "config_paths": [ + "classifier/configs/phase2/p2e_simplecnn_facecrop_aug.json" + ], + "instance_id": 35812907, + "offer_id": 31977988, + "ssh_host": "ssh6.vast.ai", + "ssh_port": 12906, + "status": "completed", + "remote_workspace": "/workspace/DRL_PROJ" +} \ No newline at end of file diff --git a/classifier/outputs/pipeline/20260429T093529.509392+0000.json b/classifier/outputs/pipeline/20260429T093529.509392+0000.json new file mode 100644 index 0000000..45806fa --- /dev/null +++ b/classifier/outputs/pipeline/20260429T093529.509392+0000.json @@ -0,0 +1,13 @@ +{ + "created_at": "2026-04-29T09:35:29.509392+00:00", + "config_paths": [ + "classifier/configs/phase1/p1_resnet18_baseline.json", + "classifier/configs/phase1/p1_simplecnn_baseline.json" + ], + "instance_id": 35818211, + "offer_id": 31962198, + "ssh_host": "ssh7.vast.ai", + "ssh_port": 18210, + "status": "completed", + "remote_workspace": "/workspace/DRL_PROJ" +} \ No newline at end of file diff --git a/classifier/run.py b/classifier/run.py new file mode 100644 index 0000000..8ffc73a --- /dev/null +++ b/classifier/run.py @@ -0,0 +1,120 @@ +""" +Train a classifier with 5-fold stratified group cross-validation from a config file. 
Usage:
    python run.py configs/phase1/p1_simplecnn_baseline.json
    python run.py configs/phase1/p1_resnet18_baseline.json --data-dir /mnt/data/DFF --output-root /mnt/results
"""
import argparse
import json
import sys
import warnings
from pathlib import Path

# PIL warns on corrupt EXIF metadata in some JPEGs — benign, not actionable.
warnings.filterwarnings("ignore", message="Corrupt EXIF data", category=UserWarning)


def parse_args(argv=None):
    # Parses CLI arguments; `argv` is injectable so tests can call this directly.
    # The __main__ guard below passes sys.argv[1:] explicitly.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config_path", help="Path to the JSON experiment config.")
    parser.add_argument("--data-dir", default=None, help="Override cfg['data_dir'] for this run.")
    parser.add_argument("--output-root", default="classifier/outputs", help="Directory where models/logs are written. Default: classifier/outputs")
    parser.add_argument("--use-gpu", action="store_true", help="Use GPU for training.")
    return parser.parse_args(argv)


# ── Training entrypoint ─────────────────────────────────────────────────────

def main(config_path, *, data_dir_override=None, output_root="classifier/outputs", use_gpu=False):
    """Train one experiment config with grouped CV; write checkpoints and a JSON log.

    config_path: JSON experiment config (may use `extends`, see load_config).
    data_dir_override: replaces cfg["data_dir"] when given.
    output_root: receives models/ and logs/ subdirectories.
    use_gpu: request CUDA; falls back to CPU with a warning if unavailable.
    """
    # Heavy imports are deferred into the function body so argument parsing
    # (e.g. --help) does not pay the torch import cost.
    import numpy as np
    import torch

    from src.models import get_model
    from src.data import DFFDataset, apply_subsample, build_transforms, get_splits
    from src.training import train_classifier_cv
    from src.utils import load_config

    # Load merged config (supports extends + shared defaults).
    cfg = load_config(config_path)

    # Set seeds and optional cuDNN determinism for reproducible runs.
    seed = cfg.get("seed", 42)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    deterministic = cfg.get("deterministic", False)
    torch.backends.cudnn.deterministic = deterministic
    # benchmark mode is mutually exclusive with determinism here.
    torch.backends.cudnn.benchmark = not deterministic

    run_name = cfg["run_name"]
    device = "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
    if use_gpu and not torch.cuda.is_available():
        print("Warning: --use-gpu specified but CUDA not available, falling back to CPU")

    # Resolve runtime paths.
    data_dir = data_dir_override or cfg.get("data_dir", "data")
    output_root = Path(output_root)
    models_dir = output_root / "models"
    logs_dir = output_root / "logs"

    print(f"Run: {run_name}")
    print(f"Device: {device}")
    print(f"Data dir: {data_dir}")
    print(f"Output root: {output_root}")

    # Build raw dataset once, then derive fold-specific transformed subsets.
    raw_ds = DFFDataset(data_dir, sources=cfg.get("dataset_sources"))

    # Apply deterministic subsample (if configured) before split generation.
    # apply_subsample mutates raw_ds.samples in place and returns (kept, total)
    # or None when it was a no-op.
    sampled = apply_subsample(raw_ds, cfg)
    if sampled is not None:
        n_samples, total = sampled
        print(f"Subsampled to {n_samples}/{total} samples")

    # Create grouped CV folds and a transform builder callable for train/eval.
    splits = get_splits(raw_ds, cfg)
    transform_builder = build_transforms(raw_ds, cfg, augment=cfg.get("augment"))

    print(f"\nCV Split sizes:")
    for fold_idx, (train_idx, val_idx, test_idx) in enumerate(splits):
        print(f" Fold {fold_idx}: Train={len(train_idx)}, Val={len(val_idx)}, Test={len(test_idx)}")

    # Train across folds, save checkpoints, and collect aggregate metrics.
    logs_dir.mkdir(parents=True, exist_ok=True)
    results = train_classifier_cv(
        lambda: get_model(cfg),
        raw_ds,
        splits,
        epochs=cfg["epochs"],
        batch_size=cfg["batch_size"],
        lr=cfg["lr"],
        weight_decay=cfg.get("weight_decay", 1e-4),
        device=device,
        save_dir=models_dir,
        run_name=run_name,
        early_stopping_patience=cfg.get("early_stopping_patience", 0),
        num_workers=cfg.get("num_workers", 4),
        transform_builder=transform_builder,
        T_max=cfg.get("T_max", cfg["epochs"]),
        normalization=cfg.get("normalization"),
        logs_dir=logs_dir,
    )

    # Persist metrics + config snapshot as the canonical run artifact.
    results["config"] = cfg
    out = logs_dir / f"{run_name}.json"
    with open(out, "w") as f:
        json.dump(results, f, indent=2)
    print(f"\nSaved results to {out}")


if __name__ == "__main__":
    args = parse_args(sys.argv[1:])
    main(
        args.config_path,
        data_dir_override=args.data_dir,
        output_root=args.output_root,
        use_gpu=args.use_gpu,
    )


# ── classifier/src/data/__init__.py ─────────────────────────────────────────
from src.data.dataset import DFFDataset, PathDataset, SOURCES, get_source_name
from src.data.splits import TransformSubset, apply_subsample, build_transforms, get_splits

__all__ = ["DFFDataset", "PathDataset", "SOURCES", "TransformSubset", "apply_subsample", "build_transforms", "get_source_name", "get_splits"]


# ── classifier/src/data/dataset.py ──────────────────────────────────────────
from collections import Counter
from pathlib import Path

from PIL import Image
from torch.utils.data import Dataset

# One real source (wiki) and three fake sources; 0 = real, 1 = fake
# The same identity basename appears in every source - splitting must happen
# at the identity level to prevent leakage (see
# splits.py), e.g.: data_dir/SOURCE/identity/BASENAME.jpg — the same BASENAME
# in every SOURCE must land in the same split.
SOURCES = {
    "wiki": 0,
    "inpainting": 1,
    "text2img": 1,
    "insight": 1,
}


def get_source_name(path: Path) -> str:
    """Source folder name, assuming a data_dir/source/identity/image.jpg layout."""
    return Path(path).parents[1].name


class DFFDataset(Dataset):
    """Walks data_dir/source/identity/*.jpg and collects (path, label) pairs."""

    def __init__(self, data_dir, sources=None, transform=None):
        self.transform = transform
        data_dir = Path(data_dir)
        if not data_dir.exists():
            raise FileNotFoundError(
                f"Dataset root not found: {data_dir}. Expected a directory containing "
                "wiki/, inpainting/, text2img/, and insight/."
            )

        selected = list(SOURCES.keys()) if sources is None else sources
        self.samples = []
        for source in selected:
            label = SOURCES[source]
            source_dir = data_dir / source
            if not source_dir.exists():
                raise FileNotFoundError(
                    f"Missing source directory: {source_dir}. Check `data_dir` in the config."
                )
            # One identity per subdirectory; sort both levels for determinism.
            self.samples.extend(
                (img_path, label)
                for identity_dir in sorted(source_dir.iterdir())
                if identity_dir.is_dir()
                for img_path in sorted(identity_dir.glob("*.jpg"))
            )

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, label = self.samples[idx]
        image = Image.open(path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

    def label_counts(self):
        """Class-balance check: Counter over the 0/1 labels of all samples."""
        return Counter(label for _, label in self.samples)


# Wraps a list of prediction records as a Dataset for re-scoring via DataLoader.
class PathDataset(Dataset):
    # Wraps prediction records (dicts with at least "path" and "label") so they
    # can be re-scored through a DataLoader without rebuilding the full dataset.
    def __init__(self, records, image_size, preprocess=None):
        # Local import — presumably avoids a circular import with preprocessing;
        # TODO(review): confirm.
        from src.preprocessing import get_transforms
        self.records = records
        # Eval-mode transform only: no augmentation when re-scoring.
        self.transform = get_transforms(train=False, image_size=image_size)
        self.preprocess = preprocess

    def __len__(self):
        return len(self.records)

    def __getitem__(self, idx):
        # Returns (tensor, label, idx); the idx lets callers map outputs back
        # to the originating record.
        record = self.records[idx]
        image = Image.open(record["path"]).convert("RGB")
        if self.preprocess is not None:
            image = self.preprocess(image)
        return self.transform(image), record["label"], idx


# ── classifier/src/data/splits.py ───────────────────────────────────────────
import random
from typing import Callable, List, Tuple

from torch.utils.data import Dataset, Subset

from src.data.dataset import get_source_name
from src.preprocessing import get_transforms
from src.utils import create_group_kfold_splits, get_basename


# Defined at module level so DataLoader workers (num_workers > 0) can serialize it safely
class TransformSubset(Dataset):
    def __init__(self, subset, transform):
        self.subset, self.transform = subset, transform

    def __len__(self):
        return len(self.subset)

    def __getitem__(self, idx):
        img, label = self.subset[idx]
        return self.transform(img), label


# ── Splitting ──────────────────────────────────────────────────────────────

# Builds grouped CV fold indices from config; this is the only split strategy used.
def get_splits(
    raw_dataset,
    cfg,
) -> List[Tuple[List[int], List[int], List[int]]]:
    # Each element is (train_indices, val_indices, test_indices) for one fold.
    splits = create_group_kfold_splits(
        raw_dataset.samples,
        n_splits=cfg.get("cv_folds", 5),
        seed=cfg.get("seed", 42),
    )

    # Optional source holdout: train/val from train_sources, test from eval_sources.
    train_sources = cfg.get("train_sources")
    eval_sources = cfg.get("eval_sources")
    if train_sources or eval_sources:
        all_sources = {get_source_name(path) for path, _ in raw_dataset.samples}
        # A missing side of the holdout defaults to "all sources".
        ts = set(train_sources or all_sources)
        es = set(eval_sources or all_sources)
        unknown = (ts | es) - all_sources
        if unknown:
            raise ValueError(f"Unknown sources requested: {sorted(unknown)}")
        splits = [
            (
                [i for i in tr if get_source_name(raw_dataset.samples[i][0]) in ts],
                [i for i in val if get_source_name(raw_dataset.samples[i][0]) in ts],
                [i for i in te if get_source_name(raw_dataset.samples[i][0]) in es],
            )
            for tr, val, te in splits
        ]
    return splits


# Deterministic subsampling shared by training and reevaluation.
def apply_subsample(raw_dataset, cfg) -> tuple[int, int] | None:
    # Returns (kept_count, original_total) when subsampling happened, or None
    # when subsample >= 1.0 made it a no-op. NOTE: mutates raw_dataset.samples.
    subsample = cfg.get("subsample", 1.0)
    if subsample >= 1.0:
        return None

    total = len(raw_dataset.samples)
    if total == 0:
        return 0, 0

    # Subsample at basename-group level to preserve identity grouping guarantees.
    group_to_indices = {}
    for idx, (path, _) in enumerate(raw_dataset.samples):
        group = get_basename(str(path))
        group_to_indices.setdefault(group, []).append(idx)

    groups = list(group_to_indices.keys())
    n_groups = len(groups)
    target_groups = max(1, int(n_groups * subsample))
    # Seeded shuffle keeps the kept-group set stable across runs with the same seed.
    rng = random.Random(cfg.get("seed", 42))
    rng.shuffle(groups)
    keep_groups = set(groups[:target_groups])

    keep_indices = [
        idx
        for group in keep_groups
        for idx in group_to_indices[group]
    ]
    # Sorting restores the original sample ordering after the group shuffle.
    keep_indices.sort()
    raw_dataset.samples = [raw_dataset.samples[i] for i in keep_indices]
    return len(raw_dataset.samples), total


# Controls stochastic augmentations (flip, jitter, etc.)
# augment=False -> NO_AUGMENT preset (square-crop + resize + normalize still run)
# augment=None, pipeline defaults, augment=dict -> override specific params
# Face cropping is handled upstream via data_dir swap, not here
def build_transforms(raw_dataset, cfg, augment=None) -> Callable:
    # Returns a builder callable:
    #   (indices, train=True, normalize_mean=None, normalize_std=None) -> Dataset
    # so each fold can materialize its own transformed subset lazily.
    image_size = cfg["image_size"]

    if augment is False:
        # Local import keeps the pipeline module out of the import path unless
        # the NO_AUGMENT preset is actually requested.
        from src.preprocessing.pipeline import DFFImagePipeline

        augment = DFFImagePipeline.NO_AUGMENT

    def transform_builder(indices, train=True, normalize_mean=None, normalize_std=None):
        subset = Subset(raw_dataset, indices)
        return TransformSubset(
            subset,
            get_transforms(
                train=train,
                image_size=image_size,
                augment=augment,
                normalize_mean=normalize_mean,
                normalize_std=normalize_std,
            ),
        )
    return transform_builder


# ── classifier/src/evaluation/__init__.py ───────────────────────────────────
from src.evaluation.evaluate import (
    predict_rows,
    rescore_rows,
    save_errors,
    save_hists,
    save_preds,
    save_summary,
)
from src.evaluation.metrics import binary_metrics, calc_metrics, pair_metrics, source_metrics

__all__ = ["binary_metrics", "calc_metrics", "pair_metrics", "predict_rows", "rescore_rows", "save_errors", "save_hists", "save_preds", "save_summary", "source_metrics"]


# ── classifier/src/evaluation/evaluate.py ───────────────────────────────────
import csv
import json
from pathlib import Path

import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader

from src.data import PathDataset, get_source_name


# ── Inference ──────────────────────────────────────────────────────────────

# Run model inference and return one prediction record per sample
# raw_dataset and indices supply
# path/source/label metadata, decoupling this
# function from the wrapper layout (Subset/TransformSubset) of `dataset`
def predict_rows(model, dataset, raw_dataset, indices, batch_size, device, *, num_workers=4):
    # Requires shuffle=False so `offset` correctly tracks position in `indices`.
    loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)
    records = []

    model.eval().to(device)
    with torch.no_grad():
        offset = 0
        for images, labels in loader:
            # Single-logit binary head: sigmoid -> P(fake), threshold at 0.5.
            logits = model(images.to(device)).squeeze(1).cpu()
            probs = torch.sigmoid(logits)
            preds = (probs >= 0.5).long()

            for i in range(len(labels)):
                sample_idx = indices[offset + i]
                path, label = raw_dataset.samples[sample_idx]
                records.append({
                    "path": str(path),
                    "basename": Path(path).name,
                    "source": get_source_name(path),
                    "label": int(label),
                    "pred": int(preds[i].item()),
                    "prob_fake": float(probs[i].item()),
                    "logit": float(logits[i].item()),
                })
            offset += len(labels)

    return records


# Re-run model scoring for an existing list of records
def rescore_rows(
    model, records, image_size, batch_size, device, preprocess=None, *, num_workers=4
):
    # Returns new record dicts with pred/prob_fake/logit refreshed; the input
    # `records` list is not mutated (each row is copied before update).
    dataset = PathDataset(records, image_size=image_size, preprocess=preprocess)
    loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)
    outputs = []

    model.eval().to(device)
    with torch.no_grad():
        for images, _, indices in loader:
            logits = model(images.to(device)).squeeze(1).cpu()
            probs = torch.sigmoid(logits)
            preds = (probs >= 0.5).long()

            for j in range(len(indices)):
                # PathDataset yields the record index as its third element, so
                # outputs can be joined back even with multiple workers.
                base = dict(records[int(indices[j].item())])
                base["prob_fake"] = float(probs[j].item())
                base["pred"] = int(preds[j].item())
                base["logit"] = float(logits[j].item())
                outputs.append(base)

    return outputs


# ── Export ───────────────────────────────────────────────────────────────────

# Writes all prediction records to CSV in a fixed column order
def save_preds(records, output_path):
    output_path.parent.mkdir(parents=True, exist_ok=True)
    fieldnames = ["path", "basename", "source", "label", "pred", "prob_fake", "logit"]
    with open(output_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(records)


# Saves the top-k highest-confidence false positives and false negatives for error analysis
def save_errors(records, output_path, top_k=32):
    # FPs sorted by descending P(fake): most confidently wrong "fake" calls first.
    false_positives = sorted(
        (r for r in records if r["label"] == 0 and r["pred"] == 1),
        key=lambda r: r["prob_fake"],
        reverse=True,
    )[:top_k]
    # FNs sorted by ascending P(fake): fakes the model was most confident were real.
    false_negatives = sorted(
        (r for r in records if r["label"] == 1 and r["pred"] == 0),
        key=lambda r: r["prob_fake"],
    )[:top_k]
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w") as f:
        json.dump({
            "top_false_positives": false_positives,
            "top_false_negatives": false_negatives,
        }, f, indent=2)


# Saves P(fake) histograms: one overall class comparison and one per source
def save_hists(records, output_dir):
    output_dir.mkdir(parents=True, exist_ok=True)

    # Overall real-vs-fake comparison (density-normalized so class imbalance
    # does not hide the smaller class).
    fig, ax = plt.subplots(figsize=(8, 5))
    real_probs = [r["prob_fake"] for r in records if r["label"] == 0]
    fake_probs = [r["prob_fake"] for r in records if r["label"] == 1]
    ax.hist(real_probs, bins=30, alpha=0.6, label="real", density=True)
    ax.hist(fake_probs, bins=30, alpha=0.6, label="fake", density=True)
    ax.set_xlabel("Predicted P(fake)")
    ax.set_ylabel("Density")
    ax.set_title("Confidence by class")
    ax.legend()
    fig.tight_layout()
    fig.savefig(output_dir / "confidence_by_class.png", dpi=160)
    plt.close(fig)

    # One raw-count histogram per source.
    for source in sorted({r["source"] for r in records}):
        fig, ax = plt.subplots(figsize=(8, 5))
        source_probs = [r["prob_fake"] for r in records if r["source"] == source]
        ax.hist(source_probs, bins=30, alpha=0.8)
        ax.set_xlabel("Predicted P(fake)")
        ax.set_ylabel("Count")
        ax.set_title(f"Confidence distribution: {source}")
        fig.tight_layout()
        fig.savefig(output_dir / f"confidence_{source}.png", dpi=160)
        plt.close(fig)


# Saves the aggregated results dict as a formatted JSON file
def save_summary(summary, output_path):
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w") as f:
        json.dump(summary, f, indent=2)


# ── classifier/src/evaluation/metrics.py ────────────────────────────────────
import torch
import numpy as np
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    f1_score,
    confusion_matrix,
)

# AUC and F1 are undefined when only one class is present in the batch;
# returns None for those fields rather than raising
def binary_metrics(logits: torch.Tensor, labels: torch.Tensor) -> dict:
    probs = torch.sigmoid(logits).numpy()
    preds = (probs >= 0.5).astype(int)
    y = labels.numpy().astype(int)
    has_both_classes = len(np.unique(y)) > 1
    auc_roc = float(roc_auc_score(y, probs)) if has_both_classes else None
    f1 = float(f1_score(y, preds, zero_division=0)) if has_both_classes else None
    return {
        "accuracy": float(accuracy_score(y, preds)),
        # confusion_matrix stays an ndarray here; calc_metrics converts it to
        # nested lists before JSON serialization.
        "auc_roc": auc_roc,
        "f1": f1,
        "confusion_matrix": confusion_matrix(y, preds, labels=[0, 1]),
    }


# Converts per-sample records to tensors and delegates to binary_metrics
def calc_metrics(records):
    logits = torch.tensor([r["logit"] for r in records], dtype=torch.float32)
    labels = torch.tensor([r["label"] for r in records], dtype=torch.float32)
    metrics = binary_metrics(logits, labels)
    metrics["confusion_matrix"] = metrics["confusion_matrix"].tolist()
    return metrics


# Per-source summaries; fake sources get detection_rate + pairwise_auc since AUC is undefined single-class
def source_metrics(records, real_source="wiki"):
    wiki_records = [r for r in records if r["source"] == real_source]
    by_source = {}
    for source in sorted({r["source"] for r in records}):
        source_records = [r for r in records if r["source"] == source]
        metrics = calc_metrics(source_records)
        metrics["n"] = len(source_records)
        labels = [r["label"] for r in source_records]
        if len(set(labels)) == 1:
            if labels[0] == 1:  # all fake
                # For an all-fake source, accuracy *is* the detection rate.
                metrics["detection_rate"] = metrics["accuracy"]
                if wiki_records:
                    # Pair with the real source to get a defined AUC/F1.
                    pair_m = calc_metrics(wiki_records + source_records)
                    metrics["pairwise_auc"] = pair_m["auc_roc"]
                    metrics["pairwise_f1"] = pair_m["f1"]
            else:  # all real (wiki)
                metrics["false_alarm_rate"] = 1.0 - metrics["accuracy"]
        by_source[source] = metrics
    return by_source


# Real-vs-one-fake AUC/F1 per fake source - more interpretable than global AUC when class ratios vary
def pair_metrics(records, real_source="wiki"):
    fake_sources = sorted({r["source"] for r in records if r["source"] != real_source})
    pairwise = {}
    for fake_source in fake_sources:
        subset = [r for r in records if r["source"] in {real_source, fake_source}]
        if subset:
            pairwise[f"{real_source}_vs_{fake_source}"] = {
                "sources": [real_source, fake_source],
                "n": len(subset),
                **calc_metrics(subset),
            }
    return pairwise


# ── classifier/src/models/__init__.py ───────────────────────────────────────
from pathlib import Path
from typing import Callable, Union

import torch
import torch.nn as nn

# Maps backbone name -> builder function; populated by each model module at import time
_REGISTRY: dict[str, Callable[[dict], nn.Module]] = {}


# Called by each model module to advertise its backbone(s) to get_model
def register(name: str, builder: Callable[[dict], nn.Module]) -> None:
    _REGISTRY[name] = builder


# Instantiates the backbone requested in cfg["backbone"]
def get_model(cfg: dict) -> nn.Module:
    backbone = cfg.get("backbone", "simple_cnn")
    builder = _REGISTRY.get(backbone)
    if builder is None:
        available = ", ".join(sorted(_REGISTRY))
        raise ValueError(f"Unknown backbone: {backbone!r}. Available: {available}")
    return builder(cfg)


# Loads a saved state-dict into model in-place and returns it
def load_checkpoint(model: nn.Module, path: Union[Path, str], device) -> nn.Module:
    # weights_only=True refuses arbitrary pickled objects in the checkpoint.
    model.load_state_dict(torch.load(path, map_location=device, weights_only=True))
    return model


# Importing the modules triggers their register() calls
from src.models import simple_cnn, resnet, efficientnet  # noqa: E402, F401

__all__ = ["get_model", "load_checkpoint", "register"]


# ── classifier/src/models/efficientnet.py ───────────────────────────────────
import torch.nn as nn
from torchvision import models

from src.models import register


# EfficientNet's classification head is a Sequential; [-1] targets the final Linear
def build(cfg: dict) -> nn.Module:
    backbone = cfg.get("backbone", "efficientnet_b0")
    pretrained = cfg.get("pretrained", True)

    if backbone == "efficientnet_b0":
        weights = models.EfficientNet_B0_Weights.DEFAULT if pretrained else None
        model = models.efficientnet_b0(weights=weights)
    else:
        raise ValueError(f"Unsupported EfficientNet backbone: {backbone!r}. 
Supported: efficientnet_b0") + + in_features = model.classifier[-1].in_features + model.classifier[-1] = nn.Linear(in_features, 1) + return model + + +register("efficientnet_b0", build) diff --git a/classifier/src/models/resnet.py b/classifier/src/models/resnet.py new file mode 100644 index 0000000..0a13298 --- /dev/null +++ b/classifier/src/models/resnet.py @@ -0,0 +1,30 @@ +import torch.nn as nn +from torchvision import models + +from src.models import register + + +# Loads pretrained ResNet and replaces the 1000-class head with a single logit for binary detection +def build(cfg: dict) -> nn.Module: + backbone = cfg.get("backbone", "resnet18") + pretrained = cfg.get("pretrained", True) + + if backbone == "resnet18": + weights = models.ResNet18_Weights.DEFAULT if pretrained else None + model = models.resnet18(weights=weights) + elif backbone == "resnet34": + weights = models.ResNet34_Weights.DEFAULT if pretrained else None + model = models.resnet34(weights=weights) + elif backbone == "resnet50": + weights = models.ResNet50_Weights.DEFAULT if pretrained else None + model = models.resnet50(weights=weights) + else: + raise ValueError(f"Unsupported backbone: {backbone!r}") + + model.fc = nn.Linear(model.fc.in_features, 1) + return model + + +register("resnet18", build) +register("resnet34", build) +register("resnet50", build) diff --git a/classifier/src/models/simple_cnn.py b/classifier/src/models/simple_cnn.py new file mode 100644 index 0000000..3e679c8 --- /dev/null +++ b/classifier/src/models/simple_cnn.py @@ -0,0 +1,49 @@ +import torch.nn as nn + +from src.models import register + +# Named presets map cnn_preset config values to channel lists +CNN_PRESETS = { + "micro": [8, 16], + "small": [8, 16, 32], + "medium": [16, 32, 64, 64], + "large": [32, 64, 128, 256], +} + + +# Each entry in channels builds a Conv -> BN -> ReLU -> Pool block +# the last block pools to 1×1 so the head is resolution-independent +class SimpleCNN(nn.Module): + def __init__(self, 
channels=None, in_channels=3, dropout=0.0): + super().__init__() + if channels is None: + channels = CNN_PRESETS["medium"] + + layers = [] + prev = in_channels + for i, ch in enumerate(channels): + layers += [nn.Conv2d(prev, ch, 3, padding=1), nn.BatchNorm2d(ch), nn.ReLU()] + if i < len(channels) - 1: + layers.append(nn.MaxPool2d(2)) + else: + layers.append(nn.AdaptiveAvgPool2d(1)) + prev = ch + self.features = nn.Sequential(*layers) + + head = [] + if dropout > 0: + head.append(nn.Dropout(dropout)) + head.append(nn.Linear(channels[-1], 1)) + self.classifier = nn.Sequential(*head) + + def forward(self, x): + return self.classifier(self.features(x).flatten(1)) + + +# Resolves cnn_channels > cnn_preset > "medium" fallback +def build(cfg: dict) -> nn.Module: + channels = cfg.get("cnn_channels") or CNN_PRESETS.get(cfg.get("cnn_preset", "medium"), CNN_PRESETS["medium"]) + return SimpleCNN(channels=channels, dropout=cfg.get("dropout", 0.0)) + + +register("simple_cnn", build) diff --git a/classifier/src/preprocessing/__init__.py b/classifier/src/preprocessing/__init__.py new file mode 100644 index 0000000..6ffd2ae --- /dev/null +++ b/classifier/src/preprocessing/__init__.py @@ -0,0 +1,3 @@ +from src.preprocessing.pipeline import DFFImagePipeline, get_transforms + +__all__ = ["DFFImagePipeline", "get_transforms"] diff --git a/classifier/src/preprocessing/pipeline.py b/classifier/src/preprocessing/pipeline.py new file mode 100644 index 0000000..1a960ef --- /dev/null +++ b/classifier/src/preprocessing/pipeline.py @@ -0,0 +1,231 @@ +import io +import random + +import numpy as np +import torch +from PIL import Image, ImageFilter +from torchvision.transforms import InterpolationMode +from torchvision.transforms import functional as F + +# Per-channel mean and std of the ImageNet training set (RGB order) +# Required when using torchvision pretrained weights — they were trained with +# this exact normalisation and expect it at inference time +_IMAGENET_MEAN = (0.485, 0.456, 
0.406) +_IMAGENET_STD = (0.229, 0.224, 0.225) + + +# Computes per-channel mean and std from real (label=0) training samples only +# Used for the real-norm experiment to test whether the model relies on +# colour/brightness differences between real and fake rather than identity cues +def compute_real_stats(dataset, indices, max_samples=1000, seed=42): + real_indices = [i for i in indices if dataset.samples[i][1] == 0] + if not real_indices: + return _IMAGENET_MEAN, _IMAGENET_STD + + if len(real_indices) > max_samples: + rng = np.random.RandomState(seed) + real_indices = rng.choice(real_indices, max_samples, replace=False).tolist() + + means, vars_ = [], [] + for i in real_indices: + path, _ = dataset.samples[i] + img = np.array(Image.open(path).convert("RGB"), dtype=np.float32) / 255.0 + means.append(img.mean(axis=(0, 1))) + vars_.append(img.var(axis=(0, 1))) + + mean = tuple(float(x) for x in np.mean(means, axis=0)) + std = tuple(float(x) for x in np.sqrt(np.mean(vars_, axis=0))) + return mean, std + + +# Single-image preprocessing pipeline for training and evaluation +# Square-crops first to remove the real-rectangular / fake-square geometry cue, +# then resizes, augments (train only), and normalizes +# augment=None uses DEFAULTS; augment=dict overrides specific keys; NO_AUGMENT disables all stochastic ops +class DFFImagePipeline: + + DEFAULTS = { + "crop_scale": [0.85, 1.0], + "center_jitter": 0.1, + "hflip_p": 0.5, + "rotation_degrees": 15, + "brightness": 0.4, + "contrast": 0.4, + "saturation": 0.3, + "hue": 0.05, + "grayscale_p": 0.2, + "blur_p": 0.3, + "blur_radius": [0.1, 1.5], + "jpeg_p": 0.3, + "jpeg_quality": [65, 95], + "erase_p": 0.3, + "erase_scale": [0.02, 0.15], + "noise_p": 0.2, + "noise_std": 0.05, + } + + # Pass as augment=DFFImagePipeline.NO_AUGMENT to keep crop+resize+normalize but skip all randomness + NO_AUGMENT: dict = { + "crop_scale": [1.0, 1.0], + "center_jitter": 0.0, + "hflip_p": 0.0, + "rotation_degrees": 0, + "brightness": 0.0, + 
"contrast": 0.0, + "saturation": 0.0, + "hue": 0.0, + "grayscale_p": 0.0, + "blur_p": 0.0, + "jpeg_p": 0.0, + "erase_p": 0.0, + "noise_p": 0.0, + } + + def __init__(self, *, image_size: int, train: bool, augment: dict | None = None, + normalize_mean=None, normalize_std=None): + self.image_size = image_size + self.train = train + self.normalize_mean = normalize_mean or _IMAGENET_MEAN + self.normalize_std = normalize_std or _IMAGENET_STD + + cfg = {**self.DEFAULTS, **(augment or {})} + self.crop_scale = tuple(cfg["crop_scale"]) + self.center_jitter = cfg["center_jitter"] + self.hflip_p = cfg["hflip_p"] + self.rotation_degrees = cfg["rotation_degrees"] + self.brightness = cfg["brightness"] + self.contrast = cfg["contrast"] + self.saturation = cfg["saturation"] + self.hue = cfg["hue"] + self.grayscale_p = cfg["grayscale_p"] + self.blur_p = cfg["blur_p"] + self.blur_radius = tuple(cfg["blur_radius"]) + self.jpeg_p = cfg["jpeg_p"] + self.jpeg_quality = tuple(cfg["jpeg_quality"]) + self.erase_p = cfg["erase_p"] + self.erase_scale = tuple(cfg["erase_scale"]) + self.noise_p = cfg["noise_p"] + self.noise_std = cfg["noise_std"] + + # Geometry transforms + def _crop_square(self, img: Image.Image) -> Image.Image: + width, height = img.size + short_side = min(width, height) + center_top = max((height - short_side) // 2, 0) + center_left = max((width - short_side) // 2, 0) + + if self.train: + min_scale, max_scale = self.crop_scale + scale = random.uniform(min_scale, max_scale) + crop_size = max(1, int(short_side * scale)) + top = max((height - crop_size) // 2, 0) + left = max((width - crop_size) // 2, 0) + + jitter_y = int((height - crop_size) * self.center_jitter) + jitter_x = int((width - crop_size) * self.center_jitter) + if jitter_y > 0: + top += random.randint(-jitter_y, jitter_y) + if jitter_x > 0: + left += random.randint(-jitter_x, jitter_x) + + top = max(0, min(top, height - crop_size)) + left = max(0, min(left, width - crop_size)) + else: + crop_size = short_side + top 
= center_top + left = center_left + + return F.crop(img, top=top, left=left, height=crop_size, width=crop_size) + + def _maybe_flip(self, img: Image.Image) -> Image.Image: + if self.train and random.random() < self.hflip_p: + return F.hflip(img) + return img + + def _maybe_rotate(self, img: Image.Image) -> Image.Image: + if self.train and self.rotation_degrees > 0: + angle = random.uniform(-self.rotation_degrees, self.rotation_degrees) + return F.rotate(img, angle, interpolation=InterpolationMode.BILINEAR, fill=0) + return img + + # Photometric transforms + def _jitter_factor(self, amount: float) -> float: + return random.uniform(max(0.0, 1.0 - amount), 1.0 + amount) + + def _maybe_color_jitter(self, img: Image.Image) -> Image.Image: + if not self.train: + return img + img = F.adjust_brightness(img, self._jitter_factor(self.brightness)) + img = F.adjust_contrast(img, self._jitter_factor(self.contrast)) + img = F.adjust_saturation(img, self._jitter_factor(self.saturation)) + img = F.adjust_hue(img, random.uniform(-self.hue, self.hue)) + return img + + def _maybe_grayscale(self, img: Image.Image) -> Image.Image: + if self.train and random.random() < self.grayscale_p: + return F.to_grayscale(img, num_output_channels=3) + return img + + def _maybe_blur(self, img: Image.Image) -> Image.Image: + if self.train and random.random() < self.blur_p: + radius = random.uniform(*self.blur_radius) + return img.filter(ImageFilter.GaussianBlur(radius=radius)) + return img + + # Bias-reduction transforms + # JPEG recompression removes high-frequency GAN artifacts that survive other augmentations + def _maybe_jpeg(self, img: Image.Image) -> Image.Image: + if self.train and random.random() < self.jpeg_p: + quality = random.randint(*self.jpeg_quality) + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=quality) + buf.seek(0) + img = Image.open(buf).convert("RGB") + img.load() # decode pixels while buf is still in scope + return img + + # Random erasing forces the model to use 
multiple regions rather than a single discriminative patch + def _maybe_erase(self, tensor: torch.Tensor) -> torch.Tensor: + if self.train and random.random() < self.erase_p: + c, h, w = tensor.shape + area = h * w + min_scale, max_scale = self.erase_scale + erase_area = area * random.uniform(min_scale, max_scale) + aspect = random.uniform(0.3, 1.0 / 0.3) + eh = int(round((erase_area * aspect) ** 0.5)) + ew = int(round((erase_area / aspect) ** 0.5)) + eh, ew = min(eh, h), min(ew, w) + top = random.randint(0, h - eh) + left = random.randint(0, w - ew) + tensor = tensor.clone() + tensor[:, top:top + eh, left:left + ew] = torch.rand(c, eh, ew) + return tensor + + # Gaussian noise improves robustness to sensor noise vs GAN noise patterns + def _maybe_noise(self, tensor: torch.Tensor) -> torch.Tensor: + if self.train and random.random() < self.noise_p: + noise = torch.randn_like(tensor) * self.noise_std + tensor = tensor + noise + return tensor + + # Pipeline entrypoint + def __call__(self, img: Image.Image) -> torch.Tensor: + img = self._crop_square(img) + img = F.resize(img, [self.image_size, self.image_size], interpolation=InterpolationMode.BILINEAR) + img = self._maybe_flip(img) + img = self._maybe_rotate(img) + img = self._maybe_color_jitter(img) + img = self._maybe_grayscale(img) + img = self._maybe_blur(img) + img = self._maybe_jpeg(img) + tensor = F.to_tensor(img) + tensor = self._maybe_erase(tensor) + tensor = self._maybe_noise(tensor) + return F.normalize(tensor, self.normalize_mean, self.normalize_std) + + +# Convenience wrapper used by splits.py and evaluate.py +def get_transforms(train=True, image_size=224, augment=None, + normalize_mean=None, normalize_std=None): + return DFFImagePipeline(image_size=image_size, train=train, augment=augment, + normalize_mean=normalize_mean, normalize_std=normalize_std) diff --git a/classifier/src/training/__init__.py b/classifier/src/training/__init__.py new file mode 100644 index 0000000..ceea034 --- /dev/null +++ 
b/classifier/src/training/__init__.py @@ -0,0 +1,3 @@ +from src.training.trainer import train_classifier, train_classifier_cv + +__all__ = ["train_classifier", "train_classifier_cv"] diff --git a/classifier/src/training/trainer.py b/classifier/src/training/trainer.py new file mode 100644 index 0000000..192ea3f --- /dev/null +++ b/classifier/src/training/trainer.py @@ -0,0 +1,374 @@ +import json +from collections import Counter +from pathlib import Path + +import torch +import torch.nn as nn +from torch.utils.data import DataLoader, Subset +from tqdm import tqdm + +from src.evaluation.metrics import binary_metrics + +# ── AMP compatibility shim ───────────────────────────────────────────────── + +# torch.amp.GradScaler / autocast moved from torch.cuda.amp in PyTorch 2.3+ +if hasattr(torch.amp, "GradScaler"): + _GradScaler = torch.amp.GradScaler + _autocast = torch.amp.autocast +else: + from torch.cuda.amp import GradScaler as _OldGradScaler, autocast as _OldAutocast + _GradScaler = lambda device="", enabled=True, **kw: _OldGradScaler(enabled=enabled, **kw) + _autocast = lambda device_type="", enabled=True, **kw: _OldAutocast(enabled=enabled, **kw) + + +# ── Single-fold training ─────────────────────────────────────────────────── + +# Trains one fold; saves best checkpoint by val AUC-ROC and final checkpoint. +# pos_weight is passed through to BCEWithLogitsLoss to handle class imbalance. 
+def train_classifier( + model, + train_dataset, + val_dataset, + *, + epochs=10, + batch_size=16, + lr=1e-4, + weight_decay=1e-4, + device="cuda", + save_dir="outputs/models", + run_name="classifier", + early_stopping_patience=0, + num_workers=4, + grad_clip_norm=1.0, + T_max=None, + pos_weight=None, +): + device = torch.device(device) + if device.type == "cuda": + print(f"Using GPU: {torch.cuda.get_device_name(0)}") + else: + print("Using CPU") + + use_amp = device.type == "cuda" + + model = model.to(device) + + n_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + print(f"Trainable parameters: {n_params:,}") + + train_loader = DataLoader( + train_dataset, batch_size=batch_size, shuffle=True, + num_workers=num_workers, pin_memory=True + ) + val_loader = DataLoader( + val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=True + ) + + pw = torch.tensor([pos_weight], device=device) if pos_weight is not None else None + criterion = nn.BCEWithLogitsLoss(pos_weight=pw) + optimizer = torch.optim.AdamW( + filter(lambda p: p.requires_grad, model.parameters()), + lr=lr, weight_decay=weight_decay, + ) + scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=T_max or epochs) + scaler = _GradScaler("cuda", enabled=use_amp) + print(f"Device: {device} AMP: {use_amp}") + + save_dir = Path(save_dir) + save_dir.mkdir(parents=True, exist_ok=True) + + history = { + "train_loss": [], "train_acc": [], "train_auc": [], "train_f1": [], + "val_loss": [], "val_acc": [], "val_auc": [], "val_f1": [], + } + best_auc = 0.0 + patience_counter = 0 + + for epoch in range(1, epochs + 1): + # ── train ── + model.train() + total_loss = 0.0 + train_logits, train_labels = [], [] + for images, labels in tqdm(train_loader, desc=f"Epoch {epoch}/{epochs} [train]", leave=False): + images = images.to(device) + labels = labels.float().to(device) + + optimizer.zero_grad() + with _autocast("cuda", enabled=use_amp): + logits = 
model(images).squeeze(1) + loss = criterion(logits, labels) + + scaler.scale(loss).backward() + scaler.unscale_(optimizer) + torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip_norm) + scaler.step(optimizer) + scaler.update() + total_loss += loss.item() * len(images) + train_logits.append(logits.detach().cpu()) + train_labels.append(labels.detach().cpu()) + + train_loss = total_loss / sum(len(logit) for logit in train_logits) + train_m = binary_metrics(torch.cat(train_logits), torch.cat(train_labels)) + scheduler.step() + + # ── validate ── + model.eval() + val_loss = 0.0 + all_logits, all_labels = [], [] + with torch.no_grad(): + for images, labels in tqdm(val_loader, desc=f"Epoch {epoch}/{epochs} [val]", leave=False): + images = images.to(device) + labels = labels.float().to(device) + with _autocast("cuda", enabled=use_amp): + logits = model(images).squeeze(1) + batch_loss = criterion(logits, labels) + if not (torch.isnan(batch_loss) or torch.isinf(batch_loss)): + val_loss += batch_loss.item() * len(images) + all_logits.append(logits.cpu()) + all_labels.append(labels.cpu()) + + val_loss /= len(val_dataset) + val_m = binary_metrics(torch.cat(all_logits), torch.cat(all_labels)) + + # ── record ── + history["train_loss"].append(train_loss) + history["train_acc"].append(train_m["accuracy"]) + history["train_auc"].append(train_m["auc_roc"]) + history["train_f1"].append(train_m["f1"]) + history["val_loss"].append(val_loss) + history["val_acc"].append(val_m["accuracy"]) + history["val_auc"].append(val_m["auc_roc"]) + history["val_f1"].append(val_m["f1"]) + + gap_loss = train_loss - val_loss + gap_acc = train_m["accuracy"] - val_m["accuracy"] + print( + f"[{epoch:03d}/{epochs}] " + f"loss: {train_loss:.4f}/{val_loss:.4f} (gap {gap_loss:+.4f}) " + f"acc: {train_m['accuracy']:.4f}/{val_m['accuracy']:.4f} (gap {gap_acc:+.4f}) " + f"auc: {train_m['auc_roc']:.4f}/{val_m['auc_roc']:.4f} " + f"f1: {train_m['f1']:.4f}/{val_m['f1']:.4f}" + ) + + # ── checkpoint ── + if 
val_m["auc_roc"] is not None and val_m["auc_roc"] > best_auc: + best_auc = val_m["auc_roc"] + torch.save(model.state_dict(), save_dir / f"{run_name}_best.pt") + patience_counter = 0 + else: + patience_counter += 1 + + if early_stopping_patience > 0 and patience_counter >= early_stopping_patience: + print(f"Early stopping at epoch {epoch} (no improvement for {early_stopping_patience} epochs)") + break + + torch.save(model.state_dict(), save_dir / f"{run_name}_final.pt") + return history + + +# ── CV training ──────────────────────────────────────────────────────────── + +# Iterates over pre-built splits, trains one model per fold, evaluates on the +# held-out test fold, then aggregates metrics across folds with mean ± std +def train_classifier_cv( + model_fn, + raw_dataset, + splits, + *, + epochs=10, + batch_size=16, + lr=1e-4, + weight_decay=1e-4, + device="cuda", + save_dir="outputs/models", + run_name="classifier_cv", + early_stopping_patience=0, + num_workers=4, + transform_builder=None, + grad_clip_norm=1.0, + T_max=None, + normalization=None, + logs_dir=None, +): + from src.evaluation.evaluate import ( + predict_rows, save_errors, save_hists, save_preds, save_summary, + ) + from src.evaluation.metrics import binary_metrics, calc_metrics, source_metrics, pair_metrics + from src.utils.cross_validation import aggregate_fold_metrics + + device = torch.device(device if torch.cuda.is_available() else "cpu") + save_dir = Path(save_dir) + save_dir.mkdir(parents=True, exist_ok=True) + + eval_dir = Path(logs_dir) / run_name if logs_dir is not None else None + if eval_dir is not None: + eval_dir.mkdir(parents=True, exist_ok=True) + + fold_results = [] + all_records = [] + + for fold_idx, (train_idx, val_idx, test_idx) in enumerate(splits): + print(f"\n{'='*60}") + print(f"Fold {fold_idx + 1}/{len(splits)}") + print(f" Train: {len(train_idx)} Val: {len(val_idx)} Test: {len(test_idx)}") + print(f"{'='*60}") + + model = model_fn().to(device) + + norm_mean = norm_std = None 
+ if normalization == "real_norm": + from src.preprocessing.pipeline import compute_real_stats + norm_mean, norm_std = compute_real_stats(raw_dataset, train_idx) + print(f" Real-norm stats: mean={norm_mean}, std={norm_std}") + + if transform_builder is not None: + train_dataset = transform_builder(train_idx, train=True, + normalize_mean=norm_mean, normalize_std=norm_std) + val_dataset = transform_builder(val_idx, train=False, + normalize_mean=norm_mean, normalize_std=norm_std) + test_dataset = transform_builder(test_idx, train=False, + normalize_mean=norm_mean, normalize_std=norm_std) + else: + train_dataset = Subset(raw_dataset, train_idx) + val_dataset = Subset(raw_dataset, val_idx) + test_dataset = Subset(raw_dataset, test_idx) + + # Compute pos_weight = n_real / n_fake for BCEWithLogitsLoss class balancing + train_labels = [raw_dataset.samples[i][1] for i in train_idx] + class_counts = Counter(train_labels) + pos_weight = class_counts[0] / class_counts[1] if class_counts[1] > 0 else 1.0 + + fold_run_name = f"{run_name}_fold{fold_idx}" + history = train_classifier( + model, + train_dataset, + val_dataset, + epochs=epochs, + batch_size=batch_size, + lr=lr, + weight_decay=weight_decay, + device=device, + save_dir=save_dir, + run_name=fold_run_name, + early_stopping_patience=early_stopping_patience, + num_workers=num_workers, + grad_clip_norm=grad_clip_norm, + T_max=T_max, + pos_weight=pos_weight, + ) + + # Load best checkpoint and evaluate on test set + checkpoint_path = save_dir / f"{fold_run_name}_best.pt" + if checkpoint_path.exists(): + model.load_state_dict(torch.load(checkpoint_path, map_location=device, weights_only=True)) + + model.eval() + + records = predict_rows( + model, test_dataset, raw_dataset, test_idx, + batch_size, device, num_workers=num_workers, + ) + + # Compute aggregate and per-source test metrics + test_metrics = calc_metrics(records) + src_metrics = source_metrics(records) + pairwise = pair_metrics(records) + + fold_result = { + "fold": 
fold_idx, + "train_size": len(train_idx), + "val_size": len(val_idx), + "test_size": len(test_idx), + "history": history, + "test_metrics": test_metrics, + "source_metrics": src_metrics, + "pair_metrics": pairwise, + } + fold_results.append(fold_result) + all_records.extend(records) + + if eval_dir is not None: + fold_dir = eval_dir / f"fold{fold_idx}" + save_preds(records, fold_dir / "preds.csv") + save_errors(records, fold_dir / "errors.json") + + print(f"\nFold {fold_idx + 1} Test Metrics:") + for key, value in test_metrics.items(): + if key != "confusion_matrix": + print(f" {key}: {value}") + print(f" Per-source AUC:") + for source, sm in sorted(src_metrics.items()): + pa = sm.get("pairwise_auc") + dr = sm.get("detection_rate") + label = f"pairwise_auc={pa:.4f}" if pa is not None else f"detection_rate={dr:.4f}" if dr is not None else "" + print(f" {source}: {label}") + + # Aggregate metrics across folds + test_metrics_list = [f["test_metrics"] for f in fold_results] + aggregated = aggregate_fold_metrics(test_metrics_list) + + # Aggregate per-source metrics across folds + all_sources = sorted({s for f in fold_results for s in f["source_metrics"]}) + aggregated_per_source = {} + for source in all_sources: + source_fold_metrics = [] + for f in fold_results: + sm = f["source_metrics"].get(source) + if sm: + # Only keep scalar numeric fields for aggregation + source_fold_metrics.append({ + k: v for k, v in sm.items() + if isinstance(v, (int, float)) and k != "fold" + }) + if source_fold_metrics: + aggregated_per_source[source] = aggregate_fold_metrics(source_fold_metrics) + + # Aggregate pairwise source metrics across folds + all_pairs = sorted({p for f in fold_results for p in f["pair_metrics"]}) + aggregated_pairwise = {} + for pair in all_pairs: + pair_fold_metrics = [] + for f in fold_results: + pm = f["pair_metrics"].get(pair) + if pm: + pair_fold_metrics.append({ + k: v for k, v in pm.items() + if isinstance(v, (int, float)) and k not in ("fold", "n") + }) + 
if pair_fold_metrics: + aggregated_pairwise[pair] = aggregate_fold_metrics(pair_fold_metrics) + + results = { + "run_name": run_name, + "n_folds": len(splits), + "fold_results": fold_results, + "aggregated_metrics": aggregated, + "aggregated_per_source": aggregated_per_source, + "aggregated_pairwise": aggregated_pairwise, + } + + if eval_dir is not None: + save_hists(all_records, eval_dir / "hists") + save_summary(results, eval_dir / "summary.json") + + print(f"\n{'='*60}") + print("Cross-Validation Results (Aggregated)") + print(f"{'='*60}") + for key, value in aggregated.items(): + print(f" {key}:") + print(f" mean: {value['mean']:.4f}") + print(f" std: {value['std']:.4f}") + print(f" 95% CI: ±{value['ci_95']:.4f}") + + if aggregated_per_source: + print(f"\nPer-Source Pairwise AUC (wiki vs. fake source):") + for source in sorted(aggregated_per_source): + ps = aggregated_per_source[source] + pa = ps.get("pairwise_auc", {}) + if pa: + print(f" {source}: {pa['mean']:.4f} ± {pa['std']:.4f}") + dr = ps.get("detection_rate") + if dr and not pa: + print(f" {source}: detection_rate={dr['mean']:.4f} ± {dr['std']:.4f}") + + return results diff --git a/classifier/src/utils/__init__.py b/classifier/src/utils/__init__.py new file mode 100644 index 0000000..b873fa4 --- /dev/null +++ b/classifier/src/utils/__init__.py @@ -0,0 +1,13 @@ +from src.utils.config import load_config +from src.utils.cross_validation import ( + aggregate_fold_metrics, + create_group_kfold_splits, + get_basename, +) + +__all__ = [ + "load_config", + "aggregate_fold_metrics", + "create_group_kfold_splits", + "get_basename", +] diff --git a/classifier/src/utils/config.py b/classifier/src/utils/config.py new file mode 100644 index 0000000..dccd0d6 --- /dev/null +++ b/classifier/src/utils/config.py @@ -0,0 +1,60 @@ +import json +from pathlib import Path +from typing import Any, Dict, Optional + + +# ── Loading ──────────────────────────────────────────────────────────────── + +# Resolves the extends chain 
first, then overlays shared.json underneath so +# experiment-level keys always win over shared defaults +def load_config(config_path: str, shared_path: Optional[str] = None) -> Dict[str, Any]: + config_path = Path(config_path) + cfg = _load_extends(config_path) + + if shared_path is None: + shared_path = config_path.parent.parent / "shared.json" + else: + shared_path = Path(shared_path) + + if shared_path.exists(): + with open(shared_path) as f: + shared_cfg = json.load(f) + cfg = _deep_merge(shared_cfg, cfg) + + return cfg + + +# Pops the "extends" key and recursively merges the parent config underneath; +# the seen set catches circular inheritance before it recurses infinitely +def _load_extends(config_path: Path, seen: Optional[set[Path]] = None) -> Dict[str, Any]: + if seen is None: + seen = set() + resolved_path = config_path.resolve() + if resolved_path in seen: + chain = " -> ".join(str(p) for p in [*seen, resolved_path]) + raise ValueError(f"Circular config inheritance detected: {chain}") + seen.add(resolved_path) + + with open(config_path) as f: + cfg = json.load(f) + + base_ref = cfg.pop("extends", None) + if not base_ref: + seen.remove(resolved_path) + return cfg + + base_path = (config_path.parent / base_ref).resolve() + base_cfg = _load_extends(base_path, seen=seen) + seen.remove(resolved_path) + return _deep_merge(base_cfg, cfg) + + +# override always wins; nested dicts are merged recursively rather than replaced +def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]: + result = base.copy() + for key, value in override.items(): + if key in result and isinstance(result[key], dict) and isinstance(value, dict): + result[key] = _deep_merge(result[key], value) + else: + result[key] = value + return result diff --git a/classifier/src/utils/cross_validation.py b/classifier/src/utils/cross_validation.py new file mode 100644 index 0000000..3bb052c --- /dev/null +++ b/classifier/src/utils/cross_validation.py @@ -0,0 +1,85 @@ +from 
"""Group-aware cross-validation splits and fold-metric aggregation."""
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import numpy as np


# Groups by filename stem so sibling images of the same identity (same name
# across wiki/inpainting/text2img/insight) always stay in the same fold
def get_basename(path: str) -> str:
    """Return the filename stem of *path*, used as the identity-group key."""
    return Path(path).stem


# ── Splits ─────────────────────────────────────────────────────────────────

# Outer fold: StratifiedGroupKFold holds out one fold as test.
# Inner val: 10% of remaining groups are held out randomly — no per-class
# stratification needed since every DFF basename is multi-source (mixed label).
def create_group_kfold_splits(
    samples: List[Tuple[str, int]],
    n_splits: int = 5,
    seed: int = 42,
) -> List[Tuple[List[int], List[int], List[int]]]:
    """Build leakage-free (train, val, test) index splits.

    Args:
        samples: List of (path, label) pairs.
        n_splits: Number of outer folds.
        seed: RNG seed; fold assignment and val selection are deterministic.

    Returns:
        One (train_idx, val_idx, test_idx) tuple per fold. Identity groups
        (shared filename stems) never straddle two partitions.
    """
    # Lazy import: keeps this module importable (e.g. for metric aggregation
    # in tooling) on machines without scikit-learn, matching the lazy-import
    # style used in tools/facecrop.py.
    from sklearn.model_selection import StratifiedGroupKFold

    paths = [s[0] for s in samples]
    labels = np.array([s[1] for s in samples])
    groups = np.array([get_basename(p) for p in paths])

    sgkf = StratifiedGroupKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    splits = []

    for fold_idx, (train_val_idx, test_idx) in enumerate(sgkf.split(paths, labels, groups)):
        train_val_groups = groups[train_val_idx]
        unique_groups = np.unique(train_val_groups)
        # Hold out ~10% of the remaining identity groups (at least one) as val.
        n_val_groups = max(1, int(len(unique_groups) * 0.1))

        # Per-fold RNG so each fold's val selection is independent but fixed.
        rng = np.random.RandomState(seed + fold_idx)
        val_groups = set(rng.choice(unique_groups, n_val_groups, replace=False))

        train_idx = []
        val_idx = []
        for i, g in enumerate(train_val_groups):
            if g in val_groups:
                val_idx.append(train_val_idx[i])
            else:
                train_idx.append(train_val_idx[i])

        splits.append((train_idx, val_idx, test_idx.tolist()))

    return splits


# ── Aggregation ────────────────────────────────────────────────────────────

# Infers numeric keys from the first fold if metric_keys is not supplied;
# uses sample std (ddof=1) and normal-approximation 95% CI
def aggregate_fold_metrics(
    fold_metrics: List[Dict[str, Any]],
    metric_keys: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Aggregate per-fold metric dicts into mean/std/95%-CI summaries.

    Args:
        fold_metrics: One metrics dict per fold; non-numeric values ignored.
        metric_keys: Keys to aggregate. Defaults to the numeric, non-bool
            keys found in the first fold's dict.

    Returns:
        ``{key: {"mean", "std", "ci_95", "values"}}``. Keys absent from
        every fold are skipped.
    """
    if metric_keys is None:
        # bool is excluded explicitly because bool is a subclass of int.
        metric_keys = [
            k for k, v in fold_metrics[0].items()
            if isinstance(v, (int, float)) and not isinstance(v, bool)
        ]

    aggregated = {}
    for key in metric_keys:
        values = [fold[key] for fold in fold_metrics if key in fold]
        if not values:
            continue
        values = np.array(values)
        n = len(values)
        mean = float(np.mean(values))
        # Sample std (ddof=1) is undefined for a single fold — np.std would
        # emit a RuntimeWarning and return NaN — so report zero spread instead.
        std = float(np.std(values, ddof=1)) if n > 1 else 0.0
        # Normal-approximation 95% confidence interval half-width.
        ci_95 = 1.96 * std / float(np.sqrt(n)) if n > 1 else 0.0
        aggregated[key] = {
            "mean": mean,
            "std": std,
            "ci_95": ci_95,
            "values": values.tolist(),
        }

    return aggregated
+""" +import json +import tempfile +import unittest +from pathlib import Path + +from src.utils.config import load_config + + +class ConfigMergeTests(unittest.TestCase): + def test_shared_and_extends_merge(self): + with tempfile.TemporaryDirectory() as td: + root = Path(td) + cfg_dir = root / "configs" / "phaseX" + cfg_dir.mkdir(parents=True) + + (root / "configs" / "shared.json").write_text(json.dumps({ + "batch_size": 32, + "lr": 1e-4, + "augment": {"hflip_p": 0.5, "blur_p": 0.2}, + })) + (cfg_dir / "base.json").write_text(json.dumps({ + "epochs": 10, + "augment": {"hflip_p": 0.1}, + })) + (cfg_dir / "exp.json").write_text(json.dumps({ + "extends": "base.json", + "epochs": 15, + "augment": {"blur_p": 0.0}, + })) + + cfg = load_config(cfg_dir / "exp.json") + self.assertEqual(cfg["batch_size"], 32) + self.assertEqual(cfg["epochs"], 15) + self.assertEqual(cfg["augment"]["hflip_p"], 0.1) + self.assertEqual(cfg["augment"]["blur_p"], 0.0) + + +if __name__ == "__main__": + unittest.main() diff --git a/classifier/tests/test_metrics.py b/classifier/tests/test_metrics.py new file mode 100644 index 0000000..f37fa24 --- /dev/null +++ b/classifier/tests/test_metrics.py @@ -0,0 +1,22 @@ +""" +Tests for binary_metrics edge cases: single-class inputs return null AUC/F1. 
+""" +import unittest + +import torch + +from src.evaluation.metrics import binary_metrics + + +class OneClassMetricTests(unittest.TestCase): + def test_one_class_returns_none_for_auc_and_f1(self): + logits = torch.tensor([0.1, -0.2, 0.3], dtype=torch.float32) + labels = torch.tensor([1.0, 1.0, 1.0], dtype=torch.float32) + metrics = binary_metrics(logits, labels) + self.assertIsNone(metrics["auc_roc"]) + self.assertIsNone(metrics["f1"]) + self.assertIn("accuracy", metrics) + + +if __name__ == "__main__": + unittest.main() diff --git a/classifier/tests/test_splits.py b/classifier/tests/test_splits.py new file mode 100644 index 0000000..17abd49 --- /dev/null +++ b/classifier/tests/test_splits.py @@ -0,0 +1,51 @@ +""" +Tests for CV split integrity: group leakage and subsample consistency. +""" +import unittest + +from src.data.splits import apply_subsample, get_splits + + +class _DummyDataset: + def __init__(self, samples): + self.samples = list(samples) + + +def _mk_samples(): + samples = [] + sources = ["wiki", "inpainting", "text2img", "insight"] + for person in ["a", "b", "c", "d", "e"]: + for source in sources: + label = 0 if source == "wiki" else 1 + samples.append((f"/data/{source}/id/{person}.jpg", label)) + return samples + + +class SplitGroupingTests(unittest.TestCase): + def test_group_leakage_is_blocked_across_folds(self): + ds = _DummyDataset(_mk_samples()) + cfg = {"cv_folds": 5, "seed": 42} + for train_idx, val_idx, test_idx in get_splits(ds, cfg): + train_bases = {ds.samples[i][0].split("/")[-1].split(".")[0] for i in train_idx} + val_bases = {ds.samples[i][0].split("/")[-1].split(".")[0] for i in val_idx} + test_bases = {ds.samples[i][0].split("/")[-1].split(".")[0] for i in test_idx} + self.assertTrue(train_bases.isdisjoint(val_bases)) + self.assertTrue(train_bases.isdisjoint(test_bases)) + self.assertTrue(val_bases.isdisjoint(test_bases)) + + def test_apply_subsample_keeps_full_identity_groups(self): + ds = _DummyDataset(_mk_samples()) + sampled, 
"""
Tests for preprocessing transforms: eval pipeline is deterministic and test-safe.
"""
import unittest

import numpy as np
from PIL import Image

from src.preprocessing.pipeline import get_transforms


class TransformTests(unittest.TestCase):
    def test_eval_transform_is_deterministic(self):
        """Running the eval transform twice on one image gives identical tensors."""
        pixels = (np.random.RandomState(0).rand(128, 128, 3) * 255).astype(np.uint8)
        sample = Image.fromarray(pixels, mode="RGB")

        transform = get_transforms(train=False, image_size=64)
        first = transform(sample)
        second = transform(sample)

        # CHW output at the requested size, with no random augmentation applied.
        self.assertEqual((3, 64, 64), tuple(first.shape))
        self.assertTrue(np.allclose(first.numpy(), second.numpy()))


if __name__ == "__main__":
    unittest.main()
#!/usr/bin/env python3
"""
Check expected classifier result artifacts.

Usage:
    python tools/artifact_chk.py
    python tools/artifact_chk.py --output-root outputs
"""
import argparse
import json
from pathlib import Path


def parse_args():
    """Parse CLI options (currently only the output root)."""
    p = argparse.ArgumentParser(description=__doc__)
    p.add_argument("--output-root", default="outputs", help="Root with logs/models folders")
    return p.parse_args()


def iter_config_paths(config_root: Path):
    """Yield every experiment config JSON under *config_root*.

    Discovers all ``phase*`` sub-directories (phase1, phase2, phase3, ...)
    instead of a hard-coded phase list, so configs added for new phases are
    checked automatically. Yields phases in sorted order, and files sorted
    within each phase, for stable output.
    """
    for phase_dir in sorted(config_root.glob("phase*")):
        if phase_dir.is_dir():
            yield from sorted(phase_dir.glob("*.json"))


def main():
    """Compare run_names declared in configs against logs/checkpoints on disk.

    Exits with status 1 when any expected artifact is missing, so the check
    can gate CI or a pipeline step.
    """
    args = parse_args()
    root = Path(__file__).resolve().parent.parent
    config_root = root / "configs"
    logs_dir = root / args.output_root / "logs"
    models_dir = root / args.output_root / "models"

    # Every config that declares a run_name is expected to have produced
    # a log file and at least one per-fold best checkpoint.
    expected = []
    for cfg_path in iter_config_paths(config_root):
        with open(cfg_path) as f:
            cfg = json.load(f)
        run_name = cfg.get("run_name")
        if run_name:
            expected.append((run_name, cfg_path))

    missing_logs = []
    missing_models = []
    for run_name, cfg_path in expected:
        if not (logs_dir / f"{run_name}.json").exists():
            missing_logs.append((run_name, cfg_path))
        if not any(models_dir.glob(f"{run_name}_fold*_best.pt")):
            missing_models.append((run_name, cfg_path))

    print(f"Expected runs from configs: {len(expected)}")
    print(f"Missing logs: {len(missing_logs)}")
    for run_name, cfg_path in missing_logs:
        print(f"  - {run_name} ({cfg_path.relative_to(root)})")
    print(f"Missing checkpoints: {len(missing_models)}")
    for run_name, cfg_path in missing_models:
        print(f"  - {run_name} ({cfg_path.relative_to(root)})")

    if missing_logs or missing_models:
        raise SystemExit(1)
    print("All expected artifacts found.")


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""
Pre-crop face images using MTCNN and save to a new directory.

Runs face detection once over the dataset and saves cropped images to disk.
Training configs can then point at the pre-cropped directory — no per-epoch
MTCNN overhead during training.

The output mirrors the source structure exactly:
    data/wiki/14/37591914.jpg -> cropped/classifier/wiki/14/37591914.jpg

Resumable: already-cropped images are skipped by default.

Usage:
    python tools/facecrop.py
    python tools/facecrop.py --data-dir data --output-dir cropped/classifier
    python tools/facecrop.py --sources wiki inpainting --device cpu
    python tools/facecrop.py --no-skip-existing   # reprocess everything
"""
import argparse
import sys
import warnings
from pathlib import Path

# Suppress facenet_pytorch's torch.load FutureWarning — not fixable externally.
warnings.filterwarnings("ignore", message=".*weights_only.*", category=FutureWarning)

ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

SOURCES = ["wiki", "inpainting", "text2img", "insight"]
# Detector cache, keyed by (profile, device) so CPU/GPU instances coexist.
_DETECTORS: dict[tuple[str, str], object] = {}


def parse_args():
    """Parse the pre-cropping CLI options."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--data-dir", default="data", help="Source dataset root (default: data)")
    parser.add_argument("--output-dir", default="cropped/classifier", help="Output root (default: cropped/classifier)")
    parser.add_argument("--margin", type=float, default=0.6, help="Face box margin as fraction of box size (default: 0.6)")
    parser.add_argument("--size", type=int, default=224, help="Output image size in px, square (default: 224)")
    parser.add_argument("--device", default=None, help="'cpu' or 'cuda'. Default: auto-detect")
    parser.add_argument("--sources", nargs="+", default=None, metavar="SOURCE",
                        help=f"Only process these sources. Default: all ({', '.join(SOURCES)})")
    parser.add_argument("--skip-existing", dest="skip_existing", action="store_true", default=True,
                        help="Skip images already present in output-dir (default: on, resumable)")
    parser.add_argument("--no-skip-existing", dest="skip_existing", action="store_false",
                        help="Re-process all images even if already cropped")
    return parser.parse_args()


# ── crop helpers ────────────────────────────────────────────────────────────

def _crop_face(img, box, margin: float, size: int):
    """Crop a detected face box — expanded by *margin*, squared, clamped — and resize."""
    from PIL import Image as PILImage

    left, top, right, bottom = (float(v) for v in box)

    # Expand the box by `margin` (fraction of the box size) on every side.
    pad_x = (right - left) * margin / 2
    pad_y = (bottom - top) * margin / 2
    left, right = left - pad_x, right + pad_x
    top, bottom = top - pad_y, bottom + pad_y

    # Re-centre as a square using the longer edge.
    centre_x = (left + right) / 2
    centre_y = (top + bottom) / 2
    side = max(right - left, bottom - top)
    left = centre_x - side / 2
    top = centre_y - side / 2
    right = left + side
    bottom = top + side

    # Clamp to the image bounds before cropping.
    width, height = img.size
    box_px = (
        int(max(0, left)),
        int(max(0, top)),
        int(min(width, right)),
        int(min(height, bottom)),
    )
    return img.crop(box_px).resize((size, size), PILImage.BILINEAR)


def _center_crop(img, size: int):
    """Fallback when no face is found: largest centred square, resized."""
    from PIL import Image as PILImage

    width, height = img.size
    side = min(width, height)
    left = (width - side) // 2
    top = (height - side) // 2
    square = img.crop((left, top, left + side, top + side))
    return square.resize((size, size), PILImage.BILINEAR)


def _get_detectors(device: str):
    """Return (standard, relaxed) MTCNN detectors for *device*, cached per device."""
    std_key = ("std", device)
    relaxed_key = ("relaxed", device)

    if std_key not in _DETECTORS or relaxed_key not in _DETECTORS:
        from facenet_pytorch import MTCNN

        _DETECTORS[std_key] = MTCNN(
            keep_all=False, select_largest=True,
            min_face_size=15,
            device=device, post_process=False,
        )
        # Relaxed profile: smaller minimum face and lower thresholds, used
        # for the 2x-upscaled retry pass on hard images.
        _DETECTORS[relaxed_key] = MTCNN(
            keep_all=False, select_largest=True,
            min_face_size=10,
            thresholds=[0.5, 0.6, 0.6],
            device=device, post_process=False,
        )

    return _DETECTORS[std_key], _DETECTORS[relaxed_key]
class FaceCropper:
    """Reusable face cropper for notebooks/tools (not training pipeline)."""

    def __init__(self, margin: float = 0.6, size: int = 224, device: str | None = None):
        # Local import so importing this module never requires torch.
        import torch

        self.margin = margin
        self.size = size
        # Auto-detect device unless the caller pins one explicitly.
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

    def __call__(self, img):
        """Crop the face from a PIL image; falls back to a centre crop.

        Detection mirrors main(): pass 1 on the original image with the
        standard detector, pass 2 on a 2x upscale with the relaxed detector.
        """
        from PIL import Image as PILImage

        detector, detector_relaxed = _get_detectors(self.device)

        boxes, _ = detector.detect(img)
        if boxes is not None and len(boxes) > 0:
            return _crop_face(img, boxes[0], self.margin, self.size)

        # Retry: upscale 2x and use the relaxed thresholds.
        w, h = img.size
        img2x = img.resize((w * 2, h * 2), PILImage.BILINEAR)
        boxes2, _ = detector_relaxed.detect(img2x)
        if boxes2 is not None and len(boxes2) > 0:
            # Boxes are in upscaled coordinates — halve back to the original.
            box_orig = [v / 2 for v in boxes2[0]]
            return _crop_face(img, box_orig, self.margin, self.size)
        return _center_crop(img, self.size)


# ── main ────────────────────────────────────────────────────────────────────

def main():
    """Run the one-off pre-cropping pass over the dataset.

    Walks every source directory, detects and crops each face (two-pass
    detection with a centre-crop fallback), writes the crops mirroring the
    input layout, and prints aggregate and per-source statistics.
    """
    args = parse_args()

    # Heavy imports deferred so `--help` stays fast.
    import torch
    from PIL import Image
    from tqdm import tqdm

    data_dir = Path(args.data_dir)
    output_dir = Path(args.output_dir)
    device = args.device or ("cuda" if torch.cuda.is_available() else "cpu")
    sources = args.sources or SOURCES

    if not data_dir.exists():
        print(f"Error: data directory not found: {data_dir}")
        sys.exit(1)

    # Validate requested sources
    for src in sources:
        if not (data_dir / src).exists():
            print(f"Error: source directory not found: {data_dir / src}")
            sys.exit(1)

    try:
        import facenet_pytorch  # noqa: F401
    except ImportError:
        print("Error: facenet_pytorch not installed.")
        print(" Run: pip install facenet-pytorch")
        sys.exit(1)

    print(f"Data dir: {data_dir.resolve()}")
    print(f"Output dir: {output_dir.resolve()}")
    print(f"Sources: {', '.join(sources)}")
    print(f"Device: {device}")
    print(f"Margin: {args.margin} | Size: {args.size}px")
    print(f"Skip exist: {args.skip_existing}")

    detector, detector_relaxed = _get_detectors(device)

    # Collect all image paths, grouped by source for per-source stats
    all_paths: list[Path] = []
    for src in sources:
        for subdir in sorted((data_dir / src).iterdir()):
            if subdir.is_dir():
                all_paths.extend(sorted(subdir.glob("*.jpg")))

    print(f"\nTotal images: {len(all_paths):,}\n")

    n_processed = n_skipped = n_error = 0
    # track per-source: detected / retry_detected / fallback
    src_stats: dict[str, dict] = {s: {"detected": 0, "retry": 0, "fallback": 0} for s in sources}

    for img_path in tqdm(all_paths, desc="Pre-cropping", unit="img"):
        rel = img_path.relative_to(data_dir)
        out_path = output_dir / rel
        src_name = img_path.parent.parent.name  # data/wiki/14/file.jpg -> wiki

        # Resumability: skip anything already written on a previous run.
        if args.skip_existing and out_path.exists():
            n_skipped += 1
            continue

        out_path.parent.mkdir(parents=True, exist_ok=True)

        try:
            img = Image.open(img_path).convert("RGB")
        except Exception as exc:
            tqdm.write(f"[WARN] Cannot open {img_path.name}: {exc}")
            n_error += 1
            continue

        cropped = None
        try:
            # Pass 1: detect on original image
            boxes, _ = detector.detect(img)
            if boxes is not None and len(boxes) > 0:
                cropped = _crop_face(img, boxes[0], args.margin, args.size)
                src_stats[src_name]["detected"] += 1
            else:
                # Pass 2: upscale 2x and retry with relaxed thresholds
                w, h = img.size
                img2x = img.resize((w * 2, h * 2), Image.BILINEAR)
                boxes2, _ = detector_relaxed.detect(img2x)
                if boxes2 is not None and len(boxes2) > 0:
                    # boxes are in upscaled coords — divide by 2 to get original coords
                    box_orig = [v / 2 for v in boxes2[0]]
                    cropped = _crop_face(img, box_orig, args.margin, args.size)
                    src_stats[src_name]["retry"] += 1
                else:
                    cropped = _center_crop(img, args.size)
                    src_stats[src_name]["fallback"] += 1
        except Exception as exc:
            # Detection blew up — keep going with a centre crop so one bad
            # image never aborts the whole (long) run.
            tqdm.write(f"[WARN] Detection failed for {img_path.name}: {exc}")
            cropped = _center_crop(img, args.size)
            src_stats[src_name]["fallback"] += 1

        cropped.save(out_path, quality=95)
        n_processed += 1

    # NOTE(review): `total` excludes n_error, so "Total images" below can be
    # less than len(all_paths) when unreadable files were hit — confirm intended.
    total = n_processed + n_skipped
    n_detected = sum(s["detected"] for s in src_stats.values())
    n_retry = sum(s["retry"] for s in src_stats.values())
    n_fallback = sum(s["fallback"] for s in src_stats.values())
    denom = max(n_processed, 1)

    print(f"\n{'─' * 55}")
    print(f" Total images : {total:>8,}")
    print(f" Processed : {n_processed:>8,}")
    print(f" Skipped (existed) : {n_skipped:>8,}")
    print(f" Errors : {n_error:>8,}")
    print(f" Pass-1 detected : {n_detected:>8,} ({n_detected / denom:.1%})")
    print(f" Pass-2 detected : {n_retry:>8,} ({n_retry / denom:.1%}) ← 2x upscale retry")
    print(f" Centre fallback : {n_fallback:>8,} ({n_fallback / denom:.1%})")
    print()
    print(f" {'Source':<12} {'pass-1':>8} {'pass-2':>8} {'fallback':>8} {'fallback%':>10}")
    print(f" {'─'*12} {'─'*8} {'─'*8} {'─'*8} {'─'*10}")
    for src in sources:
        s = src_stats[src]
        total_src = s["detected"] + s["retry"] + s["fallback"]
        fb_pct = s["fallback"] / max(total_src, 1)
        print(f" {src:<12} {s['detected']:>8,} {s['retry']:>8,} {s['fallback']:>8,} {fb_pct:>9.1%}")
    print(f"{'─' * 55}")
    print(f" Output: {output_dir.resolve()}")
    print()
    print("Next step — update your config:")
    print(f'  "data_dir": "{output_dir}"')
    print(f'  remove "face_crop": true (images are already cropped)')


if __name__ == "__main__":
    main()
"""
Download the DeepFakeFace dataset from HuggingFace and extract it.

Usage:
    python tools/fetch_ds.py
    python tools/fetch_ds.py --data-dir /mnt/data/DFF
"""
import argparse
import zipfile
from pathlib import Path

from huggingface_hub import snapshot_download

# The four DFF archives: real images (wiki) plus three fake generators.
SOURCES = ["wiki", "inpainting", "text2img", "insight"]


def download(data_dir: Path) -> None:
    """Fetch the DeepFakeFace snapshot into *data_dir* and extract each source zip.

    Extraction is idempotent: a source is skipped when its directory already
    exists, and a missing zip is only a warning so a partial snapshot still
    works. Finishes with a per-source image count as a sanity check.
    """
    print(f"Downloading dataset from HuggingFace into {data_dir}...")
    snapshot_download(
        repo_id="OpenRL/DeepFakeFace",
        repo_type="dataset",
        local_dir=data_dir,
    )

    for source in SOURCES:
        zip_path = data_dir / f"{source}.zip"
        target_dir = data_dir / source

        if target_dir.exists():
            print(f"  {source}/ already extracted, skipping")
            continue

        if not zip_path.exists():
            print(f"  WARNING: {zip_path} not found, skipping")
            continue

        print(f"  Extracting {zip_path.name}...")
        # extractall trusts the archive contents; acceptable here because the
        # zips come from the pinned OpenRL/DeepFakeFace dataset repo.
        with zipfile.ZipFile(zip_path, "r") as z:
            z.extractall(data_dir)
        print(f"  Done -> {target_dir}")

    print("\nVerifying...")
    for source in SOURCES:
        d = data_dir / source
        count = sum(1 for _ in d.rglob("*.jpg")) if d.exists() else 0
        print(f"  {source}: {count} images")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--data-dir", default="data",
        help="Directory to download into. Default: data",
    )
    args = parser.parse_args()
    download(Path(args.data_dir))
"""
Grad-CAM visualization for trained classifiers.

Generates heatmaps showing which image regions the model focused on,
overlaid on the original image. Targets the last Conv2d layer automatically.

Usage (from notebook or script):
    from tools.gradcam import save_overlays
    from src.evaluation.evaluate import predict_rows

    records = predict_rows(model, test_dataset, raw_ds, test_idx, batch_size=32, device="cpu")
    save_overlays(model, records, cfg, output_dir=Path("outputs/gradcam"), device="cpu")

Output: one PNG per selected sample, named 01_*.png, 02_*.png, ...
top_k//2 false positives (real predicted fake) and top_k//2 false negatives.
"""
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn


def find_conv(model):
    """Return the last Conv2d module in *model* (the Grad-CAM target layer).

    Raises:
        ValueError: If the model contains no Conv2d layer.
    """
    for module in reversed(list(model.modules())):
        if isinstance(module, nn.Conv2d):
            return module
    raise ValueError("Could not find a Conv2d layer for Grad-CAM.")


def gradcam_map(model, image_tensor, device):
    """Build a normalized Grad-CAM heatmap for one image tensor (1xCxHxW).

    Hooks the target conv layer to capture activations and gradients, then
    weights activation channels by their mean gradient.

    Returns:
        A 2-D numpy array (target-layer spatial size), min-max scaled to [0, 1].
    """
    activations = []
    gradients = []
    target = find_conv(model)

    def on_fwd(_, __, output):
        activations.append(output.detach())

    def on_bwd(_, __, grad_output):
        gradients.append(grad_output[0].detach())

    h_fwd = target.register_forward_hook(on_fwd)
    h_bwd = target.register_full_backward_hook(on_bwd)
    try:
        # try/finally: without it, a failing forward/backward would leave the
        # hooks attached to the model permanently.
        model.zero_grad(set_to_none=True)
        logits = model(image_tensor.to(device)).squeeze()
        logits.backward()
    finally:
        h_fwd.remove()
        h_bwd.remove()

    grads = gradients[0][0]
    acts = activations[0][0]
    # Channel weights = global-average-pooled gradients (classic Grad-CAM).
    weights = grads.mean(dim=(1, 2), keepdim=True)
    cam = torch.relu((weights * acts).sum(dim=0))
    cam = cam - cam.min()
    # clamp avoids division by zero when the map is all zeros.
    cam = cam / cam.max().clamp(min=1e-8)
    return cam.cpu().numpy()


def save_overlays(model, records, cfg, output_dir, device, *, top_k=8):
    """Save side-by-side input / Grad-CAM overlays for top-confidence errors.

    Selects the top_k//2 most confident false positives and false negatives
    from *records* and writes one PNG per sample into *output_dir*.
    """
    # Lazy imports keep this module importable without matplotlib/PIL/src
    # installed (mirrors the lazy-import style used in tools/facecrop.py).
    import matplotlib.pyplot as plt
    from PIL import Image
    from src.preprocessing import get_transforms

    output_dir.mkdir(parents=True, exist_ok=True)
    transform = get_transforms(train=False, image_size=cfg["image_size"])

    false_pos = sorted(
        (r for r in records if r["label"] == 0 and r["pred"] == 1),
        key=lambda r: r["prob_fake"],
        reverse=True,
    )[: top_k // 2]
    false_neg = sorted(
        (r for r in records if r["label"] == 1 and r["pred"] == 0),
        key=lambda r: r["prob_fake"],
    )[: top_k // 2]
    selected = [*false_pos, *false_neg]

    total = len(selected)
    for idx, record in enumerate(selected, start=1):
        print(f"Grad-CAM: rendering {idx}/{total} for {Path(record['path']).name}")
        image = Image.open(record["path"]).convert("RGB")
        image_tensor = transform(image).unsqueeze(0)
        heatmap = gradcam_map(model, image_tensor, device)

        fig, axes = plt.subplots(1, 2, figsize=(8, 4))
        axes[0].imshow(np.asarray(image))
        axes[0].set_title("Input")
        axes[0].axis("off")

        # extent stretches the low-res CAM over the full-resolution image.
        axes[1].imshow(np.asarray(image))
        axes[1].imshow(heatmap, cmap="jet", alpha=0.4, extent=(0, image.width, image.height, 0))
        axes[1].set_title(
            f"{Path(record['path']).name}\ntrue={record['label']} pred={record['pred']} p={record['prob_fake']:.3f}"
        )
        axes[1].axis("off")

        fig.tight_layout()
        fig.savefig(output_dir / f"{idx:02d}_{Path(record['path']).stem}.png", dpi=160)
        plt.close(fig)
"""
Run inference on a single image using a trained classifier.

Usage:
    python tools/inference.py <image_path> <config_path>
    python tools/inference.py <image_path> <config_path> --checkpoint <checkpoint_path>
"""
import argparse
import json
import sys
from pathlib import Path

# Make `src.*` importable when run from anywhere (classifier/ is parents[1]).
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

import torch
from PIL import Image

from src.models import get_model, load_checkpoint
from src.preprocessing import get_transforms


# Defaults checkpoint to outputs/models/{run_name}_best.pt when not supplied
def predict(image_path, config_path, checkpoint_path=None):
    """Classify one image as REAL/FAKE and print the result.

    Every failure mode (missing file, bad JSON, model/checkpoint errors)
    prints a message and exits with status 1 — this is a CLI entry point,
    not a library function.
    """
    image_path = Path(image_path)
    config_path = Path(config_path)

    if not image_path.exists():
        print(f"Error: Image not found: {image_path}")
        sys.exit(1)

    if not config_path.exists():
        print(f"Error: Config not found: {config_path}")
        sys.exit(1)

    try:
        with open(config_path) as f:
            cfg = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON in config: {e}")
        sys.exit(1)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    try:
        # pretrained=False — we're loading a saved checkpoint, not ImageNet weights
        model = get_model({**cfg, "pretrained": False})
    except Exception as e:
        print(f"Error: Failed to build model: {e}")
        sys.exit(1)

    if checkpoint_path is None:
        # Convention: best checkpoint is named after the config's run_name.
        checkpoint_path = ROOT / "outputs" / "models" / f"{cfg['run_name']}_best.pt"
    else:
        checkpoint_path = Path(checkpoint_path)

    if not checkpoint_path.exists():
        print(f"Error: Checkpoint not found: {checkpoint_path}")
        sys.exit(1)

    try:
        load_checkpoint(model, checkpoint_path, device)
    except Exception as e:
        print(f"Error: Failed to load checkpoint: {e}")
        sys.exit(1)

    model.eval().to(device)

    try:
        # Same eval transform as training-time validation.
        transform = get_transforms(train=False, image_size=cfg["image_size"])
        image = Image.open(image_path).convert("RGB")
        tensor = transform(image).unsqueeze(0).to(device)
    except Exception as e:
        print(f"Error: Failed to load/preprocess image: {e}")
        sys.exit(1)

    with torch.no_grad():
        logit = model(tensor).squeeze()
        prob = torch.sigmoid(logit).item()

    # prob is P(fake); confidence reports the probability of the chosen class.
    label = "FAKE" if prob >= 0.5 else "REAL"
    confidence = prob if prob >= 0.5 else 1 - prob

    print(f"Image : {image_path}")
    print(f"Model : {cfg['run_name']} ({cfg['backbone']})")
    print(f"Device: {device}")
    print(f"Result: {label} (confidence: {confidence:.1%})")
    print(f"P(fake): {prob:.4f} P(real): {1-prob:.4f}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("image_path", help="Path to the input image")
    parser.add_argument("config_path", help="Path to the model config JSON")
    parser.add_argument("--checkpoint", help="Optional path to model checkpoint")
    args = parser.parse_args()
    predict(args.image_path, args.config_path, args.checkpoint)
"""
Re-evaluate existing trained checkpoints with per-source metrics.

Loads each config, rebuilds CV splits (deterministic), loads the _best.pt
checkpoint per fold, runs predict_rows, and writes updated log files
with aggregate + per-source + pairwise metrics.

Usage:
    python tools/reevaluate.py                      # re-evaluate all experiments
    python tools/reevaluate.py p1_resnet18_baseline # specific experiments
    python tools/reevaluate.py --data-dir /mnt/data/DFF
    python tools/reevaluate.py --use-gpu
"""
import argparse
import json
import sys
import warnings
from pathlib import Path

warnings.filterwarnings("ignore", message="Corrupt EXIF data", category=UserWarning)
warnings.filterwarnings("ignore", message=".*weights_only.*", category=FutureWarning)

# Ensure classifier/ is on sys.path so `src.*` imports work
ROOT = Path(__file__).resolve().parent.parent
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# Config paths (data_dir) are relative to the project root
PROJECT_ROOT = ROOT.parent


def parse_args():
    """Parse re-evaluation CLI options."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "run_names", nargs="*",
        help="Run names to re-evaluate (matches log filenames). Default: all.",
    )
    parser.add_argument(
        "--data-dir", default=None,
        help="Override cfg['data_dir'] for this run.",
    )
    parser.add_argument(
        "--output-root", default="outputs",
        help="Directory where models/logs live. Default: outputs",
    )
    parser.add_argument(
        "--use-gpu", action="store_true",
        help="Use GPU for evaluation.",
    )
    return parser.parse_args()


# Map run_name -> config path (relative to classifier/)
CONFIG_MAP = {
    # Phase 1
    "p1_resnet18_baseline": "configs/phase1/p1_resnet18_baseline.json",
    "p1_simplecnn_baseline": "configs/phase1/p1_simplecnn_baseline.json",
    # Phase 2a – shortcut / holdout
    "p2a_t1_original": "configs/phase2/p2a_t1_original.json",
    "p2a_t2_real_norm": "configs/phase2/p2a_t2_real_norm.json",
    "p2a_t3_holdout_text2img": "configs/phase2/p2a_t3_holdout_text2img.json",
    "p2a_t3_holdout_inpainting": "configs/phase2/p2a_t3_holdout_inpainting.json",
    "p2a_t3_holdout_insight": "configs/phase2/p2a_t3_holdout_insight.json",
    # Phase 2b – resolution
    "p2b_resnet18_224": "configs/phase2/p2b_resnet18_224.json",
    "p2b_simplecnn_224": "configs/phase2/p2b_simplecnn_224.json",
    # Phase 2c – face crop
    "p2c_resnet18_facecrop": "configs/phase2/p2c_resnet18_facecrop.json",
    "p2c_simplecnn_facecrop": "configs/phase2/p2c_simplecnn_facecrop.json",
    # Phase 2d – augmentation
    "p2d_resnet18_aug": "configs/phase2/p2d_resnet18_aug.json",
    "p2d_simplecnn_aug": "configs/phase2/p2d_simplecnn_aug.json",
    # Phase 2e – face crop + aug
    "p2e_resnet18_facecrop_aug": "configs/phase2/p2e_resnet18_facecrop_aug.json",
    "p2e_simplecnn_facecrop_aug": "configs/phase2/p2e_simplecnn_facecrop_aug.json",
]  # noqa: this closing bracket is a brace in the real file
CONFIG_MAP = dict(CONFIG_MAP) if isinstance(CONFIG_MAP, dict) else CONFIG_MAP


def main():
    """Re-run evaluation for each selected experiment and rewrite its log.

    Per run: rebuild the dataset and deterministic CV splits from the config,
    load each fold's best checkpoint, compute aggregate / per-source /
    pairwise metrics, then merge the original training history back into the
    updated log before saving.
    """
    args = parse_args()
    # Heavy imports deferred so `--help` works without torch/sklearn installed.
    import numpy as np
    import torch
    from src.data import DFFDataset, apply_subsample, build_transforms, get_splits
    from src.evaluation.evaluate import predict_rows
    from src.evaluation.metrics import calc_metrics, source_metrics, pair_metrics
    from src.models import get_model
    from src.utils import load_config
    from src.utils.cross_validation import aggregate_fold_metrics

    output_root = Path(args.output_root)
    logs_dir = output_root / "logs"
    models_dir = output_root / "models"

    # Determine which experiments to re-evaluate
    if args.run_names:
        run_names = args.run_names
    else:
        # Default: every known experiment that already has a log file.
        run_names = sorted(
            p.stem for p in logs_dir.glob("*.json")
            if p.stem in CONFIG_MAP
        )

    device = "cuda" if args.use_gpu and torch.cuda.is_available() else "cpu"
    print(f"Device: {device}")

    for run_name in run_names:
        config_rel = CONFIG_MAP.get(run_name)
        if config_rel is None:
            print(f"\nSkipping {run_name}: no config mapping")
            continue

        config_path = ROOT / config_rel
        if not config_path.exists():
            print(f"\nSkipping {run_name}: config not found ({config_rel})")
            continue

        # Check that at least one checkpoint exists
        checkpoints = sorted(models_dir.glob(f"{run_name}_fold*_best.pt"))
        if not checkpoints:
            print(f"\nSkipping {run_name}: no checkpoints found")
            continue

        print(f"\n{'='*60}")
        print(f"Re-evaluating: {run_name}")
        print(f"  Config: {config_rel}")
        print(f"  Checkpoints: {len(checkpoints)}")
        print(f"{'='*60}")

        # Load config and re-seed everything so splits match training exactly.
        cfg = load_config(config_path)
        seed = cfg.get("seed", 42)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)
        deterministic = cfg.get("deterministic", False)
        torch.backends.cudnn.deterministic = deterministic
        torch.backends.cudnn.benchmark = not deterministic

        data_dir = args.data_dir or cfg.get("data_dir", "data")
        # Config paths are relative to the project root, not classifier/
        if not Path(data_dir).is_absolute():
            data_dir = str(PROJECT_ROOT / data_dir)

        # Build dataset
        raw_ds = DFFDataset(data_dir, sources=cfg.get("dataset_sources"))

        sampled = apply_subsample(raw_ds, cfg)
        if sampled is not None:
            n_samples, total = sampled
            print(f"  Subsampled to {n_samples}/{total} samples")

        # Build CV splits and transforms (deterministic – same as training)
        splits = get_splits(raw_ds, cfg)
        transform_builder = build_transforms(raw_ds, cfg, augment=cfg.get("augment"))

        n_folds = len(splits)
        fold_results = []

        for fold_idx in range(n_folds):
            train_idx, val_idx, test_idx = splits[fold_idx]

            checkpoint_path = models_dir / f"{run_name}_fold{fold_idx}_best.pt"
            if not checkpoint_path.exists():
                print(f"  Fold {fold_idx}: checkpoint missing, skipping")
                continue

            # Rebuild model and load checkpoint
            model = get_model(cfg)
            model.load_state_dict(
                torch.load(checkpoint_path, map_location=device, weights_only=True)
            )
            model.to(device).eval()

            # Build test dataset for this fold
            if cfg.get("normalization") == "real_norm":
                # real_norm: normalization stats come from this fold's TRAIN
                # real images, same as during training.
                from src.preprocessing.pipeline import compute_real_stats
                norm_mean, norm_std = compute_real_stats(raw_ds, train_idx)
            else:
                norm_mean = norm_std = None

            test_dataset = transform_builder(
                test_idx, train=False,
                normalize_mean=norm_mean, normalize_std=norm_std,
            )

            records = predict_rows(
                model, test_dataset, raw_ds, test_idx,
                cfg["batch_size"], device, num_workers=4,
            )

            # Compute metrics
            test_metrics = calc_metrics(records)
            src_metrics = source_metrics(records)
            pairwise = pair_metrics(records)

            fold_result = {
                "fold": fold_idx,
                "train_size": len(train_idx),
                "val_size": len(val_idx),
                "test_size": len(test_idx),
                "test_metrics": test_metrics,
                "source_metrics": src_metrics,
                "pair_metrics": pairwise,
            }
            fold_results.append(fold_result)

            print(f"  Fold {fold_idx}: auc={test_metrics.get('auc_roc', '?'):.4f} "
                  f"acc={test_metrics.get('accuracy', '?'):.4f} "
                  f"f1={test_metrics.get('f1', '?'):.4f}")
            for source, sm in sorted(src_metrics.items()):
                pa = sm.get("pairwise_auc")
                dr = sm.get("detection_rate")
                label = (f"pairwise_auc={pa:.4f}" if pa is not None
                         else f"detection_rate={dr:.4f}" if dr is not None else "")
                print(f"    {source}: {label}")

        if not fold_results:
            print(f"  No folds evaluated for {run_name}")
            continue

        # Aggregate across folds
        test_metrics_list = [f["test_metrics"] for f in fold_results]
        aggregated = aggregate_fold_metrics(test_metrics_list)

        # Aggregate per-source metrics
        all_sources = sorted({s for f in fold_results for s in f["source_metrics"]})
        aggregated_per_source = {}
        for source in all_sources:
            source_fold_metrics = []
            for f in fold_results:
                sm = f["source_metrics"].get(source)
                if sm:
                    source_fold_metrics.append({
                        k: v for k, v in sm.items()
                        if isinstance(v, (int, float)) and k != "fold"
                    })
            if source_fold_metrics:
                aggregated_per_source[source] = aggregate_fold_metrics(source_fold_metrics)

        # Aggregate pairwise metrics
        all_pairs = sorted({p for f in fold_results for p in f["pair_metrics"]})
        aggregated_pairwise = {}
        for pair in all_pairs:
            pair_fold_metrics = []
            for f in fold_results:
                pm = f["pair_metrics"].get(pair)
                if pm:
                    pair_fold_metrics.append({
                        k: v for k, v in pm.items()
                        if isinstance(v, (int, float)) and k not in ("fold", "n")
                    })
            if pair_fold_metrics:
                aggregated_pairwise[pair] = aggregate_fold_metrics(pair_fold_metrics)

        # Load existing log to preserve training history
        log_path = logs_dir / f"{run_name}.json"
        if log_path.exists():
            with open(log_path) as f:
                existing = json.load(f)
            # Keep the training history from the original log
            for fr_new in fold_results:
                for fr_old in existing.get("fold_results", []):
                    if fr_old["fold"] == fr_new["fold"]:
                        fr_new["history"] = fr_old.get("history")
                        break
        else:
            existing = {}

        results = {
            "run_name": run_name,
            "n_folds": n_folds,
            "fold_results": fold_results,
            "aggregated_metrics": aggregated,
            "aggregated_per_source": aggregated_per_source,
            "aggregated_pairwise": aggregated_pairwise,
            # Prefer the config recorded at training time if it exists.
            "config": existing.get("config", cfg),
        }

        with open(log_path, "w") as f:
            json.dump(results, f, indent=2)
        print(f"  Saved: {log_path}")

    print("\nDone.")


if __name__ == "__main__":
    main()
https://git-lfs.github.com/spec/v1 +oid sha256:19ab18ecf4c15f89673f87f0c22bc337a7eb295de85d9eda6921d0c073917042 +size 2083173206 diff --git a/cropped_generator.zip b/cropped_generator.zip new file mode 100644 index 0000000..232ebc0 --- /dev/null +++ b/cropped_generator.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c82dd60ff8b5185e8724ad5613d816d4cde7ade2d44f4a6ef7928edde44ba99 +size 224628744 diff --git a/docs/DRL_Project.md b/docs/DRL_Project.md new file mode 100644 index 0000000..fe175c8 --- /dev/null +++ b/docs/DRL_Project.md @@ -0,0 +1,60 @@ +# Deep and Reinforcement Learning (2025/2026 — M.IA003), FEUP/FCUP +## Deep Learning Project + +**Submission deadline:** May 15th, 2026 + +This work will need to be submitted using the Moodle platform. It will be developed during practical classes, but it is expected that the students will complement this work using extra-class hours. + +--- + +## 1. Objective +The objective of this work is to develop deep learning discriminative and generative models, applied to the context of “deep fakes”. The discriminative models will be designed to classify images as “real” vs. “fake”, whereas the generative models will be trained to produce new “fake” examples. + +## 2. Dataset +The data that you will be using belongs to the DeepFakeFace (DFF) dataset. You can access the dataset files and description via the Hugging Face link. In addition, you can find a detailed description of the dataset in this paper. + +The dataset was generated to assess the ability of deepfake detectors to distinguish AI-generated and authentic images. It contains 30,000 real images of celebrities taken from the IMDB-WIKI dataset. The dataset also contains 90,000 fake images generated with the three following models: + +- Stable Diffusion v1.5 +- Stable Diffusion Inpainting +- InsightFace + +Each model generated 30,000 fake images. + +## 3. 
Implementation
+In order to complete this work, you will need to implement two different models:
+
+1. One classifier, which is trained to distinguish between real and fake images
+2. One generative model, which is trained to create new fake images
+
+For the first model, you will be free to implement any of the discriminative approaches that will be considered during the theoretical classes (e.g., multilayer perceptrons, convolutional neural networks, visual transformers, etc.).
+
+For the second model, you will be free to implement any of the generative models that will be considered during the theoretical classes (e.g., generative adversarial networks, variational autoencoders, diffusion models, etc.).
+
+For both models, you will need to define a proper training strategy as well as the correct way and metrics to evaluate the performance.
+
+## 4. Project evaluation
+The project will be evaluated by taking into consideration the suitability of the proposed model for the specific task, its correctness, and complexity.
+
+**VERY IMPORTANT:** The main objective and core of this project will be that of iteratively improving the proposed solutions via a continuous observation of intermediate results and the proposal of adjustments to the algorithm. Projects that simply present a solution without showcasing the evolution of the proposed model will not be considered sufficient.
+
+## 5. Submission of the solution
+Your project must be delivered in Moodle by **May 15th, 2026, at 23:59:59**.
+
+- Final code solution, as a notebook or series of files.
+- Slides for presentation (**pdf format**) focusing on the main issues of the assignment for a 10 minute presentation; any additional information that cannot be presented in that time slot can be included in annexes to the presentation. The presentation should contain the following information:
+ - Brief description of the deep learning solutions considered for the problem, both for the discriminative and generative part.
+ - **MOST IMPORTANT:** Description of the different implementation steps considered to improve the proposed models: motivate your choices in terms of type of approach, model architecture, training strategy, etc. Show intermediate results, how you interpreted those, and what you decided to change in order to improve the results.
+ - Results:
+ - Classification performance obtained by the developed discriminative model. Description of the experimental setup, train/val/test splits, and performance metrics chosen.
+ - Data generation performance obtained by the developed generative model. Description of the experimental setup and performance metrics chosen.
+ - Discussion and conclusions: comments on the performance obtained and final remarks.
+- Filled auto-evaluation file regarding the contribution of each member of the group.
+
+Further information about the project submission and presentation:
+
+- The code provided as the solution will need to allow training the considered models and reproducing the results that you reported. Please do not include dataset files. You can assume I have local access to the dataset.
+- The work must be done by groups of 3 people. Groups formed by fewer than 3 people must be justified and approved before starting work.
+- Delays in the submission will incur a grade penalization and may eventually result in the work not being accepted.
+- All works must be presented on May 22nd and 29th, during the practical classes. All group members must be present during the demonstration. If a member of the group is not present at the work presentation, they will receive a zero grade for this work, thus implying failing to pass.
+- Each member of the group must comment on their contribution to the work, and must know what the other members of the group have done. Failing to describe in detail what your solution is doing and why will result in a penalization in the overall evaluation of the project.
diff --git a/docs/DRL_Project.pdf b/docs/DRL_Project.pdf new file mode 100644 index 0000000..504111b Binary files /dev/null and b/docs/DRL_Project.pdf differ diff --git a/docs/classifier_impl.md b/docs/classifier_impl.md new file mode 100644 index 0000000..004fc75 --- /dev/null +++ b/docs/classifier_impl.md @@ -0,0 +1,647 @@ +# Deepfake Detection Classifier - Implementation Plan + +## Overview +This document provides a comprehensive implementation plan for refactoring the deepfake detection classifier project. Each task includes a checkbox to track completion. + +--- + +## Phase 0: Pre-Implementation Setup + +### Infrastructure and Configuration +- [x] Create `classifier/configs/shared.json` with shared parameters: + - seed: 42 + - val_ratio: 0.1 + - test_ratio: 0.1 + - batch_size: 32 + - optimizer: {type: "adamw", lr: 1e-4, weight_decay: 1e-4} + - scheduler: {type: "cosine_annealing", T_max: 15} + - early_stopping_patience: 5 + - num_workers: 4 + - cv_folds: 5 + - data_dir: "data" + - face_crop_margin: 0.6 + +- [x] Implement config loading/merging so experiment configs inherit `shared.json` defaults and override only the variables under test +- [x] Resolve shared nested fields such as `optimizer.lr`, `optimizer.weight_decay`, and `scheduler.T_max` into the training arguments used by the runner +- [x] Update existing configs to reference `shared.json` or otherwise document which shared defaults they intentionally override +- [x] Define one CV protocol for all phases: + - outer fold: held-out test fold + - inner validation split: group-aware split from the remaining training folds for early stopping/model selection + - final reported metrics: aggregate held-out test-fold results across the 5 outer folds + +### Data Preparation +- [x] Verify dataset structure and integrity +- [x] Check that real and fake images are properly organized by source +- [x] Verify no data leakage between train/val/test splits or CV folds (group-aware by basename) + +### Cleanup +- [x] 
Remove `classifier/tools/ensemble.py` (not part of reorganization plan, conflicts with explainability goals) +- [x] Remove robustness evaluation from `classifier/tools/analyze.py` (lines 51-104, 82-104, 144) - not part of experimental plan +- [x] Remove any unused or obsolete config files from previous experiments (see detailed list below) +- [X] Clean up old output directories if needed (keep important results for reference) + +#### Config Files to Remove (39 total) + +**Root configs (6):** +- [x] `classifier/configs/resnet18_quick.json` +- [x] `classifier/configs/resnet18.json` +- [x] `classifier/configs/simple_cnn_large.json` +- [x] `classifier/configs/simple_cnn_micro.json` +- [x] `classifier/configs/simple_cnn_small.json` +- [x] `classifier/configs/simple_cnn.json` + +**Phase 1 old configs (7):** +- [x] `classifier/configs/phase1/p1_cnn_base.json` (uses lr=1e-3, epochs=20 - should be 1e-4, 15) +- [x] `classifier/configs/phase1/p1_cnn_aug.json` +- [x] `classifier/configs/phase1/p1_resnet18_base.json` (duplicate of new baseline) +- [x] `classifier/configs/phase1/p1_resnet18_aug.json` +- [x] `classifier/configs/phase1/holdout/` (entire directory - 6 configs, source holdout not in new plan) + +**Phase 2 old configs (7):** +- [x] `classifier/configs/phase2/p2_resnet18_224.json` (should be p2a_resnet18_224.json) +- [x] `classifier/configs/phase2/p2_resnet18_facecrop.json` (should be p2b_resnet18_facecrop.json) +- [x] `classifier/configs/phase2/p2_resnet18_frozen.json` (frozen backbone not in new plan) +- [x] `classifier/configs/phase2/p2_resnet34_224.json` (ResNet34 should be in Phase 3) +- [x] `classifier/configs/phase2/p2_resnet34.json` (ResNet34 should be in Phase 3) +- [x] `classifier/configs/phase2/p2_resnet50_frozen.json` (ResNet50 should be in Phase 3) +- [x] `classifier/configs/phase2/p2_resnet50.json` (ResNet50 should be in Phase 3) + +**Phase 3 old configs (4):** +- [x] `classifier/configs/phase3/p3_efficientnet_b2.json` (EfficientNet-B2 not in new plan, 
only B0) +- [x] `classifier/configs/phase3/p3_resnet18_facecrop_full.json` (ResNet18 full dataset should be Phase 4) +- [x] `classifier/configs/phase3/p3_resnet18_freqaug.json` (frequency augmentation not in new plan) +- [x] `classifier/configs/phase3/p3_vit_b16.json` (ViT not in new plan, replaced with ConvNeXt/MobileNet) +- Note: `p3_efficientnet_b0.json` - REMOVED (will be recreated after Phase2 with correct settings) + +**Source holdout (6):** +- [x] `classifier/configs/source_holdout/` (entire directory - 6 configs, source holdout not in new plan) + +**Ablation (3):** +- [x] `classifier/configs/ablation/` (entire directory - 3 configs, ablation studies not in new plan) + +**Configs to KEEP (3):** +- ✅ `classifier/configs/shared.json` +- ✅ `classifier/configs/phase1/p1_simplecnn_baseline.json` +- ✅ `classifier/configs/phase1/p1_resnet18_baseline.json` + +**Phase 2 alias configs removed (8):** +- [x] `classifier/configs/phase2/p2b_resnet18_128.json` (alias for p1_resnet18_baseline) +- [x] `classifier/configs/phase2/p2b_simplecnn_128.json` (alias for p1_simplecnn_baseline) +- [x] `classifier/configs/phase2/p2c_resnet18_nofacecrop.json` (alias for p2b_resnet18_224) +- [x] `classifier/configs/phase2/p2c_simplecnn_nofacecrop.json` (alias for p2b_simplecnn_224) +- [x] `classifier/configs/phase2/p2d_resnet18_noaug.json` (alias for p2b_resnet18_224) +- [x] `classifier/configs/phase2/p2d_simplecnn_noaug.json` (alias for p2b_simplecnn_224) +- [x] `classifier/configs/phase2/p2e_resnet18_facecrop_only.json` (alias for p2c_resnet18_facecrop) +- [x] `classifier/configs/phase2/p2e_simplecnn_facecrop_only.json` (alias for p2c_simplecnn_facecrop) + +Note: Comparison pairs (baseline vs treatment) are defined in the analysis notebook as a mapping dict, not as separate config files. 
+ +--- + +## Phase 1: Architecture Baseline + +### 1.1 Experiment Configs +- [x] Create `classifier/configs/phase1/p1_simplecnn_baseline.json` + - backbone: simple_cnn + - cnn_preset: medium + - dropout: 0.0 + - epochs: 15 + - batch_size: 32 + - lr: 1e-4 (consistent with ResNet) + - weight_decay: 1e-4 + - image_size: 128 + - data_dir: data + - early_stopping_patience: 5 + - subsample: 0.2 + - face_crop: false + - augment: false + - seed: 42 + +- [x] Create `classifier/configs/phase1/p1_resnet18_baseline.json` + - backbone: resnet18 + - pretrained: true + - epochs: 15 + - batch_size: 32 + - lr: 1e-4 + - weight_decay: 1e-4 + - image_size: 128 + - data_dir: data + - early_stopping_patience: 5 + - subsample: 0.2 + - face_crop: false + - augment: false + - seed: 42 + +### 1.2 Code Updates +- [x] Implement 5-fold stratified group cross-validation by basename in training pipeline +- [x] Update `classifier/src/training/trainer.py` to support CV +- [x] Update `classifier/src/evaluation/evaluate.py` to support CV +- [x] Ensure all metrics report mean ± std and confidence intervals across folds + +### 1.3 Training +- [x] Train SimpleCNN with 5-fold stratified group CV (via pipeline: `python -m pipeline run classifier/configs/phase1/p1_simplecnn_baseline.json`) +- [x] Train ResNet18 with 5-fold stratified group CV (via pipeline: `python -m pipeline run classifier/configs/phase1/p1_resnet18_baseline.json`) +- [x] Save all checkpoints and metrics (pipeline automatically fetches outputs to classifier/outputs/) + +### 1.4 Analysis +- [x] Use `classifier/notebooks/03_phase1_analysis.ipynb` for Phase 1 analysis +- [x] Compare SimpleCNN vs ResNet18 performance +- [x] Overall metrics (AUC, Accuracy, F1) with mean ± std and confidence intervals +- [x] Per-source metrics (text2img, inpainting, insight) +- [x] Train/val/test performance curves +- [x] Confusion matrices +- [x] Statistical significance testing +- [x] Generate Grad-CAM visualizations (10-20 images per model) +- [x] Document 
conclusions: Which baseline is better and why + +--- + +## Phase 2: Preprocessing Impact + +### 2.1 Shortcut Analysis (2A) +- [x] Create `classifier/configs/phase2/p2a_t1_original.json` + - backbone: resnet18 + - image_size: 224 + - subsample: 0.2 + - seed: 42 + - augment: false + - normalization: imagenet + - data_dir: data + +- [x] Create `classifier/configs/phase2/p2a_t2_real_norm.json` + - extends: p2a_t1_original.json + - normalization: real_norm + - **Normalization**: Calculate mean/std from real training images only within each fold + +- [x] Geometry diagnostic was explored and then removed from the codebase (`src/evaluation/geometry.py` no longer exists): + - Current pipeline always square-crops before resize, reducing rectangle-vs-square shortcut risk. + - Shortcut analysis now relies on normalization and held-out-source evidence artifacts. + +- [ ] Train the 2 shortcut configs with 5-fold stratified group CV +- [ ] Compare results: + - Standard vs matched-geometry eval for `p2a_t1_original` (letterboxing impact) + - `p2a_t1_original` vs `p2a_t2_real_norm` (color distribution shortcut) + +- [x] Create `classifier/configs/phase2/p2a_t3_holdout_text2img.json` + - extends: p2a_t1_original.json + - train_sources: ["wiki", "inpainting", "insight"] + - eval_sources: ["wiki", "inpainting", "insight", "text2img"] + +- [x] Create `classifier/configs/phase2/p2a_t3_holdout_inpainting.json` + - extends: p2a_t1_original.json + - train_sources: ["wiki", "text2img", "insight"] + - eval_sources: ["wiki", "text2img", "insight", "inpainting"] + +- [x] Create `classifier/configs/phase2/p2a_t3_holdout_insight.json` + - extends: p2a_t1_original.json + - train_sources: ["wiki", "text2img", "inpainting"] + - eval_sources: ["wiki", "text2img", "inpainting", "insight"] + +- [ ] Train the 3 source holdout configs with 5-fold stratified group CV +- [ ] Compare held-out source performance vs in-source performance: + - Calculate AUC for held-out source (text2img, inpainting, insight) 
+ - Compute Δ (in-source AUC - held-out AUC) + - If Δ > 0.05-0.10, model is learning source-specific features + +### 2.2 Resolution Impact (2B) +- [x] Create `classifier/configs/phase2/p2b_simplecnn_224.json` + - backbone: simple_cnn + - image_size: 224 + - subsample: 0.2 + - augment: false + - seed: 42 + - data_dir: data + +- [x] Create `classifier/configs/phase2/p2b_resnet18_224.json` + - backbone: resnet18 + - image_size: 224 + - subsample: 0.2 + - augment: false + - seed: 42 + - data_dir: data + +- [ ] Train both 224 configs with 5-fold stratified group CV +- [ ] Compare 128×128 vs 224×224 for each model + - 128 baseline is `p1_*_baseline` (comparison mapping in notebook) + +### 2.3 Facecrop Impact (2C) +- [x] Create `classifier/configs/phase2/p2c_simplecnn_facecrop.json` + - backbone: simple_cnn + - image_size: 224 + - subsample: 0.2 + - augment: false + - seed: 42 + - data_dir: cropped/classifier + +- [x] Create `classifier/configs/phase2/p2c_resnet18_facecrop.json` + - backbone: resnet18 + - image_size: 224 + - subsample: 0.2 + - augment: false + - seed: 42 + - data_dir: cropped/classifier + +- [ ] Train both facecrop configs with 5-fold stratified group CV +- [ ] Compare `p2b_resnet18_224` (no facecrop) vs `p2c_resnet18_facecrop` for each model + - No-facecrop baseline is `p2b_*_224` (comparison mapping in notebook) + +### 2.4 Augmentation Impact (2D) +- [x] Create `classifier/configs/phase2/p2d_simplecnn_aug.json` + - backbone: simple_cnn + - image_size: 224 + - subsample: 0.2 + - seed: 42 + - augment: {hflip_p: 0.5, rotation_degrees: 10, brightness: 0.2, contrast: 0.2, saturation: 0.1, hue: 0.02, grayscale_p: 0.1, blur_p: 0.1, erase_p: 0.2, noise_p: 0.3, noise_std: 0.04} + - data_dir: data + +- [x] Create `classifier/configs/phase2/p2d_resnet18_aug.json` + - backbone: resnet18 + - image_size: 224 + - subsample: 0.2 + - seed: 42 + - augment: {hflip_p: 0.5, rotation_degrees: 10, brightness: 0.2, contrast: 0.2, saturation: 0.1, hue: 0.02, grayscale_p: 0.1, 
blur_p: 0.1, erase_p: 0.2, noise_p: 0.3, noise_std: 0.04} + - data_dir: data + +- [ ] Train both augmentation configs with 5-fold stratified group CV +- [ ] Compare `p2b_resnet18_224` (no aug) vs `p2d_resnet18_aug` for each model + - No-aug baseline is `p2b_*_224` (comparison mapping in notebook) + +### 2.5 Augmentation + Facecrop (2E) +- [x] Create `classifier/configs/phase2/p2e_simplecnn_facecrop_aug.json` + - backbone: simple_cnn + - image_size: 224 + - subsample: 0.2 + - seed: 42 + - augment: {hflip_p: 0.5, rotation_degrees: 10, brightness: 0.2, contrast: 0.2, saturation: 0.1, hue: 0.02, grayscale_p: 0.1, blur_p: 0.1, erase_p: 0.2, noise_p: 0.3, noise_std: 0.04} + - data_dir: cropped/classifier + +- [x] Create `classifier/configs/phase2/p2e_resnet18_facecrop_aug.json` + - backbone: resnet18 + - image_size: 224 + - subsample: 0.2 + - seed: 42 + - augment: {hflip_p: 0.5, rotation_degrees: 10, brightness: 0.2, contrast: 0.2, saturation: 0.1, hue: 0.02, grayscale_p: 0.1, blur_p: 0.1, erase_p: 0.2, noise_p: 0.3, noise_std: 0.04} + - data_dir: cropped/classifier + +- [ ] Train both facecrop+aug configs with 5-fold stratified group CV +- [ ] Compare `p2c_resnet18_facecrop` (facecrop only) vs `p2e_resnet18_facecrop_aug` for each model + - Facecrop-only baseline is `p2c_*_facecrop` (comparison mapping in notebook) + +### 2.6 Phase 2 Analysis +- [ ] Use `classifier/notebooks/04_phase2_analysis.ipynb` for Phase 2 analysis +- [ ] For each experiment (2A-2E): + - [ ] Load 5-fold stratified group CV results (mean ± std and confidence intervals) + - [ ] Generate overall metrics (AUC, Accuracy, F1) + - [ ] Generate per-source metrics (text2img, inpainting, insight) + - [ ] Calculate train/val gap + - [ ] Calculate pairwise source AUC variance (wiki-vs-source AUC variance) + - [ ] Statistical significance testing vs baseline + - [ ] Generate comparison visualizations (bar charts, heatmaps) +- [ ] For 2C (Shortcut Analysis): + - [ ] Compare original-test vs alternative geometry 
evidence if reintroduced in a dedicated tool/notebook + - [ ] Compare ImageNet vs real-image-only normalization (color distribution shortcuts) + - [ ] Load source holdout results (3 configs) + - [ ] Calculate held-out source AUC vs in-source AUC for each holdout experiment + - [ ] Compute Δ (in-source AUC - held-out AUC) + - [ ] If Δ > 0.05-0.10, model is learning source-specific features + - [ ] Generate source holdout comparison table +- [ ] For each model/condition: + - [ ] Generate Grad-CAM visualizations (10-20 images per condition) + - [ ] Organize by experiment, prediction type, and source +- [ ] Answer key questions: + - [ ] Which preprocessing choices are statistically significant? + - [ ] Do certain sources benefit more from specific preprocessing? + - [ ] Is there an interaction between facecrop and augmentation? + - [ ] Are shortcuts being learned (resolution, color distribution)? + - [ ] Is the model learning source-specific features (source holdout)? + - [ ] Does augmentation remove shortcuts or over-regularize? + - [ ] What features do models focus on (based on Grad-CAM)? +- [ ] Generate comprehensive metrics comparison table +- [ ] Use paired fold-wise statistical tests for model comparisons, with bootstrap confidence intervals for key metrics where useful +- [ ] Provide evidence-based conclusions for each experiment +- [ ] Provide recommendations for Phase 3 (best preprocessing settings) + +--- + +## Phase 3: Extended Architecture Exploration + +### 3.1 Experiment Configs +Use the best preprocessing choices from Phase 2. The placeholders below assume 224×224, face crop enabled, and no augmentation unless Phase 2 results justify different settings. 
+ +- [ ] Create `classifier/configs/phase3/p3_resnet34.json` + - backbone: resnet34 + - pretrained: true + - epochs: 15 + - batch_size: 32 + - lr: 1e-4 + - weight_decay: 1e-4 + - image_size: 224 + - face_crop: true (or best from Phase 2B/E) + - face_crop_margin: 0.6 + - augment: false (or best from Phase 2D/E) + - subsample: 0.2 + - seed: 42 + - early_stopping_patience: 5 + +- [ ] Create `classifier/configs/phase3/p3_resnet50.json` + - backbone: resnet50 + - pretrained: true + - epochs: 15 + - batch_size: 32 + - lr: 1e-4 + - weight_decay: 1e-4 + - image_size: 224 + - face_crop: true (or best from Phase 2B/E) + - face_crop_margin: 0.6 + - augment: false (or best from Phase 2D/E) + - subsample: 0.2 + - seed: 42 + - early_stopping_patience: 5 + +- [ ] Create `classifier/configs/phase3/p3_efficientnet_b0.json` + - backbone: efficientnet_b0 + - pretrained: true + - epochs: 15 + - batch_size: 32 + - lr: 1e-4 + - weight_decay: 1e-4 + - image_size: 224 + - face_crop: true (or best from Phase 2B/E) + - augment: false (or best from Phase 2D/E) + - subsample: 0.2 + - seed: 42 + - early_stopping_patience: 5 + +- [ ] Create `classifier/configs/phase3/p3_convnext_tiny.json` + - backbone: convnext_tiny + - pretrained: true + - epochs: 15 + - batch_size: 32 + - lr: 1e-4 + - weight_decay: 1e-4 + - image_size: 224 + - face_crop: true (or best from Phase 2B/E) + - augment: false (or best from Phase 2D/E) + - subsample: 0.2 + - seed: 42 + - early_stopping_patience: 5 + +- [ ] Create `classifier/configs/phase3/p3_mobilenetv3_small.json` + - backbone: mobilenetv3_small + - pretrained: true + - epochs: 15 + - batch_size: 32 + - lr: 1e-4 + - weight_decay: 1e-4 + - image_size: 224 + - face_crop: true (or best from Phase 2B/E) + - augment: false (or best from Phase 2D/E) + - subsample: 0.2 + - seed: 42 + - early_stopping_patience: 5 + +### 3.2 Model Implementation +- [ ] Implement ConvNeXt-Tiny in `classifier/src/models/convnext.py` +- [ ] Implement MobileNetV3-Small in 
`classifier/src/models/mobilenet.py` +- [ ] Register both models in `classifier/src/models/__init__.py` + +### 3.3 Training +- [ ] Train ResNet34 with 5-fold stratified group CV +- [ ] Train ResNet50 with 5-fold stratified group CV +- [ ] Train EfficientNet-B0 with 5-fold stratified group CV +- [ ] Train ConvNeXt-Tiny with 5-fold stratified group CV +- [ ] Train MobileNetV3-Small with 5-fold stratified group CV +- [ ] Save all checkpoints and metrics + +### 3.4 Analysis +- [ ] Use `classifier/notebooks/05_phase3_analysis.ipynb` for Phase 3 analysis +- [ ] Load 5-fold stratified group CV results for all models (mean ± std and confidence intervals) +- [ ] Generate overall metrics for each model +- [ ] Generate per-source metrics for each model +- [ ] Compare with Phase 1 baselines (ResNet18, SimpleCNN) +- [ ] Statistical significance testing vs baselines +- [ ] Generate Grad-CAM visualizations for top models (10-20 images each) +- [ ] Parameter count vs performance analysis +- [ ] Conclusions: Which architectures work best and why + +--- + +## Phase 4: Final Analysis on Best Models + +### 4.1 Select Top Models +- [ ] Based on Phases 1-3 results, select top 3-4 models +- [ ] Document selection criteria (e.g., top AUC, balanced performance, efficiency) + +### 4.2 Data Quantity Scaling (4A) +- [ ] For each selected model, create configs for different data sizes: + - [ ] `classifier/configs/phase4/p4a__20pct.json` (subsample: 0.2) + - [ ] `classifier/configs/phase4/p4a__50pct.json` (subsample: 0.5) + - [ ] `classifier/configs/phase4/p4a__100pct.json` (subsample: 1.0) +- [ ] In every 4A config, explicitly set the best Phase 2 preprocessing choices: + - image_size: best from Phase 2A + - face_crop: best from Phase 2B/E + - augment: best from Phase 2D/E +- [ ] Train each model with 5-fold stratified group CV at all three data sizes +- [ ] Compare how each model scales with more data + +### 4.3 Full Dataset Evaluation (4B) +- [ ] For each selected model, create config for 
full dataset: + - `classifier/configs/phase4/p4b__full.json` (subsample: 1.0) +- [ ] In every 4B config, explicitly set the same best Phase 2 preprocessing choices used in 4A +- [ ] Train each model on full dataset with 5-fold stratified group CV +- [ ] Generate detailed per-source metrics +- [ ] Generate Grad-CAM visualizations (10-20 images each) +- [ ] Perform hard example analysis (false positives/negatives) with visualizations +- [ ] Generate confidence distribution histograms +- [ ] Cross-validation results (mean ± std with confidence intervals) + +### 4.4 Analysis +- [ ] Use `classifier/notebooks/06_phase4_analysis.ipynb` for Phase 4 analysis +- [ ] Load data quantity scaling results +- [ ] Load full dataset evaluation results +- [ ] Generate comprehensive metrics comparison table +- [ ] Generate per-source metrics for final models +- [ ] Generate Grad-CAM galleries for final models +- [ ] Perform hard example analysis with visualizations +- [ ] Generate confidence distribution histograms +- [ ] Final model comparison and selection +- [ ] Conclusions and recommendations + +--- + +## Notebooks and Analysis + +This section is the consolidated notebook checklist for the notebooks referenced in the phase sections above; do not create duplicate notebooks for the same phase. 
+ +### 5.1 Exploratory Data Analysis +- [x] Create `classifier/notebooks/01_eda.ipynb` +- [x] Dataset overview (real vs fake distribution, sources) +- [x] Image resolution/aspect ratio analysis (identify potential shortcuts) +- [x] Color distribution analysis (identify potential shortcuts) +- [x] Sample visualization from each source +- [x] Statistical summary of the dataset +- [x] Data quality checks + +### 5.2 Preprocessing Pipeline +- [x] Create `classifier/notebooks/02_preprocessing.ipynb` +- [x] Square crop and resize implementation demonstration +- [x] Face crop (MTCNN) demonstration and effectiveness analysis +- [x] Augmentation pipeline visualization (before/after examples) +- [x] Z-score normalization comparison (ImageNet vs real-image-only) +- [x] Data split verification (group-aware by basename, no overlap) +- [x] Preprocessing impact visualization + +### 5.3 Phase 1 Analysis +- [x] Create `classifier/notebooks/03_phase1_analysis.ipynb` +- [x] Load Phase 1 training results +- [x] Generate 5-fold stratified group CV results (mean ± std with confidence intervals) +- [x] Generate per-source metrics for each model +- [x] Generate train/val/test performance curves +- [x] Generate confusion matrices +- [x] Perform statistical significance testing between models +- [x] Generate Grad-CAM visualizations (10-20 images each) +- [x] Document conclusions: Which baseline is better and why + +### 5.4 Phase 2 Analysis +- [x] Create `classifier/notebooks/04_phase2_analysis.ipynb` +- [ ] Load all Phase 2 experiment results +- [ ] For each experiment (2A-2E): + - [ ] Generate 5-fold stratified group CV results (mean ± std with confidence intervals) + - [ ] Generate overall metrics + - [ ] Generate per-source metrics + - [ ] Calculate train/val gap + - [ ] Calculate pairwise source AUC variance (wiki-vs-source AUC variance) + - [ ] Perform statistical significance testing +- [ ] Generate comparison tables across all Phase 2 experiments +- [ ] Generate comparison 
visualizations (bar charts, heatmaps) +- [ ] For each model/condition, generate Grad-CAM visualizations (10-20 images) +- [ ] Organize visualizations by experiment, model, prediction type, and source +- [ ] Answer key analysis questions +- [ ] Generate comprehensive metrics comparison table +- [ ] Provide evidence-based conclusions for each experiment +- [ ] Provide recommendations for Phase 3 + +### 5.5 Phase 3 Analysis +- [ ] Create `classifier/notebooks/05_phase3_analysis.ipynb` +- [ ] Load Phase 3 training results +- [ ] Generate 5-fold stratified group CV results for each model (mean ± std with confidence intervals) +- [ ] Generate per-source metrics for each model +- [ ] Compare with Phase 1 baselines (ResNet18, SimpleCNN) +- [ ] Perform statistical significance testing vs baselines +- [ ] Generate Grad-CAM visualizations for top models (10-20 images each) +- [ ] Parameter count vs performance analysis +- [ ] Conclusions: Which architectures work best and why + +### 5.6 Phase 4 Analysis +- [ ] Create `classifier/notebooks/06_phase4_analysis.ipynb` +- [ ] Load data quantity scaling results +- [ ] Load full dataset evaluation results +- [ ] Generate comprehensive metrics comparison table +- [ ] Generate per-source metrics for final models +- [ ] Generate Grad-CAM galleries for final models +- [ ] Perform hard example analysis with visualizations +- [ ] Generate confidence distribution histograms +- [ ] Final model comparison and selection +- [ ] Conclusions and recommendations + +### 5.7 Grad-CAM Deep Dive (Optional) +- [ ] Create `classifier/notebooks/07_gradcam_deep_dive.ipynb` +- [ ] Load Grad-CAM results from all phases +- [ ] Comprehensive Grad-CAM analysis across all phases and models +- [ ] Feature visualization for different model architectures +- [ ] CNN vs EfficientNet vs ConvNeXt comparison +- [ ] What regions do different architectures focus on? +- [ ] Are there systematic differences in attention patterns? 
+
+- [ ] Evidence of shortcut removal analysis across phases
+- [ ] Temporal analysis: does model attention change with different preprocessing?
+- [ ] Generate visual explanations suitable for presentation
+
+---
+
+## Code Implementation Tasks
+
+### Cross-Validation Implementation
+- [x] Update `classifier/src/training/trainer.py` to support 5-fold stratified group CV by basename
+- [x] Update `classifier/src/evaluation/evaluate.py` to support grouped CV splits
+- [x] Implement metric aggregation across folds (mean ± std)
+- [x] Ensure all metrics report confidence intervals
+- [x] Reuse the same fold assignments for comparable experiments so paired statistical tests are valid
+- [x] Rename `classifier/run_cv.py` to `classifier/run.py` (pipeline expects classifier/run.py)
+
+### Model Implementations
+- [ ] Implement ConvNeXt-Tiny in `classifier/src/models/convnext.py`
+- [ ] Implement MobileNetV3-Small in `classifier/src/models/mobilenet.py`
+- [ ] Register both models in `classifier/src/models/__init__.py`
+
+### Normalization Implementation
+- [ ] Implement function to calculate mean/std from real training images only
+- [ ] Update `classifier/src/preprocessing/pipeline.py` to support custom normalization stats
+- [ ] Test ImageNet normalization vs real-image-only normalization
+
+### Evaluation Improvements
+- [ ] Ensure test set uses `train=False` to disable augmentation
+- [ ] Ensure diagnostic evaluation transforms never change the training data
+- [ ] Verify CV fold assignments are identical across comparable experiments (same seed and basename grouping)
+- [ ] Implement per-source metrics with detection rate and false alarm rate
+- [ ] Implement pairwise AUC calculations
+- [ ] Implement train/val gap calculations
+- [ ] Implement pairwise source AUC variance calculations
+
+### Grad-CAM Improvements
+- [ ] Ensure Grad-CAM works for all model types (CNN-based)
+- [ ] Implement Grad-CAM for ConvNeXt +- [ ] Implement Grad-CAM for MobileNetV3 +- [ ] Organize Grad-CAM outputs by experiment, model, prediction type, source + +--- + +## Final Report Preparation +- [ ] Compile results from all phases +- [ ] Create presentation slides (PDF format) +- [ ] Brief description of deep learning solutions (discriminative + generative) +- [ ] Description of implementation steps and improvements + - [ ] Motivate choices for architecture, training strategy, etc. + - [ ] Show intermediate results + - [ ] Interpret results and what changed + - [ ] What was decided to improve results +- [ ] Classification performance results + - [ ] Experimental setup + - [ ] Train/val/test splits + - [ ] Performance metrics chosen +- [ ] Data generation performance results + - [ ] Experimental setup + - [ ] Performance metrics chosen +- [ ] Discussion and conclusions + - [ ] Comments on performance + - [ ] Final remarks +- [ ] Fill auto-evaluation file + +--- + +## Summary + +Total tasks: ~150+ + +This implementation plan covers: +- ✅ All 4 phases with comprehensive experiments +- ✅ 5-fold stratified group cross-validation for all experiments +- ✅ 7 analysis notebooks for robust validation +- ✅ Shortcut analysis (resolution/ratio + color distribution + source holdout) +- ✅ Source holdout experiments to detect source-specific feature learning +- ✅ Grad-CAM visualizations for explainability +- ✅ Statistical analysis with confidence intervals +- ✅ Per-source metrics for all experiments +- ✅ Data quantity scaling analysis +- ✅ Full dataset evaluation on best models +- ✅ Comprehensive documentation and reporting + +**Key Features:** +- Reproducible experiments with fixed seeds +- Stratified group CV keeps basename groups together while balancing class distribution +- Multiple shortcut analyses to prevent model cheating (resolution, color, source-specific) +- Source holdout experiments to test generalization to unseen sources +- Grad-CAM for explainability +- 
Statistical rigor with confidence intervals +- Per-source analysis to understand model behavior +- Clear progression from baselines -> preprocessing -> architectures -> final evaluation diff --git a/docs/classifier_plan.md b/docs/classifier_plan.md new file mode 100644 index 0000000..86c1739 --- /dev/null +++ b/docs/classifier_plan.md @@ -0,0 +1,449 @@ +# Classifier Reorganization Plan (v2) + +## Analysis of Current Phasing Issues + +Your current phasing has several problems that make it difficult to present a rigorous, explainable report: + +### Current Problems + +1. **Inconsistent comparison conditions**: + - SimpleCNN uses lr=1e-3, ResNet uses lr=1e-4 + - SimpleCNN trains 20 epochs (no ES), ResNet18 trains 15 epochs (with ES) + - Makes direct comparisons invalid + +2. **No cross-validation**: + - Only a single 80/10/10 split + - Results may be split-dependent + - No confidence intervals on metrics + +3. **Augmentation testing is incomplete**: + - Only tested on ResNet18 (Phase 3), not across architectures + - Performance drop could mean: (a) removing shortcuts (good) or (b) over-regularization (bad) + - No way to distinguish these cases + +4. **Facecrop impact not generalized**: + - Only ResNet18 tested with facecrop + - Don't know if EfficientNet or ViT benefit similarly + +5. **Full dataset only on one model**: + - Only ResNet18 tested on full dataset + - Don't know if data quantity helps all models equally + +6. **Test set integrity**: + - Need to verify test set uses original images (no augmentation, no preprocessing or minimal if really necessary) + - Need to ensure same train/val/test splits across all model comparisons + - Need central config for shared parameters across phases + +--- + +## Recommended Reorganization + +I suggest reorganizing into **4 phases** with clear, isolated variables. All phases use **5-fold stratified cross-validation** as standard practice to ensure balanced class distribution across folds. 
+ +### Phase 1: Controlled Baseline Comparison + +**Goal**: Compare simple architectures under identical conditions to establish baselines + +**Fixed conditions for ALL models**: +- Data: 20% subsample +- Resolution: 128×128 +- No face crop +- No augmentation +- Optimizer: AdamW (lr=1e-4, weight_decay=1e-4) +- Scheduler: CosineAnnealingLR (T_max=15) +- Epochs: 15 with early stopping (patience=5) +- Batch size: 32 +- 5-fold stratified cross-validation (report mean ± std) + +| Model | Params | Expected AUC (mean ± std) | +|-------|--------|---------------------------| +| SimpleCNN | ~400k | ? | +| ResNet18 | ~11.7M | ? | + +**This gives you**: Clean, comparable baseline for simple architectures with confidence intervals + +**These same 2 models will be used in Phase 2 for preprocessing experiments.** + +--- + +### Phase 2: Preprocessing Impact (Same 2 Models from Phase 1) + +**Goal**: Test each preprocessing change on the SAME 2 models from Phase 1 + +**Experimental questions**: +- Does higher resolution improve performance? +- Does face cropping improve performance? +- Does augmentation improve or hurt performance? +- Does augmentation interact with face cropping? +- Is the model learning any shortcuts (e.g., resolution differences, aspect ratios, etc.)? + +#### 2A: Shortcut Analysis +**Goal**: Establish whether the baseline model exploits geometry, colour, or source-specific shortcuts before drawing any conclusions from preprocessing experiments. 
+ +**Test 1: Resolution/Ratio Shortcuts (Letterboxing)** +- Train on original images (real=rectangular, fake=square); evaluate the same checkpoint under standard crop vs letterbox-padded real images to confirm geometry is or is not a discriminative cue +- Models: **ResNet18** +- Data: 20% subsample +- 5-fold stratified CV (balanced class distribution) +- Resolution: 224×224 +- No facecrop, no augmentation + +| Experiment | AUC | Train/Val Gap | Per-Source AUC Variance | +|------------|-----|---------------|-------------------------| +| Original images (standard eval) | ? | ? | ? | +| Matched geometry (letterboxed real images) | ? | ? | ? | + +**Test 2: Color Distribution Shortcuts** +- Compare: Train with ImageNet normalization stats vs real-image-only normalization stats +- Models: **ResNet18** +- Data: 20% subsample +- 5-fold stratified CV (balanced class distribution) +- Resolution: 224×224 +- No facecrop, no augmentation +- ImageNet stats: mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225) +- Real-image stats: Calculate mean/std from real training images only, apply to all + +| Experiment | AUC | Train/Val Gap | Per-Source AUC Variance | +|------------|-----|---------------|-------------------------| +| ImageNet normalization | ? | ? | ? | +| Real-image-only normalization | ? | ? | ? | + +**Test 3: Source-Specific Feature Learning (Source Holdout)** +- Compare: Train on all sources vs train with one source held out +- Models: **ResNet18** +- Data: 20% subsample +- 5-fold stratified CV (balanced class distribution) +- Resolution: 224×224 +- No facecrop, no augmentation +- Hold out each fake source (text2img, inpainting, insight) separately + +| Experiment | Held-out Source | Train Sources | Held-out AUC | In-Source AUC | Δ (In-Source - Held-out) | +|------------|-----------------|---------------|--------------|---------------|--------------------------| +| Baseline | None | All | - | ? | - | +| Holdout text2img | text2img | wiki, inpainting, insight | ? | ? 
| ? | +| Holdout inpainting | inpainting | wiki, text2img, insight | ? | ? | ? | +| Holdout insight | insight | wiki, text2img, inpainting | ? | ? | ? | + +**Interpretation**: If held-out source AUC is significantly lower than in-source AUC (Δ > 0.05-0.10), the model is learning source-specific features. If AUC drop under matched geometry is significant, the model exploits aspect-ratio as a shortcut — this must be known before interpreting resolution or facecrop results. + +#### 2B: Resolution Impact (no facecrop, no augmentation) +- Test: 128×128 vs 224×224 +- Models: **SimpleCNN, ResNet18** +- Data: 20% subsample +- 5-fold stratified CV (balanced class distribution) + +| Model | 128×128 AUC | 224×224 AUC | Δ | +|-------|-------------|-------------|---| +| SimpleCNN | ? | ? | ? | +| ResNet18 | ? | ? | ? | + +#### 2C: Facecrop Impact (224×224, no augmentation) +- Test: No facecrop vs MTCNN facecrop +- Models: **SimpleCNN, ResNet18** +- Data: 20% subsample +- 5-fold stratified CV (balanced class distribution) + +| Model | No Facecrop AUC | Facecrop AUC | Δ | +|-------|-----------------|--------------|---| +| SimpleCNN | ? | ? | ? | +| ResNet18 | ? | ? | ? | + +#### 2D: Augmentation Impact (224×224, without facecrop) +- Test: No augmentation vs augmentation +- Models: **SimpleCNN, ResNet18** +- Data: 20% subsample +- 5-fold stratified CV (balanced class distribution) +- **Verify test set has no augmentation** (code inspection of `get_transforms(train=False, ...)`) +- **Analyze shortcut removal**: Compare train/val gaps and per-source AUC balance + +| Model | No Aug AUC | With Aug AUC | Δ | Train/Val Gap (No Aug) | Train/Val Gap (With Aug) | +|-------|------------|--------------|---|------------------------|--------------------------| +| SimpleCNN | ? | ? | ? | ? | ? | +| ResNet18 | ? | ? | ? | ? | ? | + +**Experimental question**: Does augmentation without facecrop improve or hurt performance? 
+
+#### 2E: Augmentation + Facecrop Combined (224×224)
+- Test: Facecrop only vs Facecrop + augmentation
+- Models: **SimpleCNN, ResNet18**
+- Data: 20% subsample
+- 5-fold stratified CV (balanced class distribution)
+- **Analyze shortcut removal**: Compare train/val gaps and per-source AUC balance
+
+| Model | Facecrop Only AUC | Facecrop + Aug AUC | Δ | Train/Val Gap (Only) | Train/Val Gap (With Aug) |
+|-------|-------------------|--------------------|---|----------------------|--------------------------|
+| SimpleCNN | ? | ? | ? | ? | ? |
+| ResNet18 | ? | ? | ? | ? | ? |
+
+**Experimental question**: Does augmentation with facecrop improve or hurt performance compared to facecrop alone?
+
+**This gives you**:
+- Isolated impact of each preprocessing choice on SimpleCNN and ResNet18
+- Verification that the model is not learning shortcuts
+- Understanding of how augmentation interacts with face cropping
+- Shortcut removal analysis through train/val gap and per-source AUC metrics
+
+---
+
+### Phase 3: Extended Architecture Exploration
+
+**Goal**: Test additional architectures to find the best performing models
+
+**Fixed conditions** (based on best findings from Phase 2):
+- Data: 20% subsample
+- Resolution: Best from Phase 2B (likely 224×224)
+- Facecrop: Best from Phase 2C/E (likely Yes)
+- Augmentation: Best from Phase 2D/E (depends on experimental results)
+- Optimizer: AdamW (lr=1e-4, weight_decay=1e-4)
+- Scheduler: CosineAnnealingLR (T_max=15)
+- Epochs: 15 with early stopping (patience=5)
+- Batch size: 32
+- 5-fold stratified cross-validation (balanced class distribution)
+
+| Model | Params | Rationale |
+|-------|--------|-----------|
+| ResNet34 | ~21.8M | Deeper ResNet - test if more capacity helps |
+| ResNet50 | ~25.6M | Even deeper with bottleneck blocks |
+| EfficientNet-B0 | ~4.0M | Efficient compound scaling |
+| ConvNeXt-Tiny | ~29M | Modern CNN, different architecture family |
+| MobileNetV3-Small | ~2.5M | Lightweight efficiency 
comparison |
+
+**This gives you**: Extended architecture exploration to identify top-performing models for Phase 4
+- ResNet depth progression (18 -> 34 -> 50)
+- Efficient architectures (EfficientNet-B0, MobileNetV3-Small)
+- Modern CNN with different inductive bias (ConvNeXt-Tiny)
+- Size range (2.5M to 29M parameters)
+
+---
+
+### Phase 4: Final Analysis on Best Models
+
+**Goal**: Comprehensive evaluation of top-performing models from Phases 1-3
+
+**Select top 3-4 models** based on Phase 1-3 results (e.g., ResNet18, ResNet34, EfficientNet-B0, ConvNeXt-Tiny)
+
+#### 4A: Data Quantity Scaling
+Test how each best model scales with more data:
+
+| Model | 20% Data AUC | 50% Data AUC | 100% Data AUC | Δ (100% - 20%) |
+|-------|--------------|--------------|---------------|----------------|
+| Model 1 | ? | ? | ? | ? |
+| Model 2 | ? | ? | ? | ? |
+| Model 3 | ? | ? | ? | ? |
+| Model 4 | ? | ? | ? | ? |
+
+**Fixed conditions**:
+- Resolution: Best from Phase 2B
+- Facecrop: Best from Phase 2C/E
+- Augmentation: Best from Phase 2D/E
+- 5-fold stratified cross-validation (balanced class distribution)
+
+#### 4B: Comprehensive Evaluation on Full Dataset
+- Train best models on **full dataset** (100%)
+- Detailed per-source metrics (text2img, inpainting, insight)
+- Grad-CAM visualizations for explainability
+- Hard example analysis (false positives/negatives)
+- Confidence distribution analysis
+- Cross-validation results (mean ± std)
+
+**This gives you**: Final, comprehensive evaluation of the best models with full explainability
+
+---
+
+### Notebooks and Analysis
+
+**Goal**: Use Jupyter notebooks for comprehensive analysis and validation of each phase
+
+#### **01_eda.ipynb** - Exploratory Data Analysis
+- Dataset overview (real vs fake distribution, sources)
+- Image resolution/aspect ratio analysis (identify potential shortcuts)
+- Color distribution analysis (identify potential shortcuts)
+- Sample visualization from each source (text2img, inpainting, 
insight, wiki)
+- Statistical summary of the dataset
+- Data quality checks
+
+#### **02_preprocessing.ipynb** - Preprocessing Pipeline
+- Square crop and resize implementation demonstration
+- Face crop (MTCNN) demonstration and effectiveness analysis
+- Augmentation pipeline visualization (before/after examples)
+- Z-score normalization comparison (ImageNet vs real-image-only)
+- Data split verification (group-aware by basename, no overlap)
+- Preprocessing impact visualization
+
+#### **03_phase1_analysis.ipynb** - Phase 1: Architecture Baseline
+- SimpleCNN vs ResNet18 comparison
+- 5-fold stratified CV results (mean ± std with confidence intervals)
+- Per-source metrics for each model (text2img, inpainting, insight)
+- Train/val/test performance curves across epochs
+- Confusion matrices for each model
+- Statistical significance testing between models
+- Grad-CAM visualizations for both models (10-20 images each)
+- Conclusions: Which baseline is better and why
+
+#### **04_phase2_analysis.ipynb** - Phase 2: Preprocessing Impact
+- **2A**: Shortcut analysis (resolution/ratio + color distribution + source holdout)
+- **2B**: Resolution impact (128×128 vs 224×224)
+- **2C**: Facecrop impact
+- **2D**: Augmentation impact (without facecrop)
+- **2E**: Augmentation + facecrop combined
+
+For each experiment:
+- 5-fold CV results (mean ± std with confidence intervals)
+- Per-source metrics (text2img, inpainting, insight)
+- Statistical significance testing vs baseline
+- Comparison tables across all Phase 2 experiments
+- Grad-CAM visualizations (10-20 images per condition)
+- Analysis of train/val gap changes
+- Analysis of per-source AUC variance changes
+
+**Overall Phase 2 conclusions**:
+- Which preprocessing choices work best and why
+- Are shortcuts being learned (resolution, color distribution)?
+- Does augmentation remove shortcuts or over-regularize?
+- Recommendations for Phase 3 (best preprocessing settings) + +#### **05_phase3_analysis.ipynb** - Phase 3: Extended Architecture Exploration +- ResNet34, ResNet50, EfficientNet-B0, ConvNeXt-Tiny, MobileNetV3-Small +- 5-fold CV results (mean ± std) for each model +- Per-source metrics for each model +- Comparison with Phase 1 baselines (ResNet18, SimpleCNN) +- Statistical significance testing vs baselines +- Grad-CAM visualizations for top models (10-20 images each) +- Parameter count vs performance analysis +- Conclusions: Which architectures work best and why + +#### **06_phase4_analysis.ipynb** - Phase 4: Final Analysis +- **4A**: Data quantity scaling (20%, 50%, 100%) on top 3-4 models +- **4B**: Comprehensive evaluation on full dataset +- Detailed per-source metrics for final models +- Grad-CAM visualizations for final models (10-20 images each) +- Hard example analysis (false positives/negatives) with visualizations +- Confidence distribution analysis (histograms) +- Cross-validation results (mean ± std with confidence intervals) +- Final model comparison and selection +- Conclusions and recommendations + +#### **07_gradcam_deep_dive.ipynb** - Grad-CAM Deep Dive (optional) +- Comprehensive Grad-CAM analysis across all phases and models +- Feature visualization for different model architectures (CNN vs EfficientNet vs ConvNeXt) +- Comparison of what different models focus on (face regions, backgrounds, artifacts) +- Evidence of shortcut removal (or lack thereof) across phases +- Temporal analysis: does model attention change with different preprocessing? 
+
+- Visual explanations suitable for presentation
+
+**Notebook requirements**:
+- Each notebook should be self-contained and reproducible
+- Include statistical analysis with confidence intervals
+- Generate publication-ready visualizations
+- Address all experimental questions and hypotheses
+- Provide clear conclusions for each phase
+- Use consistent formatting and style across all notebooks
+- Save all results (metrics, figures, tables) for easy reference
+
+---
+
+## Key Improvements
+
+### 1. Stratified Cross-Validation Implementation
+```python
+# Use sklearn's StratifiedGroupKFold so folds balance the class distribution
+# while keeping all images that share a basename inside the same fold
+from sklearn.model_selection import StratifiedGroupKFold
+
+sgkf = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=42)
+for fold, (train_idx, val_idx) in enumerate(sgkf.split(X, y, groups=basenames)):
+    # Train on train_idx, validate on val_idx
+    # Store metrics per fold
+```
+
+### 2. Augmentation Shortcut Removal Analysis (Phase 2D/2E)
+Track these metrics with/without augmentation:
+
+| Metric | Without Aug | With Aug | Interpretation |
+|--------|-------------|----------|----------------|
+| Train AUC | 0.99 | 0.95 | ↓ Expected |
+| Val AUC | 0.90 | 0.89 | ↓ Slight |
+| **Train/Val Gap** | **0.09** | **0.06** | **↓ Good!** |
+| text2img AUC | 0.98 | 0.96 | ↓ Slight |
+| InsightFace AUC | 0.82 | 0.85 | **↑ Good!** |
+| **AUC Variance** | **0.08** | **0.06** | **↓ Good!** |
+
+**Interpretation**: If train/val gap ↓ AND per-source AUC variance ↓, augmentation is removing shortcuts.
+
+### 3. Consistent Hyperparameters
+- Same lr for all models (1e-4 is safe for pretrained, may need adjustment for SimpleCNN)
+- Same epochs, ES patience, batch size
+- Only vary the architecture being tested
+
+### 4. 
Test Set Integrity and Reproducibility + +**Test set from original source**: +- Verify that test set uses original images with minimal preprocessing +- Test set should use `get_transforms(train=False, ...)` to disable augmentation +- Ensure test images are not preprocessed in a way that could affect model comparisons + +**Reproducible splits across models**: +- The code already uses `cfg.get("seed", 42)` for reproducible splits +- All experiments should use the same seed (42) to ensure identical train/val/test splits +- This ensures fair comparison between models + +**Central config for shared parameters**: +- Create a central config file (`classifier/configs/shared.json`) with parameters common across all phases +- This includes: seed, val_ratio, test_ratio, batch_size, optimizer settings, etc. +- Individual experiment configs can override these defaults + +Example shared config: +```json +{ + "seed": 42, + "val_ratio": 0.1, + "test_ratio": 0.1, + "batch_size": 32, + "optimizer": { + "type": "adamw", + "lr": 1e-4, + "weight_decay": 1e-4 + }, + "scheduler": { + "type": "cosine_annealing", + "T_max": 15 + }, + "early_stopping_patience": 5, + "num_workers": 4 +} +``` + +--- + +## Summary Table for Report + +| Phase | Variable Tested | Models | Data | Resolution | Facecrop | Augment | CV | +|-------|-----------------|--------|------|------------|----------|---------|----| +| 1 | Architecture Baseline | SimpleCNN, ResNet18 | 20% | 128 | No | No | 5-fold stratified | +| 2A | Shortcut Analysis | ResNet18 | 20% | 224 | No | No | 5-fold stratified | +| 2A-Holdout | Source Holdout | ResNet18 | 20% | 224 | No | No | 5-fold stratified | +| 2B | Resolution | SimpleCNN, ResNet18 | 20% | 128/224 | No | No | 5-fold stratified | +| 2C | Facecrop | SimpleCNN, ResNet18 | 20% | 224 | ± | No | 5-fold stratified | +| 2D | Augmentation (no facecrop) | SimpleCNN, ResNet18 | 20% | 224 | No | ± | 5-fold stratified | +| 2E | Augmentation + Facecrop | SimpleCNN, ResNet18 | 20% | 224 | Yes | 
± | 5-fold stratified |
+| 3 | Extended Architectures | ResNet34, ResNet50, EffNet-B0, ConvNeXt-Tiny, MobileNetV3-Small | 20% | Best | Best | Best | 5-fold stratified |
+| 4A | Data Quantity | Top 3-4 models | 20/50/100% | Best | Best | Best | 5-fold stratified |
+| 4B | Final Evaluation | Top 3-4 models | 100% | Best | Best | Best | 5-fold stratified |
+
+This structure gives you:
+- ✅ Identical comparison conditions across all phases
+- ✅ 5-fold stratified cross-validation with confidence intervals (ensures balanced class distribution)
+- ✅ Same 2 baseline models (SimpleCNN, ResNet18) tested across all preprocessing variations (Phase 2)
+- ✅ Shortcut analysis to verify no bias (Phase 2A)
+- ✅ Experimental questions about augmentation impact (Phase 2D/2E)
+- ✅ Shortcut removal analysis via train/val gap and per-source AUC metrics
+- ✅ Facecrop tested on baseline models (Phase 2C)
+- ✅ Extended architecture exploration with proven models (Phase 3)
+- ✅ Final comprehensive analysis on best models (Phase 4)
+- ✅ Data quantity scaling on multiple best models (Phase 4A)
+- ✅ Clear, isolated variables per phase
+- ✅ Explainable progression for report
+
+**Key Experimental Questions in Phase 2**:
+- **2A (Shortcut Analysis)**: Is the model learning any shortcuts (e.g., resolution differences, aspect ratios, etc.)?
+- **2D (Augmentation without facecrop)**: Does augmentation improve or hurt performance?
+- **2E (Augmentation with facecrop)**: Does augmentation improve or hurt performance compared to facecrop alone?
diff --git a/docs/generator_plan.md b/docs/generator_plan.md
new file mode 100644
index 0000000..f2f74ac
--- /dev/null
+++ b/docs/generator_plan.md
@@ -0,0 +1,279 @@
+# Generator Plan
+
+The assignment rewards *iterative improvement with intermediate results*. This plan is structured around **model evolution as the spine**: each step has a *because* tied to an observed failure of the previous step. 
Pipeline ablations are honest but de-emphasized — they clear the table for the real story. + +--- + +## Standard Settings (Applied Everywhere Unless Noted) + +| Setting | Value | Reason | +|---------|-------|--------| +| Batch size | 64 | Consistent across experiments | +| Mixed precision | float16 + GradScaler | Speed | +| EMA decay | 0.9999 | Sample from EMA weights for GANs | +| FID evaluation | Every 25 epochs | Objective quality tracking | +| FID n_real | 5000 | Held-out real images | +| Default epochs | 100 | Best-of-each in Phase 4 retrains to 200 | + +Per-model optimizer/hyperparameters are listed inside each phase. + +--- + +## Phase 1 — Pipeline Selection *(quick, one figure)* + +**Goal**: Pick the data pipeline used for every downstream experiment. Don't dwell here — this is clearing the table, not the story. + +Fixed model: **DCGAN at 64×64** (cheapest baseline, fast iteration). One variable per experiment. + +| Experiment | Variable | Variants | Decision | +|---|---|---|---| +| 1A | Resolution | 64×64 vs 128×128 | Pick by FID — assumed transferable | +| 1B | Face crop + alignment | Full image vs MTCNN-aligned | Pick by FID — assumed transferable | +| 1C | Augmentation | H-flip only vs H-flip + rotation ±5° + mild color jitter | Per-family: validate inside Phase 2 for GAN, default to H-flip-only for VAE/DDPM | +| 1D | Combined dataset | Aligned only vs aligned + raw mixed | Pick by FID — expected to underperform aligned-only | + +**Caveat on transferability**: Phase 1 uses DCGAN as a proxy to choose the pipeline cheaply, then assumes the choice transfers to VAE and DDPM. Resolution and alignment are largely architecture-invariant (more pixels help everyone; structural consistency helps any spatial prior). Augmentation is *not* — diffusion models benefit less from aug, and MSE-VAE may even be hurt by color jitter. So 1C is treated as an **indicative** result for GANs and re-checked per family rather than baked in globally. 
+ +**1D — combined dataset rationale**: Mixing aligned + raw doubles the variance the generator must model (face anywhere/any scale + face fixed) and dilutes the geometric prior. Hypothesis: combined < aligned-only. Cheap to test (one extra DCGAN run). Included for completeness so the report shows we considered it rather than asserting it. + +**MTCNN alignment** (one-time preprocessing, cached to disk): + +```python +from facenet_pytorch import MTCNN +from skimage.transform import SimilarityTransform, warp +import numpy as np +from PIL import Image + +mtcnn = MTCNN(keep_all=False, device='cuda') + +REF_LANDMARKS = np.array([ # reference positions in 128×128 + [38.0, 51.0], # left eye + [90.0, 51.0], # right eye + [64.0, 71.0], # nose + [45.0, 95.0], # left mouth + [83.0, 95.0], # right mouth +], dtype=np.float32) + +def align_face(img: Image.Image, out_size: int = 128): + boxes, _, landmarks = mtcnn.detect(img, landmarks=True) + if boxes is None: + return None + tform = SimilarityTransform() + tform.estimate(landmarks[0], REF_LANDMARKS) + aligned = warp(np.array(img), tform.inverse, + output_shape=(out_size, out_size), + order=3, preserve_range=True).astype(np.uint8) + return Image.fromarray(aligned) +``` + +**Augmentation philosophy** — only structure-preserving transforms (face-aligned crops are consistent by design): + +| Transform | Apply? | Reason | +|---|---|---| +| Horizontal flip | Yes, p=0.5 | Faces are symmetric | +| Rotation | Yes, ±5° | Residual head tilt post-alignment | +| Color jitter | Yes, mild | brightness ±0.1, contrast ±0.1, saturation ±0.05 | +| Translation | No | Breaks alignment | +| Vertical flip | No | Meaningless for faces | +| Strong blur / noise | No | Teaches the model to generate blur | + +**Output**: ~1 page in the report. Best pipeline carries forward to all phases. + +--- + +## Phase 2 — GAN Evolution *(main spine)* + +**Goal**: The richest narrative — each step has a clear *because* from observed failure. 
This is the strongest part of the storyline; keep it front and center. + +Best pipeline from Phase 1 fixed throughout. + +--- + +### 2.1 — DCGAN *(baseline)* + +Simplest GAN baseline. BCE loss, no gradient penalty. + +- Adam β1=0.5, β2=0.999, lr=2e-4 +- ngf=ndf=64, latent_dim=100 +- Resolution: 64×64 + +**Expected failure**: mode collapse, training instability, oscillating losses. Document these explicitly — they motivate 2.2. + +--- + +### 2.2 — WGAN-GP + +**Because**: DCGAN showed mode collapse and instability → Wasserstein loss + gradient penalty. + +- Adam β1=0.0, β2=0.9, lr_g=lr_d=1e-4 +- ngf=ndf=64, latent_dim=128, n_critic=2, gp_lambda=10 +- Resolution: 64×64 + +**Expected**: more stable training, better diversity. Likely remaining issues: texture artifacts, limited global coherence at higher resolution. + +--- + +### 2.3 — WGAN-GP + Spectral Norm + GroupNorm + Self-Attention + +**Because**: WGAN-GP showed texture artifacts / limited coherence → principled Lipschitz constraint and long-range dependencies. 
+ +- Generator: BatchNorm → GroupNorm (no batch-size coupling) +- Critic: InstanceNorm → Spectral Normalization (principled Lipschitz constraint) +- Self-attention at 16×16 in both generator and critic + +```python +class SelfAttention(nn.Module): + def __init__(self, in_ch): + super().__init__() + mid = max(in_ch // 8, 1) + self.q = nn.Conv2d(in_ch, mid, 1, bias=False) + self.k = nn.Conv2d(in_ch, mid, 1, bias=False) + self.v = nn.Conv2d(in_ch, in_ch, 1, bias=False) + self.gamma = nn.Parameter(torch.zeros(1)) + self._mid = mid + + def forward(self, x): + b, c, h, w = x.shape + q = self.q(x).view(b, self._mid, -1).transpose(-2, -1) + k = self.k(x).view(b, self._mid, -1) + v = self.v(x).view(b, c, -1) + attn = torch.softmax(q @ k * self._mid ** -0.5, dim=-1) + return x + self.gamma * (v @ attn.transpose(-2, -1)).view(b, c, h, w) +``` + +--- + +### 2.4 — Scale to 128×128 *(if 2.3 looks coherent at 64×64)* + +**Because**: 2.3 produces coherent samples at 64×64 → does the architecture hold up at higher resolution? + +Same architecture as 2.3, retrained at 128×128. Add attention at 32×32 if memory permits. + +--- + +### Phase 2 Results + +| Step | Model | FID @ 100ep ↓ | Main observed failure | Motivates next step | +|---|---|---|---|---| +| 2.1 | DCGAN | ? | ? | ? | +| 2.2 | WGAN-GP | ? | ? | ? | +| 2.3 | WGAN-GP + SN + Attn | ? | ? | ? | +| 2.4 | + 128×128 | ? | ? | — | + +For each step: FID curve, 16-sample grid, one paragraph on what failed and why the next change addresses it. + +--- + +## Phase 3 — VAE Track + +**Goal**: A self-contained evolution story for the likelihood-based family. Every step motivated by a known limitation of the previous. 
+ +| Step | Model | Because | +|---|---|---| +| 3.1 | Vanilla VAE (MSE) | Baseline — expect blur | +| 3.2 | + Perceptual loss (VGG) | MSE blur is fundamental to pixel-space reconstruction | +| 3.3 | + PatchGAN discriminator (VQGAN-lite) | Perceptual loss still lacks local texture realism | + +**3.1 — Vanilla VAE**: Adam lr=1e-3, latent_dim=256, β=1.0. Plain convolutional encoder/decoder, MSE reconstruction. + +**3.2 — Perceptual loss**: VGG-16 feature matching at relu1_2, relu2_2, relu3_3. + +**3.3 — Patch discriminator**: PatchGAN adversarial loss targeting local texture realism. + +``` +L = L_mse + λ_perc·L_vgg + λ_adv·L_adv + β·L_kl +λ_perc=0.1, λ_adv=0.1, β=0.0001 +``` + +**Decoder fix** (applied from 3.1 onward): replace `ConvTranspose2d` with `Upsample(nearest) + Conv2d` — eliminates checkerboard artifacts. + +| Step | Model | FID ↓ | Main observed failure | +|---|---|---|---| +| 3.1 | VAE MSE | ? | ? | +| 3.2 | + Perceptual | ? | ? | +| 3.3 | + PatchGAN | ? | ? | + +--- + +## Phase 4 — DDPM Track + +**Goal**: A self-contained evolution story for the diffusion family. + +| Step | Model | Because | +|---|---|---| +| 4.1 | DDPM linear + ε-pred | Baseline | +| 4.2 | + cosine schedule | Linear schedule wastes capacity at low timesteps | +| 4.3 | + v-prediction | ε-prediction is unstable across the full trajectory | +| 4.4 | + wider U-Net / more attention | If 4.3 still underfits | + +**4.1 — Baseline**: AdamW lr=2e-4, base_ch=128, T=1000, attention at 8×8 and 16×16. DDIM sampling, 100 steps. + +**4.2 — Cosine schedule**: + +```python +def cosine_betas(T: int, s: float = 0.008): + t = torch.linspace(0, T, T + 1) + f = torch.cos((t / T + s) / (1 + s) * math.pi / 2) ** 2 + alpha_bar = f / f[0] + betas = 1 - alpha_bar[1:] / alpha_bar[:-1] + return betas.clamp(0, 0.999) +``` + +**4.3 — v-prediction**: replaces ε target with `v = √ᾱ·ε − √(1−ᾱ)·x₀`. + +**4.4 — Wider U-Net**: base_ch 128 → 192, attention at 8×8, 16×16, 32×32. 
+ +| Step | Model | FID ↓ | Main observed failure | +|---|---|---|---| +| 4.1 | DDPM linear + ε | ? | ? | +| 4.2 | + cosine | ? | ? | +| 4.3 | + v-pred | ? | ? | +| 4.4 | + wider | ? | ? | + +--- + +## Phase 5 — Cross-Family Comparison + +**Goal**: Side-by-side comparison of the best from each family (2.4, 3.3, 4.4) under identical conditions. + +Best-of-each retrained for 200 epochs at the same resolution and pipeline. + +### 5A — Quantitative + +| Model | FID ↓ | IS ↑ | LPIPS diversity ↑ | Params | Train time | +|---|---|---|---|:---:|:---:| +| Best GAN (2.4) | ? | ? | ? | ? | ? | +| Best VAE (3.3) | ? | ? | ? | ? | ? | +| Best DDPM (4.4) | ? | ? | ? | ? | ? | + +### 5B — Qualitative + +- **Visual grids**: 16-image sample grids per finalist +- **Progression**: epoch 10 → 50 → 100 → 200 side by side +- **Latent interpolation**: smooth transitions between two latent codes (GAN, VAE) +- **Diversity**: average pairwise LPIPS distance across 100 generated images +- **Failure modes**: worst-generated images per model + +--- + +## Compute Budget Notes + +Three families × multiple steps is a lot of runs. If compute is tight: + +- **Keep the GAN track complete** (2.1 → 2.4) — it carries the strongest narrative. +- **VAE and DDPM can drop the last step each** (stop at 3.2 and 4.3) without hurting the story. +- Phase 1 ablations can use 50 epochs instead of 100 — pipeline deltas show up early. 
+ +--- + +## Summary + +| Phase | Purpose | Models | Output | +|---|---|---|---| +| 1 | Pipeline selection | DCGAN @ 64×64 across data variants | Best pipeline | +| 2 | GAN evolution (main spine) | DCGAN → WGAN-GP → +SN+Attn → 128×128 | GAN failure→fix narrative | +| 3 | VAE evolution | VAE → +Perceptual → +PatchGAN | VAE failure→fix narrative | +| 4 | DDPM evolution | DDPM → cosine → v-pred → wider | DDPM failure→fix narrative | +| 5 | Cross-family comparison | Best of each, retrained 200ep | Final FID + IS + qualitative | + +**The narrative**: baseline fails in a specific way → fix targets that failure → new failure emerges → next fix targets that → repeat per family → compare families on equal footing. diff --git a/generator/configs/phase1/_base_dcgan.json b/generator/configs/phase1/_base_dcgan.json new file mode 100644 index 0000000..e00283f --- /dev/null +++ b/generator/configs/phase1/_base_dcgan.json @@ -0,0 +1,13 @@ +{ + "model": "dcgan", + "image_size": 64, + "latent_dim": 100, + "ngf": 64, + "ndf": 64, + "epochs": 50, + "lr_g": 2e-4, + "lr_d": 2e-4, + "beta1": 0.5, + "beta2": 0.999, + "augment": false +} diff --git a/generator/configs/phase1/p1a_dcgan_128.json b/generator/configs/phase1/p1a_dcgan_128.json new file mode 100644 index 0000000..d747b68 --- /dev/null +++ b/generator/configs/phase1/p1a_dcgan_128.json @@ -0,0 +1,5 @@ +{ + "extends": "_base_dcgan.json", + "run_name": "p1a_dcgan_128", + "image_size": 128 +} diff --git a/generator/configs/phase1/p1a_dcgan_64.json b/generator/configs/phase1/p1a_dcgan_64.json new file mode 100644 index 0000000..0f3814d --- /dev/null +++ b/generator/configs/phase1/p1a_dcgan_64.json @@ -0,0 +1,5 @@ +{ + "extends": "_base_dcgan.json", + "run_name": "p1a_dcgan_64", + "image_size": 64 +} diff --git a/generator/configs/phase1/p1b_dcgan_aligned.json b/generator/configs/phase1/p1b_dcgan_aligned.json new file mode 100644 index 0000000..3070666 --- /dev/null +++ b/generator/configs/phase1/p1b_dcgan_aligned.json @@ -0,0 +1,5 @@ 
+{ + "extends": "_base_dcgan.json", + "run_name": "p1b_dcgan_aligned", + "data_dir": "cropped/generator" +} diff --git a/generator/configs/phase1/p1b_dcgan_full.json b/generator/configs/phase1/p1b_dcgan_full.json new file mode 100644 index 0000000..0b41b55 --- /dev/null +++ b/generator/configs/phase1/p1b_dcgan_full.json @@ -0,0 +1,5 @@ +{ + "extends": "_base_dcgan.json", + "run_name": "p1b_dcgan_full", + "data_dir": "data" +} diff --git a/generator/configs/phase1/p1c_dcgan_full_aug.json b/generator/configs/phase1/p1c_dcgan_full_aug.json new file mode 100644 index 0000000..4170ed1 --- /dev/null +++ b/generator/configs/phase1/p1c_dcgan_full_aug.json @@ -0,0 +1,6 @@ +{ + "extends": "_base_dcgan.json", + "run_name": "p1c_dcgan_full_aug", + "data_dir": "cropped/generator", + "augment": true +} diff --git a/generator/configs/phase1/p1c_dcgan_hflip.json b/generator/configs/phase1/p1c_dcgan_hflip.json new file mode 100644 index 0000000..33d6a5c --- /dev/null +++ b/generator/configs/phase1/p1c_dcgan_hflip.json @@ -0,0 +1,6 @@ +{ + "extends": "_base_dcgan.json", + "run_name": "p1c_dcgan_hflip", + "data_dir": "cropped/generator", + "augment": false +} diff --git a/generator/configs/phase1/p1d_dcgan_combined.json b/generator/configs/phase1/p1d_dcgan_combined.json new file mode 100644 index 0000000..d0c2a9e --- /dev/null +++ b/generator/configs/phase1/p1d_dcgan_combined.json @@ -0,0 +1,5 @@ +{ + "extends": "_base_dcgan.json", + "run_name": "p1d_dcgan_combined", + "data_dir": ["data", "cropped/generator"] +} diff --git a/generator/configs/shared.json b/generator/configs/shared.json new file mode 100644 index 0000000..4ba1c9b --- /dev/null +++ b/generator/configs/shared.json @@ -0,0 +1,10 @@ +{ + "batch_size": 64, + "ema_decay": 0.9999, + "data_dir": "data", + "sources": ["wiki"], + "subsample": 1.0, + "sample_interval": 10, + "fid_interval": 25, + "fid_n_real": 5000 +} diff --git a/generator/run.py b/generator/run.py new file mode 100644 index 0000000..bebed7f --- /dev/null +++ 
b/generator/run.py @@ -0,0 +1,81 @@ +"""
+Train a generative model from a config file.
+
+Usage:
+    python run.py <config.json>
+    python run.py <config.json> --data-dir /path/to/data --output-root generator/outputs
+"""
+import argparse
+import json
+import sys
+import warnings
+from pathlib import Path
+
+# Allow running from project root (python3 generator/run.py ...) or from inside generator/
+_here = Path(__file__).resolve().parent
+if str(_here) not in sys.path:
+    sys.path.insert(0, str(_here))
+
+warnings.filterwarnings("ignore", message="Corrupt EXIF data", category=UserWarning)
+
+
+def parse_args(argv=None):
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("config_path")
+    parser.add_argument("--data-dir", default=None)
+    parser.add_argument("--output-root", default="generator/outputs")
+    parser.add_argument("--use-gpu", action="store_true", help="Accepted for pipeline compatibility (GPU auto-detected).")
+    return parser.parse_args(argv)
+
+
+def main(config_path, *, data_dir_override=None, output_root="generator/outputs"):
+    import torch
+    from src.data import GeneratorDataset, get_transform
+    from src.models import get_model
+    from src.training import train_dcgan
+    from src.utils import load_config
+
+    cfg = load_config(config_path)
+
+    run_name = cfg["run_name"]
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    data_dir = data_dir_override or cfg.get("data_dir", "data")
+    output_root = Path(output_root)
+    models_dir = output_root / "models"
+    logs_dir = output_root / "logs"
+
+    print(f"Run: {run_name}")
+    print(f"Config: {cfg}")
+    print(f"Device: {device}  Data: {data_dir}")
+
+    model, kind = get_model(cfg)
+
+    augment = cfg.get("augment", True)
+    transform = get_transform(cfg.get("image_size", 128), augment=augment)
+    dataset = GeneratorDataset(
+        data_dir,
+        sources=cfg.get("sources", ["wiki"]),
+        subsample=cfg.get("subsample", 1.0),
+        transform=transform,
+    )
+    print(f"Dataset size: {len(dataset)}")
+
+    if kind == "dcgan":
+        generator,
discriminator = model + history = train_dcgan( + generator, discriminator, dataset, cfg, + save_dir=models_dir, run_name=run_name, device=device, + ) + else: + raise NotImplementedError(f"kind={kind!r} not yet implemented in this phase") + + logs_dir.mkdir(parents=True, exist_ok=True) + out = logs_dir / f"{run_name}.json" + with open(out, "w") as f: + json.dump({"run_name": run_name, "config": cfg, "history": history}, f, indent=2) + print(f"\nSaved log to {out}") + + +if __name__ == "__main__": + args = parse_args(sys.argv[1:]) + main(args.config_path, data_dir_override=args.data_dir, output_root=args.output_root) diff --git a/generator/src/data/__init__.py b/generator/src/data/__init__.py new file mode 100644 index 0000000..a6053a1 --- /dev/null +++ b/generator/src/data/__init__.py @@ -0,0 +1,3 @@ +from src.data.dataset import GeneratorDataset, get_transform + +__all__ = ["GeneratorDataset", "get_transform"] diff --git a/generator/src/data/dataset.py b/generator/src/data/dataset.py new file mode 100644 index 0000000..5281296 --- /dev/null +++ b/generator/src/data/dataset.py @@ -0,0 +1,74 @@ +import random +from pathlib import Path + +from PIL import Image +import torchvision.transforms as T +from torch.utils.data import Dataset + + +class GeneratorDataset(Dataset): + """Unlabeled image dataset for generative model training. + + Loads images from source subdirectories and returns tensors only — + no labels, since generation is unsupervised. + """ + + def __init__(self, data_dir, sources=None, subsample=1.0, transform=None, seed=42): + self.transform = transform + self.samples = [] + + # Accept either a single root or a list of roots (used by 1D to mix + # raw + aligned crops in one dataset). 
+ roots = [data_dir] if isinstance(data_dir, (str, Path)) else list(data_dir) + if sources is None: + sources = ["wiki"] + + for root in roots: + root = Path(root) + if not root.exists(): + raise FileNotFoundError(f"Dataset root not found: {root}") + for source in sources: + source_dir = root / source + if not source_dir.exists(): + raise FileNotFoundError(f"Missing source directory: {source_dir}") + for subdir in sorted(source_dir.iterdir()): + if subdir.is_dir(): + for img_path in sorted(subdir.glob("*.jpg")): + self.samples.append(img_path) + + if subsample < 1.0: + rng = random.Random(seed) + n = max(1, int(len(self.samples) * subsample)) + self.samples = rng.sample(self.samples, n) + + def __len__(self): + return len(self.samples) + + def __getitem__(self, idx): + img = Image.open(self.samples[idx]).convert("RGB") + if self.transform: + img = self.transform(img) + return img + + +def get_transform(image_size: int, augment: bool = False) -> T.Compose: + """Build transform for generator training. Output is in [-1, 1]. + + augment=True adds horizontal flip + mild rotation + mild color jitter. + Use augment=False for validation / FID real-image sets. 
+ """ + ops = [ + T.Resize(image_size), + T.CenterCrop(image_size), + ] + if augment: + ops += [ + T.RandomHorizontalFlip(p=0.5), + T.RandomRotation(degrees=5, interpolation=T.InterpolationMode.BILINEAR), + T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.05), + ] + ops += [ + T.ToTensor(), + T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]), # -> [-1, 1] + ] + return T.Compose(ops) diff --git a/generator/src/models/__init__.py b/generator/src/models/__init__.py new file mode 100644 index 0000000..6f25924 --- /dev/null +++ b/generator/src/models/__init__.py @@ -0,0 +1,26 @@ +from typing import Callable +import torch.nn as nn + +_REGISTRY: dict[str, tuple[Callable, str]] = {} + + +def register(name: str, builder: Callable, *, kind: str) -> None: + _REGISTRY[name] = (builder, kind) + + +def get_model(cfg: dict) -> tuple: + """Return (model_or_pair, kind). + + kind="dcgan" -> (generator, discriminator) + """ + name = cfg.get("model") + entry = _REGISTRY.get(name) + if entry is None: + available = ", ".join(sorted(_REGISTRY)) + raise ValueError(f"Unknown model: {name!r}. Available: {available}") + builder, kind = entry + return builder(cfg), kind + + +from src.models import dcgan # noqa: E402, F401 + diff --git a/generator/src/models/dcgan.py b/generator/src/models/dcgan.py new file mode 100644 index 0000000..84a951b --- /dev/null +++ b/generator/src/models/dcgan.py @@ -0,0 +1,115 @@ +"""Vanilla DCGAN (Radford et al., 2015). + +Used as the Phase 1 baseline for cheap pipeline ablations. Architecture is +intentionally minimal — BatchNorm in both networks, no spectral norm, no +attention, no gradient penalty. The whole point is to be the cheapest GAN +we can run, so 1A–1D pipeline deltas show up in FID quickly. + +Depth scales with image_size: each step doubles the spatial dimension, +starting from 4×4 after the first transposed conv. 
+ 64 -> 5 layers (1 -> 4 -> 8 -> 16 -> 32 -> 64) + 128 -> 6 layers (1 -> 4 -> 8 -> 16 -> 32 -> 64 -> 128) +""" +import math + +import torch +import torch.nn as nn + +from src.models import register + + +def _init_weights(m): + classname = m.__class__.__name__ + if "Conv" in classname: + nn.init.normal_(m.weight, 0.0, 0.02) + elif "BatchNorm" in classname: + nn.init.normal_(m.weight, 1.0, 0.02) + nn.init.zeros_(m.bias) + + +def _n_upsamples(image_size: int) -> int: + """Number of 2x upsampling steps from 4x4 to image_size.""" + if image_size < 8 or image_size & (image_size - 1): + raise ValueError(f"image_size must be a power of two ≥ 8, got {image_size}") + return int(math.log2(image_size)) - 2 # 64 -> 4, 128 -> 5 + + +class DCGANGenerator(nn.Module): + """Maps (latent_dim x 1 x 1) -> (3 x image_size x image_size) in [-1, 1].""" + + def __init__(self, latent_dim: int = 100, ngf: int = 64, image_size: int = 64): + super().__init__() + n_up = _n_upsamples(image_size) # 64 -> 4 upsamples after the 1->4 init + max_mult = 2 ** (n_up - 1) # channel multiplier at the 4x4 stage + + layers: list[nn.Module] = [ + # 1x1 -> 4x4 + nn.ConvTranspose2d(latent_dim, ngf * max_mult, 4, 1, 0, bias=False), + nn.BatchNorm2d(ngf * max_mult), + nn.ReLU(inplace=True), + ] + # Each step halves the channel multiplier and doubles spatial size. + mult = max_mult + for _ in range(n_up - 1): + layers += [ + nn.ConvTranspose2d(ngf * mult, ngf * mult // 2, 4, 2, 1, bias=False), + nn.BatchNorm2d(ngf * mult // 2), + nn.ReLU(inplace=True), + ] + mult //= 2 + # Final layer to 3 channels, no BN, Tanh. 
+ layers += [ + nn.ConvTranspose2d(ngf * mult, 3, 4, 2, 1, bias=False), + nn.Tanh(), + ] + self.net = nn.Sequential(*layers) + self.apply(_init_weights) + + def forward(self, z: torch.Tensor) -> torch.Tensor: + return self.net(z) + + +class DCGANDiscriminator(nn.Module): + """Maps (3 x image_size x image_size) -> scalar logit (no sigmoid).""" + + def __init__(self, ndf: int = 64, image_size: int = 64): + super().__init__() + n_down = _n_upsamples(image_size) + layers: list[nn.Module] = [ + # First layer: no BN + nn.Conv2d(3, ndf, 4, 2, 1, bias=False), + nn.LeakyReLU(0.2, inplace=True), + ] + mult = 1 + for _ in range(n_down - 1): + layers += [ + nn.Conv2d(ndf * mult, ndf * mult * 2, 4, 2, 1, bias=False), + nn.BatchNorm2d(ndf * mult * 2), + nn.LeakyReLU(0.2, inplace=True), + ] + mult *= 2 + # 4x4 -> 1x1, scalar logit + layers += [nn.Conv2d(ndf * mult, 1, 4, 1, 0, bias=False)] + self.net = nn.Sequential(*layers) + self.apply(_init_weights) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.net(x).view(x.size(0)) + + +def _build(cfg: dict): + image_size = cfg.get("image_size", 64) + return ( + DCGANGenerator( + latent_dim=cfg.get("latent_dim", 100), + ngf=cfg.get("ngf", 64), + image_size=image_size, + ), + DCGANDiscriminator( + ndf=cfg.get("ndf", 64), + image_size=image_size, + ), + ) + + +register("dcgan", _build, kind="dcgan") diff --git a/generator/src/models/wgan.py b/generator/src/models/wgan.py new file mode 100644 index 0000000..1084697 --- /dev/null +++ b/generator/src/models/wgan.py @@ -0,0 +1,133 @@ +"""WGAN-GP with spectral normalization, self-attention, and GroupNorm. 
+
+Improvements over the original:
+- Generator: BatchNorm -> GroupNorm (no batch-size coupling, stable with varied content)
+- Critic: InstanceNorm -> spectral normalization (principled Lipschitz constraint)
+- Both: one SAGAN-style self-attention block at the 16x16 feature map
+- Larger capacity: ngf=128, ndf=128
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from src.models import register
+
+
+def _init_weights(m):
+    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
+        nn.init.normal_(m.weight, 0.0, 0.02)
+    elif isinstance(m, nn.GroupNorm) and m.weight is not None:
+        nn.init.normal_(m.weight, 1.0, 0.02)
+        nn.init.zeros_(m.bias)
+
+
+class SelfAttention(nn.Module):
+    def __init__(self, in_ch: int):
+        super().__init__()
+        mid = max(in_ch // 8, 1)
+        self.q = nn.Conv2d(in_ch, mid, 1, bias=False)
+        self.k = nn.Conv2d(in_ch, mid, 1, bias=False)
+        self.v = nn.Conv2d(in_ch, in_ch, 1, bias=False)
+        self.gamma = nn.Parameter(torch.zeros(1))
+        self._mid = mid
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        b, c, h, w = x.shape
+        q = self.q(x).view(b, self._mid, -1).transpose(-2, -1)  # (b, hw, mid)
+        k = self.k(x).view(b, self._mid, -1)  # (b, mid, hw)
+        v = self.v(x).view(b, c, -1)  # (b, c, hw)
+        attn = torch.softmax(q @ k * self._mid ** -0.5, dim=-1)  # (b, hw, hw)
+        out = (v @ attn.transpose(-2, -1)).view(b, c, h, w)
+        return x + self.gamma * out
+
+
+def _sn(module):
+    """Apply spectral normalization to a conv layer."""
+    return nn.utils.spectral_norm(module)
+
+
+class WGANGenerator(nn.Module):
+    """Maps (latent_dim x 1 x 1) -> (3 x 128 x 128) in [-1, 1].
+
+    Upsampling path: 1 -> 4 -> 8 -> 16 (+attn) -> 32 -> 64 -> 128
+    Self-attention sits at 16x16 (attention matrix 256x256 vs 1024x1024 at 32x32).
+ """ + + def __init__(self, latent_dim: int = 128, ngf: int = 64): + super().__init__() + self.net = nn.Sequential( + # 1x1 -> 4x4 + nn.ConvTranspose2d(latent_dim, ngf * 8, 4, 1, 0, bias=False), + nn.GroupNorm(8, ngf * 8), nn.ReLU(True), + # 4x4 -> 8x8 + nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), + nn.GroupNorm(8, ngf * 4), nn.ReLU(True), + # 8x8 -> 16x16 + nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False), + nn.GroupNorm(8, ngf * 2), nn.ReLU(True), + ) + self.attn = SelfAttention(ngf * 2) # applied at 16x16 + self.out = nn.Sequential( + # 16x16 -> 32x32 + nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), + nn.GroupNorm(8, ngf), nn.ReLU(True), + # 32x32 -> 64x64 + nn.ConvTranspose2d(ngf, ngf // 2, 4, 2, 1, bias=False), + nn.GroupNorm(8, ngf // 2), nn.ReLU(True), + # 64x64 -> 128x128 + nn.ConvTranspose2d(ngf // 2, 3, 4, 2, 1, bias=False), + nn.Tanh(), + ) + self.apply(_init_weights) + + def forward(self, z: torch.Tensor) -> torch.Tensor: + h = self.net(z) + h = self.attn(h) + return self.out(h) + + +class WGANCritic(nn.Module): + """Critic (no sigmoid) for WGAN-GP. All conv layers are spectrally normalized. 
+ + Downsampling path: 128 -> 64 -> 32 -> 16 (+attn) -> 8 -> 4 -> score + """ + + def __init__(self, ndf: int = 64): + super().__init__() + self.down = nn.Sequential( + # 128x128 -> 64x64 (no norm on first layer) + _sn(nn.Conv2d(3, ndf // 2, 4, 2, 1, bias=False)), + nn.LeakyReLU(0.2, True), + # 64x64 -> 32x32 + _sn(nn.Conv2d(ndf // 2, ndf, 4, 2, 1, bias=False)), + nn.LeakyReLU(0.2, True), + # 32x32 -> 16x16 + _sn(nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False)), + nn.LeakyReLU(0.2, True), + ) + self.attn = SelfAttention(ndf * 2) # applied at 16x16 + self.tail = nn.Sequential( + # 16x16 -> 8x8 + _sn(nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False)), + nn.LeakyReLU(0.2, True), + # 8x8 -> 4x4 + _sn(nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False)), + nn.LeakyReLU(0.2, True), + # 4x4 -> 1x1 + _sn(nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False)), + ) + self.apply(_init_weights) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + h = self.down(x) + h = self.attn(h) + return self.tail(h).view(x.size(0)) + + +def _build(cfg: dict): + return ( + WGANGenerator(latent_dim=cfg.get("latent_dim", 128), ngf=cfg.get("ngf", 128)), + WGANCritic(ndf=cfg.get("ndf", 128)), + ) + + +register("wgan", _build, kind="wgan") diff --git a/generator/src/training/__init__.py b/generator/src/training/__init__.py new file mode 100644 index 0000000..dc356a6 --- /dev/null +++ b/generator/src/training/__init__.py @@ -0,0 +1,3 @@ +from src.training.trainer import train_dcgan + +__all__ = ["train_dcgan"] diff --git a/generator/src/training/ema.py b/generator/src/training/ema.py new file mode 100644 index 0000000..4782787 --- /dev/null +++ b/generator/src/training/ema.py @@ -0,0 +1,22 @@ +import copy +import torch +import torch.nn as nn + + +class EMA: + """Exponential moving average of model weights. + + Maintains a shadow copy of the model. Call update() after each + optimizer step. Sample from ema.model, never from the training model. 
+ """ + + def __init__(self, model: nn.Module, decay: float = 0.9999): + self.decay = decay + self.model = copy.deepcopy(model).eval() + for p in self.model.parameters(): + p.requires_grad_(False) + + @torch.no_grad() + def update(self, model: nn.Module) -> None: + for p_ema, p in zip(self.model.parameters(), model.parameters()): + p_ema.data.mul_(self.decay).add_(p.data, alpha=1.0 - self.decay) diff --git a/generator/src/training/fid.py b/generator/src/training/fid.py new file mode 100644 index 0000000..2d0c842 --- /dev/null +++ b/generator/src/training/fid.py @@ -0,0 +1,49 @@ +"""FID evaluation helper. + +Computes Fréchet Inception Distance between a fixed set of real images +and a batch of generated images. Real images are stored as a tensor on CPU +and moved to device only during evaluation — this avoids re-reading disk +every call while keeping GPU memory free between evaluations. +""" +import torch +from torch.utils.data import DataLoader +from torchmetrics.image.fid import FrechetInceptionDistance + + +class FIDEvaluator: + def __init__(self, real_dataset, n_real: int = 10_000, device: str = "cuda"): + self.device = torch.device(device if torch.cuda.is_available() else "cpu") + self.n_real = n_real + + # Cache real images as a CPU tensor ([-1, 1] range) + imgs_list = [] + loader = DataLoader(real_dataset, batch_size=256, shuffle=False, + num_workers=4, drop_last=False) + for batch in loader: + imgs_list.append(batch.cpu()) + if sum(x.size(0) for x in imgs_list) >= n_real: + break + real = torch.cat(imgs_list)[:n_real] + self._real = real # stored on CPU, shape (N, 3, H, W) in [-1, 1] + + @torch.no_grad() + def compute(self, fake_imgs: torch.Tensor) -> float: + """Compute FID score. + + fake_imgs: float tensor in [-1, 1], shape (N, 3, H, W). + N should be at least 2048 for a reliable score. 
+ """ + fid = FrechetInceptionDistance(feature=2048, normalize=True).to(self.device) + + # Feed real images in batches + for i in range(0, self._real.size(0), 256): + batch = (self._real[i:i + 256] * 0.5 + 0.5).clamp(0, 1).to(self.device) + fid.update(batch, real=True) + + # Feed fake images in batches + fake = fake_imgs.cpu() + for i in range(0, fake.size(0), 256): + batch = (fake[i:i + 256] * 0.5 + 0.5).clamp(0, 1).to(self.device) + fid.update(batch, real=False) + + return float(fid.compute()) diff --git a/generator/src/training/trainer.py b/generator/src/training/trainer.py new file mode 100644 index 0000000..388d7c6 --- /dev/null +++ b/generator/src/training/trainer.py @@ -0,0 +1,166 @@ +import os +from pathlib import Path + +import torch +import torch.nn as nn +from torch.utils.data import DataLoader +from torchvision.utils import save_image +from tqdm import tqdm + +from src.training.ema import EMA +from src.training.fid import FIDEvaluator + +if hasattr(torch.amp, "GradScaler"): + _GradScaler = torch.amp.GradScaler + _autocast = torch.amp.autocast +else: + from torch.cuda.amp import GradScaler as _GS, autocast as _AC + _GradScaler = lambda device="", enabled=True, **kw: _GS(**kw) + _autocast = lambda device_type="", enabled=True, **kw: _AC(enabled=enabled, **kw) + + +def _save_samples(generator_ema, samples_dir: Path, epoch: int, *, latent_dim: int, device) -> None: + samples_dir.mkdir(parents=True, exist_ok=True) + with torch.no_grad(): + noise = torch.randn(16, latent_dim, 1, 1, device=device) + imgs = generator_ema.model(noise) # EMA model, [-1, 1] + imgs = (imgs.clamp(-1, 1) + 1.0) / 2.0 # -> [0, 1] + save_image(imgs, samples_dir / f"epoch_{epoch:04d}.png", nrow=4) + + +def train_dcgan( + generator, + discriminator, + train_dataset, + cfg: dict, + *, + save_dir, + run_name: str, + device: str = "cuda", +) -> dict: + """Vanilla DCGAN training loop with BCE loss (Radford et al., 2015). + + Used as the Phase 1 baseline for cheap pipeline ablations. 
No gradient + penalty, no n_critic, single G/D step per batch. + """ + device = torch.device(device if torch.cuda.is_available() else "cpu") + generator = generator.to(device) + discriminator = discriminator.to(device) + + n_g = sum(p.numel() for p in generator.parameters() if p.requires_grad) + n_d = sum(p.numel() for p in discriminator.parameters() if p.requires_grad) + print(f"Generator: {n_g:,} params Discriminator: {n_d:,} params") + + epochs = cfg["epochs"] + batch_size = cfg["batch_size"] + lr_g = cfg.get("lr_g", 2e-4) + lr_d = cfg.get("lr_d", 2e-4) + beta1 = cfg.get("beta1", 0.5) + beta2 = cfg.get("beta2", 0.999) + latent_dim = cfg.get("latent_dim", 100) + ema_decay = cfg.get("ema_decay", 0.9999) + sample_interval = cfg.get("sample_interval", 10) + fid_interval = cfg.get("fid_interval", 25) + fid_n_real = cfg.get("fid_n_real", 5000) + + loader = DataLoader( + train_dataset, batch_size=batch_size, shuffle=True, + num_workers=min(4, os.cpu_count() or 1), + pin_memory=(device.type == "cuda"), drop_last=True, + ) + opt_g = torch.optim.Adam(generator.parameters(), lr=lr_g, betas=(beta1, beta2)) + opt_d = torch.optim.Adam(discriminator.parameters(), lr=lr_d, betas=(beta1, beta2)) + bce = nn.BCEWithLogitsLoss() + + use_amp = device.type == "cuda" + scaler_g = _GradScaler("cuda", enabled=use_amp) + scaler_d = _GradScaler("cuda", enabled=use_amp) + + ema = EMA(generator, decay=ema_decay) + + save_dir = Path(save_dir) + save_dir.mkdir(parents=True, exist_ok=True) + samples_dir = save_dir.parent / "samples" / run_name + + fid_eval = FIDEvaluator(train_dataset, n_real=fid_n_real, device=str(device)) + + history = {"g_loss": [], "d_loss": [], "d_real": [], "d_fake": [], "fid": {}} + best_fid = float("inf") + print(f"Device: {device} AMP: {use_amp} Batches/epoch: {len(loader)}") + + for epoch in range(1, epochs + 1): + generator.train() + discriminator.train() + g_sum = d_sum = real_sum = fake_sum = 0.0 + n_batches = 0 + + for imgs in tqdm(loader, desc=f"Epoch 
{epoch}/{epochs}", leave=False): + imgs = imgs.to(device) + bsz = imgs.size(0) + real_labels = torch.ones(bsz, device=device) + fake_labels = torch.zeros(bsz, device=device) + + # ── Discriminator step ──────────────────────────────────────── + noise = torch.randn(bsz, latent_dim, 1, 1, device=device) + with _autocast("cuda", enabled=use_amp): + fake = generator(noise).detach() + d_real = discriminator(imgs) + d_fake = discriminator(fake) + d_loss = bce(d_real, real_labels) + bce(d_fake, fake_labels) + opt_d.zero_grad() + scaler_d.scale(d_loss).backward() + scaler_d.step(opt_d) + scaler_d.update() + + # ── Generator step ──────────────────────────────────────────── + noise = torch.randn(bsz, latent_dim, 1, 1, device=device) + with _autocast("cuda", enabled=use_amp): + g_loss = bce(discriminator(generator(noise)), real_labels) + opt_g.zero_grad() + scaler_g.scale(g_loss).backward() + scaler_g.step(opt_g) + scaler_g.update() + ema.update(generator) + + g_sum += g_loss.item() + d_sum += d_loss.item() + real_sum += d_real.mean().item() + fake_sum += d_fake.mean().item() + n_batches += 1 + + avg_g = g_sum / n_batches + avg_d = d_sum / n_batches + avg_r = real_sum / n_batches + avg_f = fake_sum / n_batches + history["g_loss"].append(avg_g) + history["d_loss"].append(avg_d) + history["d_real"].append(avg_r) + history["d_fake"].append(avg_f) + print( + f"[{epoch:03d}/{epochs}] " + f"G: {avg_g:.4f} D: {avg_d:.4f} D(real): {avg_r:.4f} D(fake): {avg_f:.4f}" + ) + + if epoch % sample_interval == 0: + _save_samples(ema, samples_dir, epoch, latent_dim=latent_dim, device=device) + + if epoch % fid_interval == 0: + generator.eval() + with torch.no_grad(): + fake_imgs = torch.cat([ + generator(torch.randn(64, latent_dim, 1, 1, device=device)) + for _ in range(fid_n_real // 64 + 1) + ])[:fid_n_real] + fid_score = fid_eval.compute(fake_imgs) + history["fid"][epoch] = fid_score + print(f" FID @ epoch {epoch}: {fid_score:.2f}") + + if fid_score < best_fid: + best_fid = fid_score + 
torch.save(generator.state_dict(), save_dir / f"{run_name}_best_g.pt") + torch.save(ema.model.state_dict(), save_dir / f"{run_name}_best_ema.pt") + + torch.save(generator.state_dict(), save_dir / f"{run_name}_final_g.pt") + torch.save(discriminator.state_dict(), save_dir / f"{run_name}_final_d.pt") + torch.save(ema.model.state_dict(), save_dir / f"{run_name}_final_ema.pt") + return history diff --git a/generator/src/utils/__init__.py b/generator/src/utils/__init__.py new file mode 100644 index 0000000..f8b9359 --- /dev/null +++ b/generator/src/utils/__init__.py @@ -0,0 +1,3 @@ +from src.utils.config import load_config + +__all__ = ["load_config"] diff --git a/generator/src/utils/config.py b/generator/src/utils/config.py new file mode 100644 index 0000000..c46ccdd --- /dev/null +++ b/generator/src/utils/config.py @@ -0,0 +1,58 @@ +import json +from pathlib import Path +from typing import Any, Dict, Optional + + +# Resolves the extends chain first, then overlays shared.json underneath so +# experiment-level keys always win over shared defaults. +def load_config(config_path: str, shared_path: Optional[str] = None) -> Dict[str, Any]: + config_path = Path(config_path) + cfg = _load_extends(config_path) + + if shared_path is None: + shared_path = config_path.parent.parent / "shared.json" + else: + shared_path = Path(shared_path) + + if shared_path.exists(): + with open(shared_path) as f: + shared_cfg = json.load(f) + cfg = _deep_merge(shared_cfg, cfg) + + return cfg + + +# Pops the "extends" key and recursively merges the parent config underneath; +# the seen set catches circular inheritance before it recurses infinitely. 
+def _load_extends(config_path: Path, seen: Optional[set[Path]] = None) -> Dict[str, Any]: + if seen is None: + seen = set() + resolved_path = config_path.resolve() + if resolved_path in seen: + chain = " -> ".join(str(p) for p in [*seen, resolved_path]) + raise ValueError(f"Circular config inheritance detected: {chain}") + seen.add(resolved_path) + + with open(config_path) as f: + cfg = json.load(f) + + base_ref = cfg.pop("extends", None) + if not base_ref: + seen.remove(resolved_path) + return cfg + + base_path = (config_path.parent / base_ref).resolve() + base_cfg = _load_extends(base_path, seen=seen) + seen.remove(resolved_path) + return _deep_merge(base_cfg, cfg) + + +# Override always wins; nested dicts are merged recursively rather than replaced. +def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]: + result = base.copy() + for key, value in override.items(): + if key in result and isinstance(result[key], dict) and isinstance(value, dict): + result[key] = _deep_merge(result[key], value) + else: + result[key] = value + return result diff --git a/generator/tools/facecrop.py b/generator/tools/facecrop.py new file mode 100644 index 0000000..0773e01 --- /dev/null +++ b/generator/tools/facecrop.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python3 +""" +Pre-align face images using MTCNN landmarks + similarity transform. + +This is the generator-side counterpart to classifier/tools/facecrop.py. +Difference: classifier uses bbox crop+resize; the generator wants landmark-based +alignment so the eyes, nose, and mouth land at fixed pixel positions in every +training image — structurally consistent training data for the generator. + +Output mirrors the source layout exactly: + data/wiki/14/37591914.jpg -> cropped/generator/wiki/14/37591914.jpg + +Resumable: already-aligned images are skipped by default. 
+ +Usage: + python generator/tools/facecrop.py + python generator/tools/facecrop.py --data-dir data --output-dir cropped/generator + python generator/tools/facecrop.py --sources wiki --device cpu + python generator/tools/facecrop.py --size 128 + python generator/tools/facecrop.py --no-skip-existing # reprocess everything +""" +import argparse +import sys +import warnings +from pathlib import Path + +warnings.filterwarnings("ignore", message=".*weights_only.*", category=FutureWarning) + +ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(ROOT)) + +# Generator trains on real images only (wiki). The other sources are AI-generated +# and aren't used as training targets for the generator, so we don't align them +# by default. Pass --sources to override. +SOURCES = ["wiki"] +ALL_SOURCES = ["wiki", "inpainting", "text2img", "insight"] + +# Reference landmark positions for a 128px aligned face. +# Source: standard FFHQ-style alignment template (eyes at y=51, nose at y=71, mouth at y=95). +# Scaled at runtime to match --size. +REF_LANDMARKS_128 = [ + (38.0, 51.0), # left eye + (90.0, 51.0), # right eye + (64.0, 71.0), # nose tip + (45.0, 95.0), # left mouth + (83.0, 95.0), # right mouth +] + +_DETECTORS: dict[str, object] = {} + + +def parse_args(): + p = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p.add_argument("--data-dir", default="data", help="Source dataset root (default: data)") + p.add_argument("--output-dir", default="cropped/generator", help="Output root (default: cropped/generator)") + p.add_argument("--size", type=int, default=128, help="Output image size in px, square (default: 128)") + p.add_argument("--device", default=None, help="'cpu' or 'cuda'. Default: auto-detect") + p.add_argument("--sources", nargs="+", default=None, metavar="SOURCE", + help=f"Sources to process. Default: {', '.join(SOURCES)} (real images only). 
" + f"All available: {', '.join(ALL_SOURCES)}") + p.add_argument("--skip-existing", dest="skip_existing", action="store_true", default=True, + help="Skip images already present in output-dir (default: on, resumable)") + p.add_argument("--no-skip-existing", dest="skip_existing", action="store_false", + help="Re-process all images even if already aligned") + return p.parse_args() + + +# ── alignment helpers ───────────────────────────────────────────────────────── + +def _ref_landmarks(size: int): + """Reference landmarks scaled from the 128px template to `size`.""" + import numpy as np + scale = size / 128.0 + return np.asarray( + [(x * scale, y * scale) for x, y in REF_LANDMARKS_128], + dtype=np.float32, + ) + + +def _align_from_landmarks(img, landmarks, size: int): + """Apply similarity transform so detected landmarks map to reference positions.""" + import numpy as np + from PIL import Image as PILImage + from skimage.transform import SimilarityTransform, warp + + src = np.asarray(landmarks, dtype=np.float32) # 5x2 detected + dst = _ref_landmarks(size) # 5x2 reference + + try: + tform = SimilarityTransform.from_estimate(src, dst) + except Exception: + return None + aligned = warp( + np.asarray(img), + tform.inverse, + output_shape=(size, size), + order=3, + preserve_range=True, + ).astype(np.uint8) + return PILImage.fromarray(aligned) + + +def _center_crop(img, size: int): + from PIL import Image as PILImage + w, h = img.size + side = min(w, h) + left, top = (w - side) // 2, (h - side) // 2 + return img.crop((left, top, left + side, top + side)).resize((size, size), PILImage.BILINEAR) + + +def _get_detectors(device: str): + """Return (standard, relaxed) MTCNN detectors with landmarks enabled.""" + if device in _DETECTORS: + return _DETECTORS[device] + + from facenet_pytorch import MTCNN + + standard = MTCNN( + keep_all=False, select_largest=True, + min_face_size=15, + device=device, post_process=False, + ) + relaxed = MTCNN( + keep_all=False, select_largest=True, 
+ min_face_size=10, + thresholds=[0.5, 0.6, 0.6], + device=device, post_process=False, + ) + _DETECTORS[device] = (standard, relaxed) + return standard, relaxed + + +# ── main ────────────────────────────────────────────────────────────────────── + +def main(): + args = parse_args() + + import torch + from PIL import Image + from tqdm import tqdm + + data_dir = Path(args.data_dir) + output_dir = Path(args.output_dir) + device = args.device or ("cuda" if torch.cuda.is_available() else "cpu") + sources = args.sources or SOURCES + + if not data_dir.exists(): + print(f"Error: data directory not found: {data_dir}") + sys.exit(1) + + for src in sources: + if not (data_dir / src).exists(): + print(f"Error: source directory not found: {data_dir / src}") + sys.exit(1) + + try: + import facenet_pytorch # noqa: F401 + import skimage # noqa: F401 + except ImportError as exc: + print(f"Error: missing dependency ({exc}).") + print(" Run: pip install facenet-pytorch scikit-image") + sys.exit(1) + + print(f"Data dir: {data_dir.resolve()}") + print(f"Output dir: {output_dir.resolve()}") + print(f"Sources: {', '.join(sources)}") + print(f"Device: {device}") + print(f"Size: {args.size}px") + print(f"Skip exist: {args.skip_existing}") + + standard, relaxed = _get_detectors(device) + + all_paths: list[Path] = [] + for src in sources: + for subdir in sorted((data_dir / src).iterdir()): + if subdir.is_dir(): + all_paths.extend(sorted(subdir.glob("*.jpg"))) + + print(f"\nTotal images: {len(all_paths):,}\n") + + n_processed = n_skipped = n_error = 0 + src_stats: dict[str, dict] = { + s: {"aligned": 0, "retry": 0, "fallback": 0} for s in sources + } + + for img_path in tqdm(all_paths, desc="Aligning", unit="img"): + rel = img_path.relative_to(data_dir) + out_path = output_dir / rel + src_name = img_path.parent.parent.name + + if args.skip_existing and out_path.exists(): + n_skipped += 1 + continue + + out_path.parent.mkdir(parents=True, exist_ok=True) + + try: + img = 
Image.open(img_path).convert("RGB") + except Exception as exc: + tqdm.write(f"[WARN] Cannot open {img_path.name}: {exc}") + n_error += 1 + continue + + aligned = None + try: + # Pass 1: detect landmarks on the original image + _, _, landmarks = standard.detect(img, landmarks=True) + if landmarks is not None and len(landmarks) > 0: + aligned = _align_from_landmarks(img, landmarks[0], args.size) + if aligned is not None: + src_stats[src_name]["aligned"] += 1 + + # Pass 2: upscale 2x and retry with relaxed thresholds + if aligned is None: + w, h = img.size + img2x = img.resize((w * 2, h * 2), Image.BILINEAR) + _, _, landmarks2 = relaxed.detect(img2x, landmarks=True) + if landmarks2 is not None and len(landmarks2) > 0: + lm_orig = [(x / 2, y / 2) for x, y in landmarks2[0]] + aligned = _align_from_landmarks(img, lm_orig, args.size) + if aligned is not None: + src_stats[src_name]["retry"] += 1 + + if aligned is None: + aligned = _center_crop(img, args.size) + src_stats[src_name]["fallback"] += 1 + except Exception as exc: + tqdm.write(f"[WARN] Detection failed for {img_path.name}: {exc}") + aligned = _center_crop(img, args.size) + src_stats[src_name]["fallback"] += 1 + + aligned.save(out_path, quality=95) + n_processed += 1 + + total = n_processed + n_skipped + n_aligned = sum(s["aligned"] for s in src_stats.values()) + n_retry = sum(s["retry"] for s in src_stats.values()) + n_fallback = sum(s["fallback"] for s in src_stats.values()) + denom = max(n_processed, 1) + + print(f"\n{'─' * 55}") + print(f" Total images : {total:>8,}") + print(f" Processed : {n_processed:>8,}") + print(f" Skipped (existed) : {n_skipped:>8,}") + print(f" Errors : {n_error:>8,}") + print(f" Pass-1 aligned : {n_aligned:>8,} ({n_aligned / denom:.1%})") + print(f" Pass-2 aligned : {n_retry:>8,} ({n_retry / denom:.1%}) ← 2x upscale retry") + print(f" Centre fallback : {n_fallback:>8,} ({n_fallback / denom:.1%})") + print() + print(f" {'Source':<12} {'pass-1':>8} {'pass-2':>8} {'fallback':>8} 
{'fallback%':>10}") + print(f" {'─'*12} {'─'*8} {'─'*8} {'─'*8} {'─'*10}") + for src in sources: + s = src_stats[src] + total_src = s["aligned"] + s["retry"] + s["fallback"] + fb_pct = s["fallback"] / max(total_src, 1) + print(f" {src:<12} {s['aligned']:>8,} {s['retry']:>8,} {s['fallback']:>8,} {fb_pct:>9.1%}") + print(f"{'─' * 55}") + print(f" Output: {output_dir.resolve()}") + print() + print("Next step — point your config at the aligned dataset:") + print(f' "data_dir": "{output_dir}"') + + +if __name__ == "__main__": + main() diff --git a/pipeline/README.md b/pipeline/README.md new file mode 100644 index 0000000..696a0fe --- /dev/null +++ b/pipeline/README.md @@ -0,0 +1,164 @@ +# Pipeline + +Orchestrates ephemeral Vast.ai GPU instances: searches for an offer, creates the instance, syncs the project, trains, downloads `outputs/`, and destroys the instance automatically. Generator runs also rsync `generator/outputs/` every 50 epochs while training is still running. + +## One-time setup + +Create `pipeline/.env`: + +```dotenv +VAST_API_KEY=your-vast-api-key +VAST_SSH_PRIVATE_KEY=/home/you/.ssh/id_ed25519 # optional, this is the default +``` + +The matching `.pub` file must exist alongside the private key. The pipeline registers it with Vast.ai automatically if it isn't there yet. + +## Commands + +### `run` — train on a remote GPU and fetch results + +``` +python -m pipeline run [options] +``` + +Accepts one or more config paths, or a single directory (all `*.json` inside, sorted). Duplicate configs (identical training settings after resolving `extends` and `shared.json`) are skipped automatically. 
+ +| Flag | Default | Description | +|------|---------|-------------| +| `configs` | *(required)* | One or more config paths, or a directory of JSON configs | +| `--download-data` | off | Download the DFF dataset via HuggingFace on the remote before training | +| `--send-cropped` | off | Rsync local `cropped/{classifier,generator}/` to remote (picks subdirectory based on config) | +| `--select-offer` | off | Interactively browse and pick the GPU offer | +| `--sort` | config | Ranking mode: `price`, `performance`, or `dlp_per_dollar` | +| `--region TEXT` | any | Filter by region, e.g. `europe`, `Portugal`, `US` | +| `--price FLOAT` | config | Max hourly price cap in USD | +| `--dry-run` | off | Print matching offers without creating an instance | +| `--keep-on-failure` | off | Do not destroy the instance if training fails | +| `--no-gpu` | off | Disable GPU training on remote (use CPU instead) | +| `--select-template` | off | Interactively choose a Vast.ai Docker template | +| `--template HASH` | config | Use a specific template hash ID | +| `--pipeline-config PATH` | none | JSON file that overrides `pipeline/defaults/vast.json` | + +**Examples:** + +```bash +# Cheapest available RTX 3090 in Europe, download data on remote +python -m pipeline run configs/resnet18.json --region europe --download-data + +# Browse offers interactively, sort by price +python -m pipeline run configs/resnet18.json --select-offer --sort price + +# Run all configs in a directory sequentially on one instance +python -m pipeline run configs/phase2/ --region europe + +# See what offers would be selected without spending money +python -m pipeline run configs/resnet18.json --dry-run --region europe + +# Keep the instance alive if something goes wrong (for debugging) +python -m pipeline run configs/resnet18.json --keep-on-failure + +# Cap price at $0.12/h +python -m pipeline run configs/resnet18.json --price 0.12 +``` + +### `offers` — inspect available GPU offers + +``` +python -m pipeline 
offers [options] +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--sort` | config | Ranking mode: `price`, `performance`, or `dlp_per_dollar` | +| `--region TEXT` | any | Region filter | +| `--price FLOAT` | config | Max hourly price cap | +| `--select-offer` | off | Interactive offer picker (prints the selected offer as JSON) | +| `--list-regions` | off | Print a count of available offers per region and exit | +| `--limit-output INT` | 10 | How many offers to print | +| `--pipeline-config PATH` | none | Pipeline config override | + +**Examples:** + +```bash +# See the 20 best-value offers under $0.15/h in Europe +python -m pipeline offers --region europe --price 0.15 --limit-output 20 + +# List which regions have matching GPUs +python -m pipeline offers --list-regions + +# Interactive picker — useful before committing to a run +python -m pipeline offers --select-offer --sort price +``` + +### `up` — create an instance without training + +Spins up an instance and prints SSH connection details. Useful for manual experiments or debugging. + +``` +python -m pipeline up [options] +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--label TEXT` | auto | Optional label for the instance | +| `--select-template` | off | Interactively choose a Vast.ai Docker template | +| `--template HASH` | config | Use a specific template hash ID | +| `--pipeline-config PATH` | none | Pipeline config override | + +```bash +python -m pipeline up +python -m pipeline up --label my-debug-session +``` + +### `status` — show instance details + +``` +python -m pipeline status [--pipeline-config PATH] +``` + +### `down` — destroy an instance + +``` +python -m pipeline down [--pipeline-config PATH] +``` + +## Pipeline config overrides + +Pass `--pipeline-config my_overrides.json` to override any field from `pipeline/defaults/vast.json`. Only the fields you specify are changed; the rest keep their defaults (deep-merged). 
Useful for switching GPU types or raising the price cap for a single run without editing defaults. + +**Example — allow RTX 4090, higher price cap:** + +```json +{ + "search": { + "gpu_names": ["RTX 4090"], + "max_dph_total": 0.45 + } +} +``` + +**Key fields in `pipeline/defaults/vast.json`:** + +| Section | Key | Default | Meaning | +|---------|-----|---------|---------| +| `search` | `gpu_names` | `["RTX 3090", "RTX 3090 Ti"]` | Accepted GPU models | +| `search` | `max_dph_total` | `0.40` | Max price per hour | +| `search` | `sort_mode` | `"dlp_per_dollar"` | Default ranking (`price`, `performance`, or `dlp_per_dollar`) | +| `search` | `min_reliability` | `0.98` | Minimum host reliability score | +| `instance` | `disk_gb` | `48` | Disk size provisioned on the instance | +| `instance` | `image` | `"vastai/pytorch:latest"` | Docker image | +| `remote` | `workspace_dir` | `"/workspace/DRL_PROJ"` | Remote working directory | +| `remote` | `ssh_timeout_seconds` | `900` | How long to wait for SSH to become available | + +## Full workflow example + +```bash +# 1. Check what's available and how much it costs +python -m pipeline offers --region europe --list-regions +python -m pipeline offers --region europe --sort price --limit-output 20 + +# 2. Run training (auto-selects best offer, downloads data if needed) +python -m pipeline run configs/resnet18.json --region europe --download-data + +# 3. 
Results land in classifier/outputs/ automatically +``` diff --git a/pipeline/__init__.py b/pipeline/__init__.py new file mode 100644 index 0000000..0bd6b92 --- /dev/null +++ b/pipeline/__init__.py @@ -0,0 +1,2 @@ +"""Ephemeral Vast.ai training pipeline.""" + diff --git a/pipeline/__main__.py b/pipeline/__main__.py new file mode 100644 index 0000000..9071a8e --- /dev/null +++ b/pipeline/__main__.py @@ -0,0 +1,6 @@ +from pipeline.cli import main + + +if __name__ == "__main__": + raise SystemExit(main()) + diff --git a/pipeline/cli.py b/pipeline/cli.py new file mode 100644 index 0000000..7837d38 --- /dev/null +++ b/pipeline/cli.py @@ -0,0 +1,130 @@ +import argparse +from pathlib import Path + +from pipeline.orchestrator import EphemeralVastRunner, RunOptions + + +# Accept one or more config files, or a single directory (all *.json inside, sorted) +def _resolve_configs(raw: list[str]) -> list[Path]: + if len(raw) == 1 and Path(raw[0]).is_dir(): + configs = sorted(Path(raw[0]).glob("*.json")) + if not configs: + raise ValueError(f"No JSON configs found in directory: {raw[0]}") + return configs + return [Path(p) for p in raw] + + +# Build the argparse CLI with subcommands: offers, run, up, status, down +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Ephemeral Vast.ai training pipeline.") + subparsers = parser.add_subparsers(dest="command", required=True) + + # Shared offer-related flags used by both "offers" and "run" subcommands + def add_offer_options(p: argparse.ArgumentParser) -> None: + p.add_argument("--pipeline-config", default=None, help="Optional JSON file with pipeline overrides.") + p.add_argument("--sort", choices=["price", "performance", "dlp_per_dollar"], default=None, help="Override offer ranking mode.") + p.add_argument("--region", default=None, help="Filter offers by region (e.g. 
'europe', 'Portugal').") + p.add_argument("--price", type=float, default=None, help="Override max hourly price cap in USD.") + p.add_argument("--select-offer", action="store_true", help="Interactively choose an offer.") + + # Shared template-related flags used by "run" and "up" subcommands + def add_template_options(p: argparse.ArgumentParser) -> None: + p.add_argument("--select-template", action="store_true", help="Interactively choose a Vast.ai template.") + p.add_argument("--template", default=None, help="Template hash ID to use for instance creation.") + + # ── offers ────────────────────────────────────────────────────────── + + offers_parser = subparsers.add_parser("offers", help="Inspect available Vast offers.") + add_offer_options(offers_parser) + offers_parser.add_argument("--list-regions", action="store_true", help="List matching regions and exit.") + offers_parser.add_argument("--limit-output", type=int, default=10, help="How many offers to print (default: 10).") + + # ── run ───────────────────────────────────────────────────────────── + + run_parser = subparsers.add_parser("run", help="Create instance, train, fetch outputs, destroy it.") + run_parser.add_argument("configs", nargs="+", help="One or more config paths, or a directory of JSON configs.") + add_offer_options(run_parser) + add_template_options(run_parser) + run_parser.add_argument("--download-data", action="store_true", help="Force download dataset via HuggingFace on the remote before training (auto-downloads if missing).") + run_parser.add_argument("--send-cropped", action="store_true", help="Rsync local cropped/ subdirectory to remote before training (sends only classifier or generator based on config).") + run_parser.add_argument("--keep-on-failure", action="store_true", help="Keep instance on failure.") + run_parser.add_argument("--dry-run", action="store_true", help="Search and print offers without creating instance.") + run_parser.add_argument("--no-gpu", action="store_true", 
help="Disable GPU training on remote (use CPU instead).") + + # ── up ────────────────────────────────────────────────────────────── + + up_parser = subparsers.add_parser("up", help="Create instance and print SSH details.") + up_parser.add_argument("--pipeline-config", default=None, help="Optional JSON file with pipeline overrides.") + add_template_options(up_parser) + up_parser.add_argument("--label", default=None, help="Optional label for the instance.") + + # ── status ────────────────────────────────────────────────────────── + + status_parser = subparsers.add_parser("status", help="Show instance details.") + status_parser.add_argument("instance_id", type=int) + status_parser.add_argument("--pipeline-config", default=None, help="Optional JSON file with pipeline overrides.") + + # ── down ──────────────────────────────────────────────────────────── + + down_parser = subparsers.add_parser("down", help="Destroy an instance.") + down_parser.add_argument("instance_id", type=int) + down_parser.add_argument("--pipeline-config", default=None, help="Optional JSON file with pipeline overrides.") + + return parser + + +# Parse CLI args and dispatch to the appropriate runner method +def main(argv=None) -> int: + parser = build_parser() + args = parser.parse_args(argv) + override = Path(args.pipeline_config) if getattr(args, "pipeline_config", None) else None + runner = EphemeralVastRunner(override) + + # ── dispatch ──────────────────────────────────────────────────────── + + if args.command == "offers": + runner.offers( + sort_mode=args.sort, + region=args.region, + price_cap=args.price, + select_offer=args.select_offer, + list_regions=args.list_regions, + limit_output=args.limit_output, + ) + return 0 + + if args.command == "run": + opts = RunOptions( + download_data=args.download_data, + send_cropped=args.send_cropped, + keep_on_failure=args.keep_on_failure, + dry_run=args.dry_run, + select_offer=args.select_offer, + select_template=args.select_template, + 
template_hash=args.template, + sort_mode=args.sort, + region=args.region, + price_cap=args.price, + use_gpu=not args.no_gpu, # Default to True, disable with --no-gpu + ) + runner.run(_resolve_configs(args.configs), opts) + return 0 + + if args.command == "up": + up_opts = RunOptions( + select_template=getattr(args, "select_template", False), + template_hash=getattr(args, "template", None), + ) + runner.up(label=args.label, opts=up_opts) + return 0 + + if args.command == "status": + runner.status(args.instance_id) + return 0 + + if args.command == "down": + runner.down(args.instance_id) + return 0 + + parser.error(f"Unknown command: {args.command}") + return 2 diff --git a/pipeline/config.py b/pipeline/config.py new file mode 100644 index 0000000..c90f445 --- /dev/null +++ b/pipeline/config.py @@ -0,0 +1,182 @@ +import json +import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + + +# Load environment variables from pipeline/.env if present +# This file is intentionally optional and gitignored +# Expected keys: +# VAST_API_KEY= +# VAST_SSH_PRIVATE_KEY=/home/your-user/.ssh/id_ed25519 +def load_dotenv(dotenv_path: Path) -> None: + if not dotenv_path.exists(): + return + for raw_line in dotenv_path.read_text(encoding="utf-8").splitlines(): + line = raw_line.strip() + if not line or line.startswith("#"): + continue + if "=" not in line: + continue + key, value = line.split("=", 1) + key = key.strip() + value = value.strip().strip("'").strip('"') + os.environ.setdefault(key, value) + +# Default offer search settings +DEFAULT_SEARCH: dict[str, Any] = { + "limit": 100, + "order_by": "dlperf", + "order_direction": "desc", + "sort_mode": "performance", + "offer_type": "ondemand", + "verified_only": True, + "rentable": True, + "rented": False, + "num_gpus": 1, + "min_reliability": 0.98, + "min_cuda_ram_mb": 0, + "min_cpu_ram_mb": 0, + "min_disk_space_gb": 0, + "min_direct_port_count": 1, + "max_dph_total": 0.20, + "gpu_names": 
["RTX 3090", "RTX 3090 Ti"], + "countries_exclude": [], +} + +# Default instance settings +DEFAULT_INSTANCE: dict[str, Any] = { + "image": "vastai/pytorch:latest", + "disk_gb": 48, + "target_state": "running", + "label_prefix": "drl-proj", + "template_hash_id": None, +} + +# Default remote SSH/workspace settings +DEFAULT_REMOTE: dict[str, Any] = { + "ssh_user": "root", + "workspace_dir": "/workspace/DRL_PROJ", + "remote_data_dir": "/workspace/data/DeepFakeFace", + "remote_output_root": "classifier/outputs", + "ssh_timeout_seconds": 900, + "poll_interval_seconds": 10, +} + +# Default local transfer paths +DEFAULT_TRANSFER: dict[str, Any] = { + "local_output_dir": "classifier/outputs", + "local_data_dir": "data", +} + + +# Merge nested dictionaries recursively +# Used when a user override file should replace only specific nested keys +# (for example, overriding just search.max_dph_total without redefining search) +def _deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]: + result = dict(base) + for key, value in override.items(): + if isinstance(value, dict) and isinstance(result.get(key), dict): + result[key] = _deep_merge(result[key], value) + else: + result[key] = value + return result + + +# Runtime pipeline config container and loader +@dataclass(slots=True) +class PipelineConfig: + search: dict[str, Any] = field(default_factory=lambda: dict(DEFAULT_SEARCH)) + instance: dict[str, Any] = field(default_factory=lambda: dict(DEFAULT_INSTANCE)) + remote: dict[str, Any] = field(default_factory=lambda: dict(DEFAULT_REMOTE)) + transfer: dict[str, Any] = field(default_factory=lambda: dict(DEFAULT_TRANSFER)) + keep_on_failure: bool = False + + # Load defaults, apply overrides, and return a PipelineConfig instance + @classmethod + def load(cls, project_root: Path, override_path: Path | None = None) -> "PipelineConfig": + load_dotenv(project_root / "pipeline" / ".env") + + # Load default settings from vast.json + defaults_path = project_root / 
"pipeline" / "defaults" / "vast.json" + with open(defaults_path, encoding="utf-8") as handle: + raw = json.load(handle) + + # Apply user-provided override config (deep merge keeps unspecified defaults) + if override_path is not None: + with open(override_path, encoding="utf-8") as handle: + raw = _deep_merge(raw, json.load(handle)) + return cls( + search=raw.get("search", {}), + instance=raw.get("instance", {}), + remote=raw.get("remote", {}), + transfer=raw.get("transfer", {}), + keep_on_failure=raw.get("keep_on_failure", False), + ) + + # Convert the internal "search" config shape into Vast.ai query format + def build_offer_query(self, *, price_cap: float | None = None) -> dict[str, Any]: + s = self.search + query: dict[str, Any] = { + "limit": s["limit"], + "order": [[s["order_by"], s["order_direction"]]], + "type": s["offer_type"], + "verified": {"eq": s["verified_only"]}, + "rentable": {"eq": s["rentable"]}, + "rented": {"eq": s["rented"]}, + "num_gpus": {"gte": s["num_gpus"]}, + "reliability2": {"gte": s["min_reliability"]}, + "gpu_ram": {"gte": s["min_cuda_ram_mb"]}, + "cpu_ram": {"gte": s["min_cpu_ram_mb"]}, + "disk_space": {"gte": s["min_disk_space_gb"]}, + "direct_port_count": {"gte": s["min_direct_port_count"]}, + } + # CLI-provided price cap takes precedence over config default + max_price = price_cap or s.get("max_dph_total") + if max_price is not None: + query["dph_total"] = {"lte": max_price} + gpu_names = s.get("gpu_names", []) + if gpu_names: + query["gpu_name"] = {"in": gpu_names} + excluded = s.get("countries_exclude", []) + if excluded: + query["geolocation"] = {"notin": excluded} + return query + + # Resolve the local directory where training outputs are saved + def local_output_path(self, project_root: Path) -> Path: + return (project_root / self.transfer["local_output_dir"]).resolve() + + # Resolve the local directory where datasets are stored + def local_data_path(self, project_root: Path) -> Path: + return (project_root / 
self.transfer["local_data_dir"]).resolve() + + +# Read the Vast.ai API key from the environment, failing fast if unset +def resolve_api_key() -> str: + api_key = os.environ.get("VAST_API_KEY") + if not api_key: + raise RuntimeError("VAST_API_KEY is not set.") + return api_key + + +# Resolve the local SSH private key path, defaulting to ~/.ssh/id_ed25519 +def resolve_ssh_private_key() -> Path: + raw = os.environ.get("VAST_SSH_PRIVATE_KEY", "~/.ssh/id_ed25519") + path = Path(raw).expanduser() + if not path.exists(): + raise RuntimeError( + f"SSH private key not found at {path}. Set VAST_SSH_PRIVATE_KEY to the correct path." + ) + return path + + +# Read the corresponding .pub file for a given private key +def read_public_key(private_key_path: Path) -> str: + public_key_path = Path(f"{private_key_path}.pub") + if not public_key_path.exists(): + raise RuntimeError( + f"SSH public key not found at {public_key_path}. Generate it or set VAST_SSH_PRIVATE_KEY." + ) + return public_key_path.read_text(encoding="utf-8").strip() diff --git a/pipeline/defaults/vast.json b/pipeline/defaults/vast.json new file mode 100644 index 0000000..54d369b --- /dev/null +++ b/pipeline/defaults/vast.json @@ -0,0 +1,41 @@ +{ + "search": { + "limit": 100, + "order_by": "dlperf", + "order_direction": "desc", + "sort_mode": "dlp_per_dollar", + "offer_type": "ondemand", + "verified_only": true, + "rentable": true, + "rented": false, + "num_gpus": 1, + "min_reliability": 0.98, + "min_cuda_ram_mb": 0, + "min_cpu_ram_mb": 0, + "min_disk_space_gb": 0, + "min_direct_port_count": 1, + "max_dph_total": 0.40, + "gpu_names": ["RTX 3090", "RTX 3090 Ti"], + "countries_exclude": [] + }, + "instance": { + "image": "vastai/pytorch:latest", + "disk_gb": 48, + "target_state": "running", + "label_prefix": "drl-proj", + "template_hash_id": null + }, + "remote": { + "ssh_user": "root", + "workspace_dir": "/workspace/DRL_PROJ", + "remote_data_dir": "/workspace/data/DeepFakeFace", + "remote_output_root": 
import json
import re
import threading
import time
from dataclasses import asdict, dataclass
from datetime import UTC, datetime
from pathlib import Path
from typing import Any

from pipeline.config import (
    PipelineConfig,
    read_public_key,
    resolve_api_key,
    resolve_ssh_private_key,
)
from pipeline.remote import (
    RemoteCommandError,
    SshTarget,
    fetch_directory,
    is_process_alive,
    read_remote_file,
    rsync_file,
    rsync_project,
    run_detached,
    shell_join,
    ssh,
    ssh_output,
    tail_log,
    wait_for_ssh,
)
from pipeline.vast_api import VastApiClient, VastApiError, VastInstance


# ── data classes ─────────────────────────────────────────────────────

# Snapshot of a pipeline run written to disk for traceability
@dataclass(slots=True)
class RunManifest:
    created_at: str            # ISO-ish timestamp of when the run started
    config_paths: list[str]    # training configs submitted for this run
    instance_id: int | None    # Vast.ai instance id (None before creation)
    offer_id: int | None       # id of the accepted offer (None before selection)
    ssh_host: str | None       # SSH host once the instance is reachable
    ssh_port: int | None       # SSH port once the instance is reachable
    status: str                # last recorded lifecycle status
    remote_workspace: str | None  # workspace dir on the remote machine


# CLI flags controlling how the pipeline run behaves
@dataclass
class RunOptions:
    download_data: bool = False      # fetch the dataset on the remote before training
    send_cropped: bool = False       # rsync local cropped/ subdirectory to the remote
    keep_on_failure: bool = False    # don't destroy the instance when training fails
    dry_run: bool = False            # print matching offers, create nothing
    select_offer: bool = False       # interactive offer picker
    select_template: bool = False    # interactive template picker
    template_hash: str | None = None # explicit Vast template hash id
    sort_mode: str | None = None     # offer ranking: price / performance / dlp_per_dollar
    region: str | None = None        # region filter string (e.g. "europe")
    price_cap: float | None = None   # max $/h, overrides the config value
    list_regions: bool = False       # print region counts and exit
    use_gpu: bool = True             # False => train on CPU (--no-gpu)


# ── constants ─────────────────────────────────────────────────────────

# ISO-3166-1-alpha-2 country codes used by the --region europe filter
EUROPE_REGION_CODES = {
    "AL", "AD", "AM", "AT", "AZ", "BA", "BE", "BG", "BY", "CH", "CY", "CZ",
    "DE", "DK", "EE", "ES", "FI", "FR", "GB", "GE", "GR", "HR", "HU", "IE",
    "IS", "IT", "LI", "LT", "LU", "LV", "MC", "MD", "ME", "MK", "MT", "NL",
    "NO", "PL", "PT", "RO", "RS", "SE", "SI", "SK", "SM", "TR", "UA", "VA",
}

# How often (in epochs) to rsync generator outputs back during training
_GENERATOR_SYNC_INTERVAL = 50


# Raised when the user aborts interactive offer selection
class OfferSelectionAborted(RuntimeError):
    """Sentinel: user pressed 'q' (or Ctrl-C) in the offer picker."""
    pass


# Raised when the user aborts interactive template selection
class TemplateSelectionAborted(RuntimeError):
    """Sentinel: user pressed 'q' (or Ctrl-C) in the template picker."""
    pass


# ── runner ────────────────────────────────────────────────────────────

# Orchestrates the full lifecycle: search offers -> rent GPU -> train -> fetch results -> destroy
class EphemeralVastRunner:

    # Pre-defined Vast.ai image templates the user can pick interactively.
    # The "none" entry falls through to the config's default image.
    TEMPLATE_CATALOG: list[dict[str, str]] = [
        {
            "hash_id": "661d064bbda1f2a133816b6d55da07c3",
            "name": "PyTorch (cuDNN Devel)",
            "image": "pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel",
        },
        {
            "hash_id": "b9e5a8f3d4c1e7f6a2b0d8c9e5f1a3b7",
            "name": "PyTorch (Latest)",
            "image": "pytorch/pytorch:latest",
        },
        {
            "hash_id": "none",
            "name": "Custom image (no template)",
            "image": "",
        },
    ]

    # Resolve the project root (two levels up from this file)
    def __init__(self, override_path: Path | None) -> None:
        """Load config and credentials, and build the Vast API client.

        override_path: optional JSON file deep-merged over pipeline/defaults/vast.json.
        Raises RuntimeError (from the resolvers) if the API key or SSH keys are missing.
        """
        self.project_root = Path(__file__).resolve().parent.parent
        self.config = PipelineConfig.load(self.project_root, override_path)
        self.api = VastApiClient(resolve_api_key())
        self.private_key = resolve_ssh_private_key()
        self.public_key = read_public_key(self.private_key)
        # rsync exclude list used when syncing the project to the remote
        self.exclude_file = self.project_root / "pipeline" / "rsync-excludes.txt"
"1d2h30m") + @staticmethod + def _fmt_duration(seconds: float) -> str: + s = max(int(seconds), 0) + days, s = divmod(s, 86400) + hours, s = divmod(s, 3600) + minutes, _ = divmod(s, 60) + parts: list[str] = [] + if days: + parts.append(f"{days}d") + if hours or days: + parts.append(f"{hours}h") + parts.append(f"{minutes}m") + return "".join(parts) + + # Estimate cost from wall-clock time and the offer's $/h rate + @staticmethod + def _fmt_cost(elapsed_seconds: float, cost_per_hour: float) -> str: + return f"${(elapsed_seconds / 3600) * cost_per_hour:.2f}" + + # ── offer search & selection ────────────────────────────────────── + + # Build a sort tuple that ranks offers by the chosen mode (performance / price / dlp_per_dollar) + @staticmethod + def _offer_sort_key(offer: dict, sort_mode: str) -> tuple: + price = float(offer.get("dph_total", float("inf"))) + dlperf = float(offer.get("dlperf", float("-inf"))) + reliability = float(offer.get("reliability2", 0.0)) + if sort_mode == "price": + return (price, -dlperf, -reliability) + if sort_mode == "dlp_per_dollar": + per_dollar = dlperf / price if price > 0 else 0.0 + return (-per_dollar, -reliability, price) + return (-dlperf, price, -reliability) + + # Match a user-friendly region string (e.g. 
"europe") against the offer's geolocation code + @staticmethod + def _region_matches(offered: str, requested: str) -> bool: + req = requested.strip().lower() + if not req: + return True + if req == "europe": + code = offered.rsplit(", ", 1)[-1].strip() if ", " in offered else "" + return code in EUROPE_REGION_CODES + return req in offered.lower() + + # Query Vast.ai for matching offers, optionally filter by region, then sort + def _search_offers( + self, + sort_mode: str | None = None, + region: str | None = None, + price_cap: float | None = None, + ) -> list[dict]: + query = self.config.build_offer_query(price_cap=price_cap) + offers = self.api.search_offers(query) + if not offers: + raise VastApiError("No Vast offers matched the configured filters.") + if region: + offers = [o for o in offers if self._region_matches(o.get("geolocation") or "", region)] + if not offers: + raise VastApiError(f"No offers matched region filter: {region!r}") + mode = sort_mode or self.config.search.get("sort_mode", "performance") + return sorted(offers, key=lambda o: self._offer_sort_key(o, mode)) + + # Print a summary of available regions and their offer counts + @staticmethod + def _print_regions(offers: list[dict]) -> None: + counts: dict[str, int] = {} + for o in offers: + region = o.get("geolocation") or "Unknown" + counts[region] = counts.get(region, 0) + 1 + for region, count in sorted(counts.items(), key=lambda kv: (-kv[1], kv[0])): + print(f"{region}: {count}") + + # Render one page of offers with GPU specs, price, and reliability + def _print_offer_page(self, offers: list[dict], *, page: int, page_size: int) -> None: + start = page * page_size + end = min(start + page_size, len(offers)) + print(f"Offers {start + 1}–{end} of {len(offers)}") + for idx in range(start, end): + o = offers[idx] + vram_gb = o.get("gpu_ram", 0) / 1024 + duration = self._fmt_duration(float(o.get("duration") or 0)) + print( + f" [{idx + 1}] {o.get('gpu_name')} " + f"gpus={o.get('num_gpus')} " + 
f"vram={vram_gb:.0f}GB " + f"dlperf={o.get('dlperf', 0):.1f} " + f"$/h={o.get('dph_total', 0):.3f} " + f"reliability={o.get('reliability2', 0):.3f} " + f"avail={duration} " + f"region={o.get('geolocation')} " + f"id={o.get('id')}" + ) + + # Paginated interactive prompt — user picks an offer or aborts + def _choose_offer(self, offers: list[dict]) -> dict: + page, page_size = 0, 10 + last_page = max((len(offers) - 1) // page_size, 0) + while True: + self._print_offer_page(offers, page=page, page_size=page_size) + raw = input("Select offer [number / n / p / q]: ").strip().lower() + if raw == "n": + page = min(page + 1, last_page) + elif raw == "p": + page = max(page - 1, 0) + elif raw == "q": + raise OfferSelectionAborted + elif raw.isdigit(): + idx = int(raw) - 1 + if 0 <= idx < len(offers): + return offers[idx] + print(" Out of range.") + else: + print(" Invalid input.") + + # ── template selection ──────────────────────────────────────────── + + # Decide which Vast template hash to use: CLI flag, interactive pick, or config default + def _resolve_template(self, opts: RunOptions) -> str | None: + if opts.template_hash: + return opts.template_hash + if opts.select_template: + try: + return self._choose_template() + except (TemplateSelectionAborted, KeyboardInterrupt): + print("Template selection aborted, using default image.") + return None + return self.config.instance.get("template_hash_id") + + # Interactive prompt — user picks a Docker image template + def _choose_template(self) -> str | None: + print("Available templates:") + for idx, tpl in enumerate(self.TEMPLATE_CATALOG, 1): + print(f" [{idx}] {tpl['name']} ({tpl['image'] or 'config image'})") + while True: + raw = input("Select template [number / q]: ").strip().lower() + if raw == "q": + raise TemplateSelectionAborted + if raw.isdigit(): + idx = int(raw) - 1 + if 0 <= idx < len(self.TEMPLATE_CATALOG): + tpl = self.TEMPLATE_CATALOG[idx] + if tpl["hash_id"] == "none": + return None + if tpl["image"]: + 
self.config.instance["image"] = tpl["image"] + print(f" Selected: {tpl['name']}") + return tpl["hash_id"] + print(" Invalid input.") + + # ── instance lifecycle ──────────────────────────────────────────── + + # Build a unique instance label from a prefix, stem, and UTC timestamp + def _label_for(self, stem: str) -> str: + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + return f"{self.config.instance['label_prefix']}-{stem}-{ts}" + + # Assemble the JSON payload sent to Vast.ai when renting an instance + def _build_instance_payload(self, label: str, *, template_hash_id: str | None = None) -> dict[str, Any]: + inst = self.config.instance + payload: dict[str, Any] = { + "image": inst["image"], + "disk": inst["disk_gb"], + "runtype": "ssh_direc ssh_proxy", + "target_state": inst["target_state"], + "label": label, + } + if template_hash_id: + payload["template_hash_id"] = template_hash_id + return payload + + # Poll the API until the instance is running and has SSH credentials + def _wait_for_instance(self, instance_id: int) -> VastInstance: + deadline = time.time() + self.config.remote["ssh_timeout_seconds"] + poll = self.config.remote["poll_interval_seconds"] + while time.time() < deadline: + instance = self.api.show_instance(instance_id) + if instance.actual_status == "running" and instance.ssh_host and instance.ssh_port: + return instance + print(f" instance status: {instance.actual_status or 'pending'}...") + time.sleep(poll) + raise VastApiError(f"Timed out waiting for instance {instance_id} to be SSH-ready.") + + # Build an SshTarget from the running instance's connection details + def _build_target(self, instance: VastInstance) -> SshTarget: + return SshTarget( + user=self.config.remote["ssh_user"], + host=instance.ssh_host or instance.public_ipaddr or "", + port=int(instance.ssh_port or 22), + private_key=self.private_key, + ) + + # ── manifest ────────────────────────────────────────────────────── + + # Persist the run manifest as a timestamped JSON file 
under the output directory + def _write_manifest(self, manifest: RunManifest, local_output_root: Path) -> Path: + d = local_output_root / "pipeline" + d.mkdir(parents=True, exist_ok=True) + ts = manifest.created_at.replace(":", "").replace("-", "") + path = d / f"{ts}.json" + with open(path, "w", encoding="utf-8") as fh: + json.dump(asdict(manifest), fh, indent=2) + return path + + # ── remote workspace setup ──────────────────────────────────────── + + # Upload project files, bootstrap the venv, and ensure training data is present + def _prepare_remote_workspace( + self, target: SshTarget, *, download_data: bool, send_cropped: bool, + first_config: Path | None = None, + ) -> None: + ws = self.config.remote["workspace_dir"] + poll = self.config.remote["poll_interval_seconds"] + + print(" Syncing project files...") + ssh(target, shell_join(["mkdir", "-p", ws])) + rsync_project(self.project_root, target, ws, self.exclude_file) + + print(" Bootstrapping remote environment...") + ssh(target, shell_join(["bash", "-lc", f"cd {ws} && bash pipeline/scripts/bootstrap_env.sh"])) + + if send_cropped and first_config is not None: + # Determine which cropped subdirectory to send based on the first config + first_mod, _ = self._module_for_config(first_config) + crop_subdir = "generator" if first_mod.startswith("generator/") else "classifier" + local_zip = self.project_root / f"cropped_{crop_subdir}.zip" + if not local_zip.exists(): + print(f" Warning: --send-cropped set but cropped_{crop_subdir}.zip not found.") + print(f" Create it with: zip -r cropped_{crop_subdir}.zip cropped/{crop_subdir}/") + else: + size_mb = local_zip.stat().st_size / (1024 * 1024) + print(f" Sending cropped_{crop_subdir}.zip ({size_mb:.0f} MB)...") + rsync_file(local_zip, target, ws) + print(" Unzipping on remote...") + ssh(target, shell_join(["bash", "-lc", f"cd {ws} && unzip -q -o cropped_{crop_subdir}.zip"])) + print(f" Pre-cropped {crop_subdir} images ready.") + + data_ready = 
self._remote_data_exists(target, f"{ws}/data") + if download_data or not data_ready: + # Download the dataset in a detached process so the SSH timeout doesn't kill it + print(" Downloading dataset from HuggingFace...") + log = f"{ws}/.pipeline_logs/fetch_ds.log" + ssh(target, f"mkdir -p {ws}/.pipeline_logs") + pid = run_detached( + target, + f"cd {ws} && source .venv/bin/activate && python3 classifier/tools/fetch_ds.py", + log, + ) + reconnects = 0 + # Poll until the fetch process exits, with reconnect logic for SSH drops + while True: + try: + if not is_process_alive(target, pid): + break + reconnects = 0 + except RemoteCommandError as exc: + if "Command failed (255)" not in str(exc) or reconnects >= 10: + raise + reconnects += 1 + time.sleep(min(5 * reconnects, 30)) + continue + time.sleep(poll) + print(" Dataset ready.") + + # ── config routing ──────────────────────────────────────────────── + + # Determine which training script and output dir to use based on config location + def _module_for_config(self, config_path: Path) -> tuple[str, str]: + try: + rel = config_path.resolve().relative_to(self.project_root) + except ValueError: + rel = config_path + if rel.parts[0] == "generator": + return "generator/run.py", "generator/outputs" + return "classifier/run.py", "classifier/outputs" + + # Merge two dicts recursively (override wins on leaf keys) + @staticmethod + def _deep_merge_dicts(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]: + result = base.copy() + for key, value in override.items(): + if key in result and isinstance(result[key], dict) and isinstance(value, dict): + result[key] = EphemeralVastRunner._deep_merge_dicts(result[key], value) + else: + result[key] = value + return result + + # Recursively resolve "extends" references in JSON configs with cycle detection + def _load_config_with_extends(self, config_path: Path, seen: set[Path] | None = None) -> dict[str, Any]: + if seen is None: + seen = set() + resolved = config_path.resolve() 
+ if resolved in seen: + raise ValueError(f"Circular config inheritance detected at: {config_path}") + seen.add(resolved) + with open(config_path, encoding="utf-8") as fh: + cfg = json.load(fh) + base_ref = cfg.pop("extends", None) + if not base_ref: + seen.remove(resolved) + return cfg + base_path = (config_path.parent / base_ref).resolve() + base_cfg = self._load_config_with_extends(base_path, seen=seen) + seen.remove(resolved) + return self._deep_merge_dicts(base_cfg, cfg) + + # Return a stable signature used to detect duplicate training configs + def _normalized_config_signature(self, config_path: Path) -> str: + run_script, _ = self._module_for_config(config_path) + # Generator configs do not currently use shared/extends inheritance. + if run_script.startswith("generator/"): + with open(config_path, encoding="utf-8") as fh: + cfg = json.load(fh) + else: + cfg = self._load_config_with_extends(config_path) + shared_path = config_path.parent.parent / "shared.json" + if shared_path.exists(): + with open(shared_path, encoding="utf-8") as fh: + shared_cfg = json.load(fh) + cfg = self._deep_merge_dicts(shared_cfg, cfg) + + # run_name should not influence whether two configs are equivalent to train. 
+ cfg.pop("run_name", None) + return json.dumps(cfg, sort_keys=True, separators=(",", ":")) + + # Detect configs that are pure extends (only run_name + extends, no new training settings) + # These are pointers to an already-trained experiment and should be skipped + def _is_pure_extend(self, config_path: Path) -> str | None: + with open(config_path, encoding="utf-8") as fh: + raw = json.load(fh) + if "extends" not in raw: + return None + non_meta = {k for k in raw if k not in ("run_name", "extends")} + if non_meta: + return None + base_path = (config_path.parent / raw["extends"]).resolve() + return str(base_path.relative_to(self.project_root)) + + def _dedupe_training_configs(self, config_paths: list[Path]) -> list[Path]: + seen: dict[str, Path] = {} + deduped: list[Path] = [] + for cp in config_paths: + pure_extends = self._is_pure_extend(cp) + if pure_extends is not None: + print(f"Skipping {cp.name} (pure extend of {pure_extends})") + continue + sig = self._normalized_config_signature(cp) + if sig in seen: + first = seen[sig] + print(f"Skipping duplicate config {cp.name} (same training settings as {first.name})") + continue + seen[sig] = cp + deduped.append(cp) + return deduped + + # ── remote directory checks ─────────────────────────────────────── + + # Check whether a directory exists on the remote host + def _remote_dir_exists(self, target: SshTarget, path: str) -> bool: + out = ssh_output(target, f"if [ -d {path} ]; then echo yes; else echo no; fi").strip() + return out == "yes" + + # Check that the dataset is populated (not just an empty directory) + def _remote_data_exists(self, target: SshTarget, data_dir: str) -> bool: + out = ssh_output(target, f"if [ -d {data_dir}/wiki ]; then echo yes; else echo no; fi").strip() + return out == "yes" + + # ── training ───────────────────────────────────────────────────── + + # Launch the training script inside nohup, stream logs, and handle reconnects + def _run_training(self, target: SshTarget, config_path: Path, 
opts: RunOptions) -> None: + ws = self.config.remote["workspace_dir"] + config_rel = config_path.resolve().relative_to(self.project_root) + run_script, output_root = self._module_for_config(config_path) + is_generator = run_script.startswith("generator/") + + run_cmd = shell_join(["python3", "-u", run_script, str(config_rel), "--output-root", output_root]) + if opts.use_gpu: + run_cmd += " --use-gpu" + + log_dir = f"{ws}/.pipeline_logs" + log_path = f"{log_dir}/{config_path.stem}.log" + exit_path = f"{log_dir}/{config_path.stem}.exit" + + ssh(target, f"mkdir -p {log_dir}") + + # Run training under nohup; write the exit code to a marker file when done + pid = run_detached( + target, + f"cd {ws} && source .venv/bin/activate && {run_cmd}; echo $? > {exit_path}", + log_path, + ) + print(f" Training started (PID {pid})") + + poll = self.config.remote["poll_interval_seconds"] + max_reconnects = 10 + reconnects = 0 + next_sync = _GENERATOR_SYNC_INTERVAL if is_generator else None + local_output_root = self.project_root / output_root + stop_streaming = threading.Event() + + # Background thread: tail the remote log and print tqdm-aware output + def _stream_worker() -> None: + tail_proc = None + last_was_progress = False + try: + time.sleep(1) # wait for the log file to be created + tail_proc = tail_log(target, log_path) + for raw in tail_proc.stdout: + if stop_streaming.is_set(): + break + # tqdm uses \r for in-place updates; take only the last segment + line = raw.rstrip("\r\n").rsplit("\r", 1)[-1].rstrip() + if not line: + continue + is_progress = "it/s]" in line or "%|" in line + if is_progress: + print(f"\r {line}", end="", flush=True) + last_was_progress = True + else: + if last_was_progress: + print(flush=True) + last_was_progress = False + print(f" {line}", flush=True) + except Exception: + pass + finally: + if last_was_progress: + print(flush=True) + if tail_proc is not None: + tail_proc.terminate() + tail_proc.wait(timeout=5) + + stream_thread = 
threading.Thread(target=_stream_worker, daemon=True) + stream_thread.start() + + try: + # Main polling loop: check if the process is alive, periodically sync generator outputs + while True: + try: + alive = is_process_alive(target, pid) + except RemoteCommandError as exc: + if "Command failed (255)" not in str(exc) or reconnects >= max_reconnects: + raise + reconnects += 1 + print(f"\n SSH dropped (reconnect {reconnects}/{max_reconnects}), " + "training continues on remote...") + time.sleep(min(5 * reconnects, 30)) + continue + + if is_generator and next_sync is not None: + epoch = self._latest_epoch_in_log(target, log_path) + if epoch >= next_sync: + print(f" Syncing generator outputs at epoch {next_sync}...") + self._fetch_outputs(target, output_root, local_output_root) + next_sync += _GENERATOR_SYNC_INTERVAL + + if not alive: + break + + reconnects = 0 + time.sleep(poll) + finally: + stop_streaming.set() + stream_thread.join(timeout=10) + + # Read the exit code marker that the nohup wrapper wrote + raw_exit = read_remote_file(target, exit_path) + exit_code = int(raw_exit) if raw_exit is not None else -1 + + if exit_code != 0: + if self._remote_dir_exists(target, f"{ws}/{output_root}"): + self._fetch_outputs(target, output_root, local_output_root) + log_tail = read_remote_file(target, log_path) + snippet = "\n".join((log_tail or "").splitlines()[-30:]) + raise RemoteCommandError(f"Training failed (exit {exit_code}).\n{snippet}") + + # Parse the latest epoch number from the training log (used for generator sync scheduling) + @staticmethod + def _latest_epoch_in_log(target: SshTarget, log_path: str) -> int: + try: + out = ssh_output(target, f"tail -n 200 {log_path} 2>/dev/null || true") + epochs = [int(m) for m in re.findall(r"\[(\d+)/\d+\]", out)] + return max(epochs, default=0) + except RemoteCommandError: + return 0 + + # Download remote outputs with up to 3 retry attempts (exponential back-off) + def _fetch_outputs(self, target: SshTarget, remote_output_root: 
str, local_output_root: Path) -> None: + remote_path = f"{self.config.remote['workspace_dir']}/{remote_output_root}" + delay = 5 + for attempt in range(1, 4): + try: + fetch_directory(target, remote_path, local_output_root) + return + except RemoteCommandError as exc: + if attempt < 3: + print(f" Download attempt {attempt} failed: {exc}") + print(f" Retrying in {delay}s...") + time.sleep(delay) + delay *= 2 + else: + raise + + # ── public commands ─────────────────────────────────────────────── + + # Full pipeline: validate configs -> find offer -> rent instance -> train -> fetch -> destroy + def run(self, config_paths: list[Path], opts: RunOptions) -> None: + resolved = [] + for cp in config_paths: + cp = (self.project_root / cp).resolve() if not cp.is_absolute() else cp + if not cp.exists(): + raise FileNotFoundError(f"Config not found: {cp}") + resolved.append(cp) + + resolved = self._dedupe_training_configs(resolved) + # Abort early if all configs were duplicates + if not resolved: + raise ValueError("No unique configs to run after deduplication.") + + n = len(resolved) + _, first_output_root = self._module_for_config(resolved[0]) + local_output_root = self.project_root / first_output_root + + self.api.ensure_ssh_key(self.public_key) + offers = self._search_offers(opts.sort_mode, opts.region, opts.price_cap) + + if opts.list_regions: + self._print_regions(offers) + return + + try: + offer = self._choose_offer(offers) if opts.select_offer else offers[0] + except (OfferSelectionAborted, KeyboardInterrupt): + print("Aborted.") + return + + if opts.dry_run: + print(json.dumps(offer if opts.select_offer else offers[:10], indent=2)) + return + + label_stem = resolved[0].stem if n == 1 else resolved[0].parent.name + template_hash_id = self._resolve_template(opts) + offer_id = int(offer["id"]) + cost_per_hour = float(offer.get("dph_total", 0)) + + manifest = RunManifest( + created_at=datetime.now(UTC).isoformat(), + config_paths=[str(cp.relative_to(self.project_root)) 
for cp in resolved], + instance_id=None, + offer_id=offer_id, + ssh_host=None, + ssh_port=None, + status="creating", + remote_workspace=self.config.remote["workspace_dir"], + ) + manifest_path = self._write_manifest(manifest, local_output_root) + + instance_id: int | None = None + should_destroy = True + start_time = time.time() + + try: + # Build the instance payload and rent the GPU + payload = self._build_instance_payload(self._label_for(label_stem), + template_hash_id=template_hash_id) + print(f"Creating instance from offer {offer_id} (${cost_per_hour:.3f}/h)...") + instance_id = self.api.create_instance(offer_id, payload) + self.api.attach_ssh_key(instance_id, self.public_key) + + # Record the instance ID in the manifest immediately for debugging + manifest.instance_id = instance_id + + # Wait for the instance to be SSH-ready, then update the manifest + print(f"Waiting for instance {instance_id}...") + instance = self._wait_for_instance(instance_id) + manifest.ssh_host = instance.ssh_host + manifest.ssh_port = instance.ssh_port + manifest.status = instance.actual_status + self._write_manifest(manifest, local_output_root) + print(f"Instance ready: {instance.ssh_host}:{instance.ssh_port} " + f"({instance.gpu_name}, ${instance.dph_total}/h)") + + target = self._build_target(instance) + + # Wait for SSH to become available before running any remote commands + print("Waiting for SSH...") + wait_for_ssh( + target, + timeout_seconds=self.config.remote["ssh_timeout_seconds"], + poll_interval_seconds=self.config.remote["poll_interval_seconds"], + ) + self._prepare_remote_workspace( + target, download_data=opts.download_data, send_cropped=opts.send_cropped, + first_config=resolved[0], + ) + + # Run each training config sequentially on the same instance + for i, config_path in enumerate(resolved, 1): + print(f"\n[{i}/{n}] Training: {config_path.name}") + self._run_training(target, config_path, opts) + print(f"[{i}/{n}] Fetching outputs...") + _, output_root = 
self._module_for_config(config_path) + self._fetch_outputs(target, output_root, self.project_root / output_root) + + elapsed = time.time() - start_time + manifest.status = "completed" + self._write_manifest(manifest, local_output_root) + print(f"\nAll {n} run(s) completed in {self._fmt_duration(elapsed)} " + f"(~{self._fmt_cost(elapsed, cost_per_hour)}). Manifest: {manifest_path}") + + except KeyboardInterrupt: + elapsed = time.time() - start_time + manifest.status = "cancelled" + self._write_manifest(manifest, local_output_root) + print(f"\nCancelled after {self._fmt_duration(elapsed)} " + f"(~{self._fmt_cost(elapsed, cost_per_hour)}).") + + except Exception: + elapsed = time.time() - start_time + manifest.status = "failed" + self._write_manifest(manifest, local_output_root) + print(f" Failed after {self._fmt_duration(elapsed)} " + f"(~{self._fmt_cost(elapsed, cost_per_hour)}).") + if opts.keep_on_failure or self.config.keep_on_failure: + should_destroy = False + raise + + finally: + if instance_id is not None and should_destroy: + elapsed = time.time() - start_time + print(f"Destroying instance {instance_id} " + f"(total: {self._fmt_duration(elapsed)}, " + f"~{self._fmt_cost(elapsed, cost_per_hour)})") + self.api.destroy_instance(instance_id) + + # CLI subcommand: search and display available GPU offers without renting + def offers( + self, + *, + sort_mode: str | None, + region: str | None, + price_cap: float | None, + select_offer: bool, + list_regions: bool, + limit_output: int, + ) -> None: + offers = self._search_offers(sort_mode, region, price_cap) + if list_regions: + self._print_regions(offers) + return + if select_offer: + try: + offer = self._choose_offer(offers) + except (OfferSelectionAborted, KeyboardInterrupt): + print("Aborted.") + return + print(json.dumps(offer, indent=2)) + else: + print(json.dumps(offers[:limit_output], indent=2)) + + # CLI subcommand: rent an instance and print its connection details (no training) + def up(self, *, label: str 
| None, opts: RunOptions | None = None) -> None: + if opts is None: + opts = RunOptions() + self.api.ensure_ssh_key(self.public_key) + template_hash_id = self._resolve_template(opts) + offer = self._search_offers()[0] + offer_id = int(offer["id"]) + payload = self._build_instance_payload( + label or self._label_for("manual"), template_hash_id=template_hash_id + ) + instance_id = self.api.create_instance(offer_id, payload) + self.api.attach_ssh_key(instance_id, self.public_key) + instance = self._wait_for_instance(instance_id) + print(json.dumps({ + "offer_id": offer_id, + "instance_id": instance_id, + "ssh_host": instance.ssh_host, + "ssh_port": instance.ssh_port, + "gpu_name": instance.gpu_name, + "dph_total": instance.dph_total, + }, indent=2)) + + # CLI subcommand: print the raw instance status JSON + def status(self, instance_id: int) -> None: + print(json.dumps(self.api.show_instance(instance_id).raw, indent=2)) + + # CLI subcommand: destroy a running instance by ID + def down(self, instance_id: int) -> None: + self.api.destroy_instance(instance_id) + print(f"Destroyed instance {instance_id}") diff --git a/pipeline/remote.py b/pipeline/remote.py new file mode 100644 index 0000000..c8b3323 --- /dev/null +++ b/pipeline/remote.py @@ -0,0 +1,180 @@ +import shlex +import subprocess +import time +from dataclasses import dataclass +from pathlib import Path + + +# Error raised when a remote SSH or rsync command fails +class RemoteCommandError(RuntimeError): + pass + + +# Connection details for an SSH target (Vast instance) +@dataclass(slots=True) +class SshTarget: + user: str + host: str + port: int + private_key: Path + + # user@host string used by SSH and rsync + @property + def destination(self) -> str: + return f"{self.user}@{self.host}" + + # Base SSH flags: identity, port, keep-alive, and auto-accept host keys + def ssh_base_command(self) -> list[str]: + return [ + "ssh", + "-i", str(self.private_key), + "-p", str(self.port), + "-o", "BatchMode=yes", + "-o", 
"StrictHostKeyChecking=accept-new", + "-o", "ServerAliveInterval=30", + "-o", "ServerAliveCountMax=10", + "-o", "TCPKeepAlive=yes", + self.destination, + ] + + # SSH flags formatted as a string for rsync's -e option + def rsync_ssh_opts(self) -> str: + return " ".join(self.ssh_base_command()[:-1]) + + +# ── low-level subprocess helpers ───────────────────────────────────── + +# Run a local subprocess, raising RemoteCommandError on non-zero exit +def _run(command: list[str]) -> None: + result = subprocess.run(command, check=False, stderr=subprocess.PIPE, text=True) + if result.returncode != 0: + snippet = result.stderr.strip()[-500:] if result.stderr else "" + msg = f"Command failed ({result.returncode}): {' '.join(command)}" + raise RemoteCommandError(f"{msg}\n{snippet}" if snippet else msg) + + +# ── SSH ─────────────────────────────────────────────────────────────── + +# Repeatedly attempt an SSH connection until the target accepts it or the deadline passes +def wait_for_ssh(target: SshTarget, *, timeout_seconds: int, poll_interval_seconds: int) -> None: + deadline = time.time() + timeout_seconds + while time.time() < deadline: + result = subprocess.run( + target.ssh_base_command() + ["true"], + check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, + ) + if result.returncode == 0: + return + remaining = int(deadline - time.time()) + print(f" SSH not ready yet, retrying... ({remaining}s remaining)") + time.sleep(poll_interval_seconds) + raise RemoteCommandError( + f"Timed out waiting for SSH on {target.host}:{target.port} after {timeout_seconds}s." 
+ ) + + +# Execute a single command on the remote host via SSH +def ssh(target: SshTarget, command: str) -> None: + _run(target.ssh_base_command() + [command]) + + +# Execute a remote command and return its stdout +def ssh_output(target: SshTarget, command: str) -> str: + result = subprocess.run( + target.ssh_base_command() + [command], + check=False, capture_output=True, text=True, + ) + if result.returncode != 0: + raise RemoteCommandError( + f"Command failed ({result.returncode}): {command}\n{result.stderr.strip()}" + ) + return result.stdout + + +# ── detached process management ─────────────────────────────────────── + +# Launch a remote command under nohup so it survives SSH drops; return PID +def run_detached(target: SshTarget, command: str, log_path: str) -> int: + inner = f"nohup bash -c {shlex.quote(command)} > {shlex.quote(log_path)} 2>&1 & echo $!" + output = ssh_output(target, inner) + return int(output.strip().splitlines()[-1]) + + +# Return True if the remote process is running +# Raise RemoteCommandError on SSH failure (exit 255) so callers can apply +# reconnect logic and only return False when kill -0 confirms the PID is gone +def is_process_alive(target: SshTarget, pid: int) -> bool: + try: + ssh(target, f"kill -0 {pid}") + return True + except RemoteCommandError as exc: + if "Command failed (255)" in str(exc): + raise # SSH failure, let caller handle + return False # process not found + + +# Read a remote file's contents; returns None if the file does not exist +def read_remote_file(target: SshTarget, path: str) -> str | None: + return ssh_output(target, f"cat {shlex.quote(path)} 2>/dev/null || true").strip() or None + + +# Open a persistent tail -f on a remote file for real-time streaming +def tail_log(target: SshTarget, log_path: str) -> subprocess.Popen: + cmd = target.ssh_base_command() + [f"tail -f -n +1 {shlex.quote(log_path)}"] + return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True) + + +# ── rsync 
───────────────────────────────────────────────────────────── + +# Upload the entire project directory, respecting the exclude list +def rsync_project(local_root: Path, target: SshTarget, remote_root: str, exclude_file: Path) -> None: + _run([ + "rsync", "-az", "--info=progress2", "--delete", + "--exclude-from", str(exclude_file), + "-e", target.rsync_ssh_opts(), + f"{local_root}/", + f"{target.destination}:{remote_root}/", + ]) + + +# Upload a single file to a remote directory +def rsync_file(local_file: Path, target: SshTarget, remote_dir: str) -> None: + _run([ + "rsync", "-az", "--partial", "--info=progress2", + "-e", target.rsync_ssh_opts(), + str(local_file), + f"{target.destination}:{remote_dir}/", + ]) + + +# Download a remote directory, verify file count, and resume partial transfers +def fetch_directory(target: SshTarget, remote_dir: str, local_dir: Path) -> None: + local_dir.mkdir(parents=True, exist_ok=True) + + # Count remote files first so we can verify completeness after transfer + remote_count = int(ssh_output( + target, f"find {remote_dir.rstrip('/')} -type f 2>/dev/null | wc -l" + ).strip()) + + _run([ + "rsync", "-az", "--partial", "--append-verify", "--info=progress2", + "-e", target.rsync_ssh_opts(), + f"{target.destination}:{remote_dir.rstrip('/')}/", + f"{local_dir}/", + ]) + + local_count = sum(1 for p in local_dir.rglob("*") if p.is_file()) + # Raise if fewer files arrived than expected (corrupted / interrupted transfer) + if local_count < remote_count: + raise RemoteCommandError( + f"Download incomplete: got {local_count} files, expected {remote_count}. 
" + f"Remote: {remote_dir} Local: {local_dir}" + ) + print(f" Verified: {local_count} files downloaded successfully") + + +# ── misc ────────────────────────────────────────────────────────────── + +# Quote and join shell arguments for safe remote execution +def shell_join(parts: list[str]) -> str: + return " ".join(shlex.quote(p) for p in parts) diff --git a/pipeline/rsync-excludes.txt b/pipeline/rsync-excludes.txt new file mode 100644 index 0000000..937843f --- /dev/null +++ b/pipeline/rsync-excludes.txt @@ -0,0 +1,15 @@ + +.claude/ +.git/ + +.venv/ +__pycache__/ +.ipynb_checkpoints/ + +/data/ +/cropped/ +/cropped_classifier.zip +/cropped_generator.zip + +/classifier/outputs/ +/generator/outputs/ diff --git a/pipeline/scripts/bootstrap_env.sh b/pipeline/scripts/bootstrap_env.sh new file mode 100644 index 0000000..d4333e5 --- /dev/null +++ b/pipeline/scripts/bootstrap_env.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +cd "$ROOT_DIR" + +PYTHON_BIN="${PYTHON_BIN:-python3}" +USE_SYSTEM_SITE_PACKAGES="${USE_SYSTEM_SITE_PACKAGES:-1}" +SKIP_TORCH_INSTALL="${SKIP_TORCH_INSTALL:-1}" + +VENV_ARGS=() +if [ "$USE_SYSTEM_SITE_PACKAGES" = "1" ]; then + VENV_ARGS+=(--system-site-packages) +fi + +"$PYTHON_BIN" -m venv "${VENV_ARGS[@]}" .venv +source .venv/bin/activate + +python -m pip install --upgrade pip setuptools wheel + +# Capture system torch versions before venv installs override them +TORCH_VERSION="$($PYTHON_BIN -c 'import torch; print(torch.__version__)' 2>/dev/null || true)" +TV_VERSION="$($PYTHON_BIN -c 'import torchvision; print(torchvision.__version__)' 2>/dev/null || true)" + +if [ "$SKIP_TORCH_INSTALL" = "1" ]; then + filtered_requirements="$(mktemp)" + grep -Ev '^(torch|torchvision)([<>=!~].*)?$' requirements.txt > "$filtered_requirements" + + # Pin system torch/torchvision so transitive deps (e.g. 
facenet-pytorch) can't downgrade them + constraints_file="$(mktemp)" + [ -n "$TORCH_VERSION" ] && echo "torch==$TORCH_VERSION" >> "$constraints_file" + [ -n "$TV_VERSION" ] && echo "torchvision==$TV_VERSION" >> "$constraints_file" + python -m pip install -c "$constraints_file" -r "$filtered_requirements" + rm -f "$filtered_requirements" "$constraints_file" +else + python -m pip install -r requirements.txt +fi + +mkdir -p classifier/outputs/logs classifier/outputs/models classifier/outputs/analysis classifier/outputs/figures classifier/outputs/pipeline + +python - <<'PY' +try: + import torch + print(f"torch={torch.__version__} cuda_available={torch.cuda.is_available()}") +except Exception as exc: + print(f"torch check failed: {exc}") +PY + diff --git a/pipeline/vast_api.py b/pipeline/vast_api.py new file mode 100644 index 0000000..c487e44 --- /dev/null +++ b/pipeline/vast_api.py @@ -0,0 +1,131 @@ +import json +from dataclasses import dataclass +from typing import Any +from urllib import error, request + + +# Generic error raised for any Vast.ai API failure +class VastApiError(RuntimeError): + pass + + +# Lightweight view of a Vast.ai instance with the fields the pipeline cares about +@dataclass(slots=True) +class VastInstance: + id: int + actual_status: str + ssh_host: str | None + ssh_port: int | None + public_ipaddr: str | None + gpu_name: str | None + dph_total: float | None + raw: dict[str, Any] + + +# Thin wrapper around the Vast.ai REST API +class VastApiClient: + def __init__(self, api_key: str, *, base_url: str = "https://console.vast.ai") -> None: + self.api_key = api_key + self.base_url = base_url.rstrip("/") + + # Low-level request helper — sends JSON, returns parsed response body + def _request(self, method: str, path: str, payload: dict[str, Any] | None = None) -> Any: + url = f"{self.base_url}{path}" + data = None + headers = {"Authorization": f"Bearer {self.api_key}"} + if payload is not None: + headers["Content-Type"] = "application/json" + data = 
json.dumps(payload).encode("utf-8") + + req = request.Request(url, method=method, data=data, headers=headers) + try: + with request.urlopen(req, timeout=60) as response: + body = response.read() + + # Vast.ai returns varied error formats; surface whatever body we get + except error.HTTPError as exc: + details = exc.read().decode("utf-8", errors="replace") + raise VastApiError(f"{method} {path} failed with {exc.code}: {details}") from exc + except error.URLError as exc: + raise VastApiError(f"{method} {path} failed: {exc.reason}") from exc + + if not body: + return None + return json.loads(body) + + # Fetch the currently authenticated user's profile + def show_user(self) -> dict[str, Any]: + return self._request("GET", "/api/v0/users/current/") + + # ── SSH keys ──────────────────────────────────────────────────────── + + # List registered SSH keys; handles inconsistent response shapes from the API + def show_ssh_keys(self) -> list[dict[str, Any]]: + response = self._request("GET", "/api/v0/ssh/") + if isinstance(response, list): + return response + if isinstance(response, dict): + for key in ("keys", "ssh_keys"): + value = response.get(key) + if isinstance(value, list): + return value + raise VastApiError(f"Unexpected SSH key response: {response}") + + # Register the public key if it isn't already present + def ensure_ssh_key(self, public_key: str) -> None: + existing_keys = self.show_ssh_keys() + if any( + ( + item.get("key") + or item.get("public_key") + or item.get("ssh_key") + or "" + ).strip() == public_key + for item in existing_keys + ): + return + self._request("POST", "/api/v0/ssh/", {"ssh_key": public_key}) + + # Authorise an SSH key for a running instance + def attach_ssh_key(self, instance_id: int, public_key: str) -> None: + self._request("POST", f"/api/v0/instances/{instance_id}/ssh/", {"ssh_key": public_key}) + + # ── Offers ───────────────────────────────────────────────────────── + + # Search available GPU offers matching a query filter + def 
search_offers(self, query: dict[str, Any]) -> list[dict[str, Any]]: + response = self._request("POST", "/api/v0/bundles/", query) + offers = response.get("offers", []) + if isinstance(offers, dict): + return [offers] + return offers + + # ── Instances ────────────────────────────────────────────────────── + + # Rent an offer, returning the new contract (instance) ID + def create_instance(self, offer_id: int, payload: dict[str, Any]) -> int: + response = self._request("PUT", f"/api/v0/asks/{offer_id}/", payload) + if not response or not response.get("success"): + raise VastApiError(f"Instance creation failed for offer {offer_id}: {response}") + return int(response["new_contract"]) + + # Fetch current status and connection details for an instance + def show_instance(self, instance_id: int) -> VastInstance: + response = self._request("GET", f"/api/v0/instances/{instance_id}/") + raw = response.get("instances") + if not raw: + raise VastApiError(f"No instance details found for {instance_id}: {response}") + return VastInstance( + id=int(raw["id"]), + actual_status=raw.get("actual_status", ""), + ssh_host=raw.get("ssh_host"), + ssh_port=raw.get("ssh_port"), + public_ipaddr=raw.get("public_ipaddr"), + gpu_name=raw.get("gpu_name"), + dph_total=raw.get("dph_total"), + raw=raw, + ) + + # Permanently destroy an instance (releases the GPU and billing) + def destroy_instance(self, instance_id: int) -> None: + self._request("DELETE", f"/api/v0/instances/{instance_id}/") diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..bdab5b7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,35 @@ +# Python 3.12 recommended — facenet-pytorch has compatibility issues on 3.13+ +# conda create -n drl python=3.12 (or equivalent venv) + +# ── Deep learning ───────────────────────────────────────────────────────────── +torch>=2.0.0 +torchvision>=0.15.0 + +# ── Image processing ────────────────────────────────────────────────────────── +pillow>=10.0.0 + +# ── Data / 
evaluation ───────────────────────────────────────────────────────── +numpy>=1.24.0 +scikit-learn>=1.3.0 # roc_auc_score, confusion_matrix, train_test_split +tqdm>=4.65.0 # progress bars during training and data loading +torchmetrics>=0.11.0 # metrics for training and evaluation +torch-fidelity>=0.3.0 # required by torchmetrics FrechetInceptionDistance + +# ── Visualisation ───────────────────────────────────────────────────────────── +matplotlib>=3.7.0 +seaborn>=0.12.0 # heatmaps and statistical plots in notebooks + +# ── Notebooks ───────────────────────────────────────────────────────────────── +jupyter>=1.0.0 + +# ── Dataset download ────────────────────────────────────────────────────────── +huggingface-hub>=0.20.0 # tools/download_data.py pulls DFF from HuggingFace + +# ── Face detection (optional) ───────────────────────────────────────────────── +# Required only for the face-crop preprocessing tools: +# classifier/tools/facecrop.py (bbox crop -> cropped/classifier/) +# generator/tools/facecrop.py (landmark alignment -> cropped/generator/) +# Skip if you don't run these — saves ~200 MB and avoids the +# pytorch-lightning transitive dependency. +facenet-pytorch>=2.5.0 +scikit-image>=0.21.0 # similarity-transform alignment in generator/tools/facecrop.py \ No newline at end of file