Skip to content

Commit dbc0969

Browse files
ConalMullanclaude
andcommitted
Switch to runtime model download for video generation workers
GitHub Actions runners have insufficient disk (~14GB) for baking 30-40GB models into Docker images at build time. Changes: - Models downloaded at container startup, cached to network volume - New download_models.py handles HuggingFace downloads with caching - Image size reduced from ~30GB to ~8GB - First cold start: 5-10 min (download), subsequent: ~30s (cached) Requires RunPod Network Volume for cost-effective operation. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent db1f7d2 commit dbc0969

6 files changed

Lines changed: 310 additions & 76 deletions

File tree

docker/runpod-qwen-edit/Dockerfile

Lines changed: 16 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
# RunPod Serverless handler for Qwen-Image-Edit with LightX2V acceleration
22
#
3-
# Build: docker buildx build --platform linux/amd64 -t ghcr.io/conalmullan/video-toolkit-qwen-edit:latest --push .
3+
# Build: docker buildx build --platform linux/amd64 -t ghcr.io/digitalsamba/video-toolkit-qwen-edit:latest --push .
44
#
5-
# Image size: ~25GB (includes pre-baked model weights for fast cold starts)
5+
# Image size: ~8GB (models downloaded at runtime, cached to network volume)
66
#
7-
# Version: 1.0.0 - CUDA 12.4, PyTorch 2.5, LightX2V + Qwen-Image-Edit-2511-Lightning
7+
# Version: 1.1.0 - CUDA 12.4, PyTorch 2.5, LightX2V + runtime model download
8+
#
9+
# Model Download: ~30GB downloaded on first run, cached to /runpod-volume/models/
10+
# Cold start: ~5-10 min (first run), ~30s (cached)
811
#
912
# GPU Requirements:
1013
# - Minimum: 24GB VRAM (L4, RTX 4090) with FP8 quantization
@@ -76,53 +79,26 @@ RUN pip3 install --no-cache-dir \
7679

7780
WORKDIR /app
7881

79-
# Create models directory
80-
RUN mkdir -p /models/qwen-edit /models/qwen-edit-fp8
81-
82-
# Download Qwen-Image-Edit-2511 base model (~20GB)
83-
# Uses HF hub to cache in standard location
84-
RUN python3 -c "\
85-
from huggingface_hub import snapshot_download; \
86-
print('Downloading Qwen-Image-Edit-2511 base model...'); \
87-
snapshot_download( \
88-
repo_id='Qwen/Qwen-Image-Edit-2511', \
89-
local_dir='/models/qwen-edit', \
90-
ignore_patterns=['*.md', '*.txt', '.gitattributes'] \
91-
); \
92-
print('Base model downloaded successfully'); \
93-
"
94-
95-
# Download FP8 quantized Lightning weights (~10GB)
96-
RUN python3 -c "\
97-
from huggingface_hub import snapshot_download; \
98-
print('Downloading FP8 Lightning weights...'); \
99-
snapshot_download( \
100-
repo_id='lightx2v/Qwen-Image-Edit-2511-Lightning', \
101-
local_dir='/models/qwen-edit-fp8', \
102-
ignore_patterns=['*.md', '*.txt', '.gitattributes'] \
103-
); \
104-
print('FP8 weights downloaded successfully'); \
105-
"
106-
107-
# Copy handler
82+
# Copy handler and download script
10883
COPY handler.py /app/handler.py
84+
COPY download_models.py /app/download_models.py
10985

110-
# Environment
86+
# Environment - models will be downloaded to network volume at runtime
11187
ENV PYTHONUNBUFFERED=1
112-
ENV HF_HOME=/root/.cache/huggingface
88+
ENV HF_HOME=/runpod-volume/.cache/huggingface
11389
ENV LIGHTX2V_PATH=/app/lightx2v
114-
ENV MODEL_PATH=/models/qwen-edit
115-
ENV FP8_WEIGHTS_PATH=/models/qwen-edit-fp8
90+
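# Default model locations on the network volume. The handler resolves the actual paths at runtime via download_models.py, which falls back to /models when no volume is mounted.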
91+
ENV MODEL_BASE_PATH=/runpod-volume/models
92+
ENV MODEL_PATH=/runpod-volume/models/qwen-edit
93+
ENV FP8_WEIGHTS_PATH=/runpod-volume/models/qwen-edit-fp8
11694

117-
# Health check - verify imports and model paths
95+
# Build-time smoke test - verify imports only (models are downloaded at runtime)
11896
RUN python3 -c "\
11997
import torch; \
12098
print(f'PyTorch {torch.__version__}, CUDA available: {torch.cuda.is_available()}'); \
12199
from lightx2v import LightX2VPipeline; \
122100
print('LightX2V import OK'); \
123-
from pathlib import Path; \
124-
print(f'Model path exists: {Path(\"/models/qwen-edit\").exists()}'); \
125-
print(f'FP8 weights exist: {Path(\"/models/qwen-edit-fp8\").exists()}'); \
101+
print('Models will be downloaded at runtime to /runpod-volume/models/'); \
126102
"
127103

128104
# Run handler
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Model download script for Qwen-Image-Edit with LightX2V.
4+
5+
Downloads models to network volume for caching across cold starts.
6+
Supports both RunPod network volumes and local fallback.
7+
"""
8+
9+
import os
10+
import sys
11+
from pathlib import Path
12+
from huggingface_hub import snapshot_download
13+
14+
15+
def get_model_paths():
    """Choose where models and the HuggingFace cache live, and create those directories.

    The RunPod network volume (``/runpod-volume``) is used when it exists and is
    writable; otherwise container-local directories are used and a warning is
    printed. ``HF_HOME`` is set in this process's environment to the chosen cache
    directory.

    Returns:
        dict: ``base_path``, ``model_path`` and ``fp8_path`` as ``Path`` objects.
    """
    volume_root = "/runpod-volume"
    volume_usable = Path(volume_root).exists() and os.access(volume_root, os.W_OK)

    if not volume_usable:
        # Container-local storage is ephemeral, so nothing survives a cold start.
        models_root = Path("/models")
        hf_cache = Path("/root/.cache/huggingface")
        print("WARNING: No network volume found, using ephemeral storage")
        print("Models will be re-downloaded on each cold start!")
    else:
        models_root = Path("/runpod-volume/models")
        hf_cache = Path("/runpod-volume/.cache/huggingface")
        print("Using RunPod network volume for model storage")

    for directory in (models_root, hf_cache):
        directory.mkdir(parents=True, exist_ok=True)

    # NOTE(review): huggingface_hub is imported at module load, before this runs;
    # it presumably reads HF_HOME at import time, so this assignment may only
    # affect child processes and libraries imported later — confirm.
    os.environ["HF_HOME"] = str(hf_cache)

    return {
        "base_path": models_root,
        "model_path": models_root / "qwen-edit",
        "fp8_path": models_root / "qwen-edit-fp8",
    }
40+
41+
42+
def check_model_exists(path: Path, min_files: int = 5) -> bool:
    """Heuristically decide whether a model directory holds a complete download.

    Args:
        path: Directory the model was (or will be) downloaded into.
        min_files: Minimum number of top-level entries (files or directories,
            hidden ones included) the directory must contain.

    Returns:
        True when ``path`` is a directory with at least ``min_files`` top-level
        entries and no leftover partial-download files; False otherwise.
    """
    # A missing path or a regular file can never be a usable model directory.
    if not path.is_dir():
        return False

    # An interrupted download can leave enough entries to satisfy the count
    # below. Treat any leftover "*.incomplete" file as "not complete" so the
    # caller re-runs the download, which is safe to repeat.
    # NOTE(review): relies on huggingface_hub naming in-progress files
    # "*.incomplete" — confirm for the installed version.
    if any(path.rglob("*.incomplete")):
        return False

    entry_count = sum(1 for _ in path.iterdir())
    return entry_count >= min_files
48+
49+
50+
def download_base_model(model_path: Path) -> bool:
    """Fetch the Qwen-Image-Edit-2511 base model into ``model_path`` unless already present.

    The download is skipped when ``check_model_exists`` reports the directory as
    populated (at least 10 entries). Exceptions raised during the download are
    printed rather than propagated.

    Args:
        model_path: Target directory for the model snapshot.

    Returns:
        True if the model was already present or downloaded; False on failure.
    """
    already_present = check_model_exists(model_path, min_files=10)
    if already_present:
        print(f"Base model already exists at {model_path}")
        return True

    print("Downloading Qwen-Image-Edit-2511 base model (~20GB)...")
    print("This may take 5-10 minutes on first run.")

    succeeded = False
    try:
        # Docs and git metadata are excluded from the snapshot.
        excluded = ["*.md", "*.txt", ".gitattributes"]
        snapshot_download(
            repo_id="Qwen/Qwen-Image-Edit-2511",
            local_dir=str(model_path),
            ignore_patterns=excluded,
        )
        print("Base model downloaded successfully")
        succeeded = True
    except Exception as download_error:
        print(f"ERROR downloading base model: {download_error}")
    return succeeded
70+
71+
72+
def download_fp8_weights(fp8_path: Path) -> bool:
    """Fetch the FP8 quantized Lightning weights into ``fp8_path`` unless already present.

    The download is skipped when ``check_model_exists`` reports the directory as
    populated (at least 3 entries). Exceptions raised during the download are
    printed rather than propagated.

    Args:
        fp8_path: Target directory for the weights snapshot.

    Returns:
        True if the weights were already present or downloaded; False on failure.
    """
    already_present = check_model_exists(fp8_path, min_files=3)
    if already_present:
        print(f"FP8 weights already exist at {fp8_path}")
        return True

    print("Downloading FP8 Lightning weights (~10GB)...")

    succeeded = False
    try:
        # Docs and git metadata are excluded from the snapshot.
        excluded = ["*.md", "*.txt", ".gitattributes"]
        snapshot_download(
            repo_id="lightx2v/Qwen-Image-Edit-2511-Lightning",
            local_dir=str(fp8_path),
            ignore_patterns=excluded,
        )
        print("FP8 weights downloaded successfully")
        succeeded = True
    except Exception as download_error:
        print(f"ERROR downloading FP8 weights: {download_error}")
    return succeeded
91+
92+
93+
def ensure_models_downloaded() -> dict:
    """Resolve storage paths and make sure both model artifacts are on disk.

    Runs the base-model download first, then the FP8 weights; stops at the
    first step that reports failure.

    Returns:
        dict: The path mapping produced by ``get_model_paths``.

    Raises:
        RuntimeError: If either download step returns False.
    """
    paths = get_model_paths()

    # (downloader, key into ``paths``, error raised when the step fails)
    steps = (
        (download_base_model, "model_path", "Failed to download base model"),
        (download_fp8_weights, "fp8_path", "Failed to download FP8 weights"),
    )
    for fetch, path_key, failure_message in steps:
        if not fetch(paths[path_key]):
            raise RuntimeError(failure_message)

    print("\nAll models ready:")
    print(f" Base model: {paths['model_path']}")
    print(f" FP8 weights: {paths['fp8_path']}")

    return paths
114+
115+
116+
if __name__ == "__main__":
    # Standalone entry point: pre-download the models and report the outcome
    # through the process exit status (0 = success, 1 = failure).
    exit_code = 0
    try:
        paths = ensure_models_downloaded()
        print("\nModel download complete!")
    except Exception as err:
        print(f"\nModel download failed: {err}")
        exit_code = 1
    sys.exit(exit_code)

docker/runpod-qwen-edit/handler.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@
4848
import torch
4949
from PIL import Image
5050

51-
# Model paths (baked into Docker image)
52-
MODEL_PATH = Path(os.environ.get("MODEL_PATH", "/models/qwen-edit"))
53-
FP8_WEIGHTS_PATH = Path(os.environ.get("FP8_WEIGHTS_PATH", "/models/qwen-edit-fp8"))
51+
# Model paths - set after runtime download
52+
MODEL_PATH = None
53+
FP8_WEIGHTS_PATH = None
5454

5555
# Lazy-loaded pipeline
5656
_pipeline = None
@@ -62,6 +62,23 @@ def log(message: str) -> None:
6262
print(message, file=sys.stderr, flush=True)
6363

6464

65+
def ensure_models() -> None:
    """Populate the module-level model paths, downloading models on first use.

    Does nothing when ``MODEL_PATH`` is already set and exists on disk.
    Otherwise delegates to ``download_models.ensure_models_downloaded`` and
    stores the returned ``model_path`` / ``fp8_path`` in the module globals.
    """
    global MODEL_PATH, FP8_WEIGHTS_PATH

    needs_init = MODEL_PATH is None or not MODEL_PATH.exists()
    if needs_init:
        log("Checking/downloading models...")
        # Imported lazily, only when a download check is actually needed.
        from download_models import ensure_models_downloaded

        resolved = ensure_models_downloaded()
        MODEL_PATH = resolved["model_path"]
        FP8_WEIGHTS_PATH = resolved["fp8_path"]

        log(f"Models ready: {MODEL_PATH}")
80+
81+
6582
def get_gpu_vram_gb() -> int:
6683
"""Detect GPU VRAM using PyTorch."""
6784
try:
@@ -102,6 +119,9 @@ def get_pipeline(use_fp8: bool = True):
102119
"""Get or initialize LightX2V pipeline (lazy loading)."""
103120
global _pipeline, _pipeline_config
104121

122+
# Ensure models are downloaded first
123+
ensure_models()
124+
105125
# Check if we need to reinitialize (different config)
106126
current_config = {"use_fp8": use_fp8}
107127
if _pipeline is not None and _pipeline_config == current_config:
@@ -348,15 +368,20 @@ def handler(job: dict) -> dict:
348368
# RunPod serverless entry point
349369
if __name__ == "__main__":
350370
log("Starting RunPod Qwen-Edit handler...")
351-
log(f"Model path: {MODEL_PATH}, exists: {MODEL_PATH.exists()}")
352-
log(f"FP8 weights path: {FP8_WEIGHTS_PATH}, exists: {FP8_WEIGHTS_PATH.exists()}")
353371

354-
# Check CUDA
372+
# Check CUDA first
355373
if torch.cuda.is_available():
356374
log(f"CUDA available: {torch.cuda.get_device_name(0)}")
357375
vram_gb = get_gpu_vram_gb()
358376
log(f"VRAM: {vram_gb}GB")
359377
else:
360378
log("WARNING: CUDA not available!")
361379

380+
# Download models at startup (before serverless loop)
381+
# This happens during container initialization, not during job execution
382+
log("Downloading models at startup (this may take 5-10 min on first run)...")
383+
ensure_models()
384+
log(f"Model path: {MODEL_PATH}")
385+
log(f"FP8 weights path: {FP8_WEIGHTS_PATH}")
386+
362387
runpod.serverless.start({"handler": handler})

docker/runpod-wan-i2v/Dockerfile

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
# RunPod Serverless handler for Wan2.2 Image-to-Video with LightX2V acceleration
22
#
3-
# Build: docker buildx build --platform linux/amd64 -t ghcr.io/conalmullan/video-toolkit-wan-i2v:latest --push .
3+
# Build: docker buildx build --platform linux/amd64 -t ghcr.io/digitalsamba/video-toolkit-wan-i2v:latest --push .
44
#
5-
# Image size: ~40GB (includes pre-baked model weights for fast cold starts)
5+
# Image size: ~8GB (models downloaded at runtime, cached to network volume)
66
#
7-
# Version: 1.0.0 - CUDA 12.4, PyTorch 2.5, LightX2V + Wan2.2-I2V-A14B
7+
# Version: 1.1.0 - CUDA 12.4, PyTorch 2.5, LightX2V + runtime model download
8+
#
9+
# Model Download: ~35GB downloaded on first run, cached to /runpod-volume/models/
10+
# Cold start: ~10-15 min (first run), ~30s (cached)
811
#
912
# GPU Requirements:
1013
# - Minimum: 24GB VRAM (L4, RTX 4090) with aggressive offloading
@@ -78,39 +81,24 @@ RUN pip3 install --no-cache-dir \
7881

7982
WORKDIR /app
8083

81-
# Create models directory
82-
RUN mkdir -p /models/wan-i2v
83-
84-
# Download Wan2.2-I2V-A14B model (~35GB)
85-
# MoE (Mixture of Experts) architecture - 14B active parameters
86-
RUN python3 -c "\
87-
from huggingface_hub import snapshot_download; \
88-
print('Downloading Wan2.2-I2V-A14B model (this will take a while)...'); \
89-
snapshot_download( \
90-
repo_id='Wan-AI/Wan2.2-I2V-A14B', \
91-
local_dir='/models/wan-i2v', \
92-
ignore_patterns=['*.md', '*.txt', '.gitattributes'] \
93-
); \
94-
print('Wan2.2 model downloaded successfully'); \
95-
"
96-
97-
# Copy handler
84+
# Copy handler and download script
9885
COPY handler.py /app/handler.py
86+
COPY download_models.py /app/download_models.py
9987

100-
# Environment
88+
# Environment - models will be downloaded to network volume at runtime
10189
ENV PYTHONUNBUFFERED=1
102-
ENV HF_HOME=/root/.cache/huggingface
90+
ENV HF_HOME=/runpod-volume/.cache/huggingface
10391
ENV LIGHTX2V_PATH=/app/lightx2v
104-
ENV MODEL_PATH=/models/wan-i2v
92+
ENV MODEL_BASE_PATH=/runpod-volume/models
93+
ENV MODEL_PATH=/runpod-volume/models/wan-i2v
10594

106-
# Health check - verify imports and model paths
95+
# Build-time smoke test - verify imports only (models are downloaded at runtime)
10796
RUN python3 -c "\
10897
import torch; \
10998
print(f'PyTorch {torch.__version__}, CUDA available: {torch.cuda.is_available()}'); \
11099
from lightx2v import LightX2VPipeline; \
111100
print('LightX2V import OK'); \
112-
from pathlib import Path; \
113-
print(f'Model path exists: {Path(\"/models/wan-i2v\").exists()}'); \
101+
print('Models will be downloaded at runtime to /runpod-volume/models/'); \
114102
"
115103

116104
# Run handler

0 commit comments

Comments
 (0)