Skip to content

Commit f938508

Browse files
authored
Merge pull request #23 from DropThe8bit/chore/3
[chore] Dockerfile GPU 베이스/HF 캐시/Yolo 모델 관련 설정
2 parents 126162a + 7cdcb93 commit f938508

File tree

4 files changed

+99
-35
lines changed

4 files changed

+99
-35
lines changed

Dockerfile

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,30 @@
1-
# 1. Python 3.10 기반 슬림 이미지 사용
2-
FROM python:3.10-slim
1+
# 1. 권장 베이스: CUDA 포함 PyTorch 런타임
2+
FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-runtime
3+
4+
# OpenCV 헤드리스 등에 필요한 OS 패키지
5+
RUN apt-get update && apt-get install -y --no-install-recommends \
6+
libglib2.0-0 libsm6 libxext6 libxrender1 && rm -rf /var/lib/apt/lists/*
7+
8+
# HF 캐시 + PyTorch 메모리 튜닝(선택)
9+
ENV HF_HOME=/models/hf-cache \
10+
HUGGINGFACE_HUB_CACHE=/models/hf-cache \
11+
PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
312

413
# 2. 작업 디렉토리 생성
514
WORKDIR /app
615

7-
# 3. requirements.txt 먼저 복사하고 설치
16+
# 3. 의존성만 먼저 복사 → 캐시 최대 활용
817
COPY everTale/requirements.txt ./requirements.txt
9-
RUN pip install --no-cache-dir -r requirements.txt
18+
RUN pip install --no-cache-dir --upgrade pip \
19+
&& pip install --no-cache-dir -r /app/requirements.txt \
20+
&& pip install --no-cache-dir opencv-python-headless ultralytics
21+
22+
# 모델 파일을 이미지에 포함
23+
COPY models/yolov8n.pt /models/my_yolo_model.pt
24+
ENV YOLO_MODEL_PATH=/models/my_yolo_model.pt
1025

1126
# 4. 전체 코드 복사
12-
COPY . .
27+
COPY . /app
1328

1429
# 5. 컨테이너가 열 포트 설정
1530
EXPOSE 8000

everTale/app/main.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,13 @@
2020
@app.get("/")
2121
def root():
2222
return {"message": "EverTale AI 서버가 정상 실행 중입니다."}
23+
24+
@app.get("/debug/cuda")
25+
def cuda_health():
26+
import torch, os
27+
return {
28+
"cuda_available": torch.cuda.is_available(),
29+
"device_count": torch.cuda.device_count(),
30+
"current_device": torch.cuda.current_device() if torch.cuda.is_available() else None,
31+
"visible_devices": os.getenv("NVIDIA_VISIBLE_DEVICES", "")
32+
}

everTale/app/service/yolo_service.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,35 @@
99

1010
# Location of the YOLO weights file. Falls back to the same default path that
# _resolve_yolo_path() uses, so importing this module no longer raises KeyError
# when the YOLO_MODEL_PATH environment variable is not set.
YOLO_MODEL_PATH = os.getenv("YOLO_MODEL_PATH", "/models/my_yolo_model.pt")
1111

12+
from ultralytics import YOLO
13+
import os, torch
14+
15+
def _resolve_yolo_path() -> str:
16+
path = os.getenv("YOLO_MODEL_PATH", "/models/my_yolo_model.pt")
17+
if not os.path.exists(path):
18+
raise FileNotFoundError(f"YOLO model not found at: {path}")
19+
return path
20+
21+
def _require_gpu_for_yolo(stage: str = "YOLO load"):
22+
if torch.cuda.is_available():
23+
return 0 # device index for CUDA
24+
# MPS는 Ultralytics 지원이 제한적이므로 필요한 경우만 허용
25+
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
26+
return "mps"
27+
raise RuntimeError(f"[ERROR] No GPU backend during {stage}. CPU is not allowed for YOLO.")
28+
1229
def load_model() -> YOLO:
30+
path = _resolve_yolo_path()
31+
device = _require_gpu_for_yolo("YOLO load")
1332
try:
14-
model = YOLO(YOLO_MODEL_PATH)
15-
print("모델이 성공적으로 로드되었습니다.")
33+
model = YOLO(path)
34+
# warm-up(선택): 작은 더미로 한 번 실행해 메모리 로딩
35+
model.predict(source=np.zeros((64,64,3), dtype=np.uint8), device=device, imgsz=64, verbose=False)
36+
print(f"[INFO] YOLO loaded on device={device} from {path}")
1637
return model
1738
except Exception as e:
18-
print(f"모델 로드 중 오류가 발생했습니다: {e}")
19-
return None
39+
# Chain explicitly so the traceback reports the original failure as the direct cause.
raise RuntimeError(f"Failed to load YOLO model at {path}: {e}") from e
40+
2041

2142
def _url_to_bgr(url: str) -> np.ndarray:
2243
resp = requests.get(url, timeout=10)
@@ -35,13 +56,19 @@ def detect_object(image_paths: List[str]) -> Dict[str, Any]:
3556
탐지 후보가 전혀 없으면 {"index": None, "url": None, "detection": None}
3657
"""
3758
model = load_model()
59+
device = 0 if torch.cuda.is_available() else "mps" # 위와 일치
3860
urls = image_paths[:8]
3961
candidates: List[Dict[str, Any]] = []
4062

4163
for idx, url in enumerate(urls):
4264
try:
4365
img = _url_to_bgr(url)
44-
results = model.predict(source=img, verbose=False)
66+
results = model.predict(
67+
source=img,
68+
device=device,
69+
half=torch.cuda.is_available(),
70+
verbose=False
71+
)
4572
if not results or results[0].boxes is None or results[0].boxes.shape[0] == 0:
4673
continue
4774

everTale/requirements.txt

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,39 @@
1-
accelerate==1.8.1
2-
boto3==1.38.45
3-
diffusers==0.33.1
4-
fastapi==0.115.13
5-
fsspec==2025.5.1
6-
huggingface-hub==0.33.0
7-
Jinja2==3.1.6
8-
numpy==2.0.2
9-
openai==1.89.0
10-
pillow==11.2.1
11-
psutil==7.0.0
12-
pydantic==2.11.7
13-
python-dotenv==1.1.0
14-
python-multipart==0.0.20
1+
# --- PyTorch stack (이미지에 포함되지만 버전 명시해두면 좋음) ---
2+
torch==2.3.1
3+
torchvision==0.18.1
4+
torchaudio==2.3.1
5+
6+
# --- Diffusers/Transformers/Accelerate (안정 조합) ---
7+
diffusers==0.29.0
8+
transformers==4.42.4
9+
accelerate==0.32.1
10+
safetensors==0.4.3
11+
huggingface-hub==0.23.4
12+
tokenizers==0.19.1
13+
14+
# --- API / 서버 ---
15+
fastapi==0.115.6 # (너가 쓰는 0.115.x 라인 유지, 너무 최신 patch는 피함)
16+
uvicorn==0.30.6 # 안정 버전
17+
starlette==0.41.3 # fastapi 0.115.6 requires starlette>=0.40.0,<0.42.0
18+
19+
# --- 유틸 ---
20+
numpy==1.26.4 # torch 2.3.x와 널리 쓰이는 안정 버전
21+
pillow==10.4.0
22+
requests==2.32.3
23+
tqdm==4.66.4
24+
psutil==5.9.8
25+
python-dotenv==1.0.1
26+
python-multipart==0.0.9
1527
PyYAML==6.0.2
16-
regex==2024.11.6
17-
requests==2.32.4
18-
safetensors==0.5.3
19-
starlette==0.46.2
20-
tokenizers==0.21.1
21-
torch==2.7.1
22-
tqdm==4.67.1
23-
transformers==4.52.4
24-
typing-inspection==0.4.1
25-
typing_extensions==4.14.0
26-
uvicorn==0.34.3
28+
regex==2024.5.15
29+
typing_extensions==4.12.2
30+
fsspec==2024.6.1
31+
ultralytics
32+
33+
# --- 선택(LLM LoRA 등에 필요하면) ---
34+
peft==0.11.1
35+
36+
# --- 필요 시만 추가 (성공/실패 갈릴 수 있으므로 초기엔 제외 권장) ---
37+
# xformers==0.0.27.post2
38+
2739

0 commit comments

Comments
 (0)