diff --git a/.gitignore b/.gitignore
index 1691a1b..643e43f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -168,4 +168,6 @@ cython_debug/
 pretrained_models
 test_data
 output_long
-hq_results
+
+cache/
+.cache/
diff --git a/Dockerfile.cu12 b/Dockerfile.cu12
new file mode 100644
index 0000000..aa1d27b
--- /dev/null
+++ b/Dockerfile.cu12
@@ -0,0 +1,32 @@
+FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime
+
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    build-essential \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install ffmpeg and x264 using conda
+RUN conda install -y conda=24.9.2
+RUN conda install -y -c conda-forge ffmpeg x264
+
+# Update pip and setuptools
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel
+
+# Install Python dependencies
+COPY requirements_cu12.txt .
+RUN pip install -r requirements_cu12.txt --force-reinstall
+RUN pip install huggingface_hub==0.20.3
+
+# Copy project files
+COPY . .
+
+# Specify runtime command
+# CMD ["python", "scripts/inference_long.py", "--config", "./configs/inference/long.yaml"]
diff --git a/basicsr/utils/video_util.py b/basicsr/utils/video_util.py
index 20a2ff1..b29df5c 100644
--- a/basicsr/utils/video_util.py
+++ b/basicsr/utils/video_util.py
@@ -122,4 +122,4 @@ def write_frame(self, frame):
 
     def close(self):
         self.stream_writer.stdin.close()
-        self.stream_writer.wait()
\ No newline at end of file
+        self.stream_writer.wait()
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..40e5eb5
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,19 @@
+version: '3.8'
+services:
+  hallo2:
+    build:
+      context: .
+      dockerfile: Dockerfile.cu12
+    volumes:
+      - .:/app
+      - ./.cache:/root/.cache
+
+    # command: python scripts/inference_long.py --config ./configs/inference/long.yaml
+    tty: true
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
diff --git a/examples/driving_audios/6.wav b/examples/driving_audios/6.wav
new file mode 100644
index 0000000..1ebf413
Binary files /dev/null and b/examples/driving_audios/6.wav differ
diff --git a/examples/reference_images/7.jpg b/examples/reference_images/7.jpg
new file mode 100644
index 0000000..5fdd44b
Binary files /dev/null and b/examples/reference_images/7.jpg differ
diff --git a/requirements_cu12.txt b/requirements_cu12.txt
new file mode 100644
index 0000000..fadb409
--- /dev/null
+++ b/requirements_cu12.txt
@@ -0,0 +1,35 @@
+accelerate==0.28.0
+audio-separator==0.17.2
+av==12.1.0
+bitsandbytes==0.43.1
+decord==0.6.0
+diffusers==0.27.2
+einops==0.8.0
+ffmpeg-python==0.2.0
+icecream==2.1.3
+insightface==0.7.3
+librosa==0.10.2.post1
+lpips==0.1.4
+mediapipe[vision]==0.10.14
+mlflow==2.13.1
+moviepy==1.0.3
+numpy==1.26.4
+omegaconf==2.3.0
+onnx2torch==1.5.14
+onnx==1.16.1
+onnxruntime-gpu==1.18.0
+opencv-contrib-python
+opencv-python-headless
+opencv-python
+pillow==10.3.0
+setuptools==70.0.0
+tqdm==4.66.4
+transformers==4.39.2
+xformers==0.0.25
+isort==5.13.2
+pylint==3.2.2
+pre-commit==3.7.1
+gradio==4.36.1
+lpips
+ffmpeg-python==0.2.0
+huggingface_hub==0.20.3
diff --git a/test/standalone_videowriter_test.py b/test/standalone_videowriter_test.py
new file mode 100644
index 0000000..f2c037a
--- /dev/null
+++ b/test/standalone_videowriter_test.py
@@ -0,0 +1,100 @@
+"""Standalone smoke test: encode a synthetic clip through ffmpeg/libx264.
+
+The writer class is defined in this file, so the script imports only numpy
+and ffmpeg-python and launches the ``ffmpeg`` command.
+"""
+import sys
+
+import ffmpeg
+import numpy as np
+
+
+class VideoWriter:
+    """Pipe raw BGR frames into an ffmpeg subprocess that encodes with libx264."""
+
+    def __init__(self, video_save_path, height, width, fps, audio):
+        """Start the ffmpeg subprocess that writes ``video_save_path``.
+
+        ``audio`` is passed to ``output()`` ahead of the save path with
+        ``acodec='copy'`` (presumably an ffmpeg-python audio stream; confirm
+        against callers). Pass ``None`` for a video-only file.
+        """
+        if height > 2160:
+            print('You are generating video that is larger than 4K, which will be very slow due to IO speed.',
+                  'We highly recommend to decrease the outscale(aka, -s).')
+        video_in = ffmpeg.input('pipe:', format='rawvideo', pix_fmt='bgr24', s=f'{width}x{height}',
+                                framerate=fps)
+        if audio is not None:
+            output = video_in.output(
+                audio,
+                video_save_path,
+                pix_fmt='yuv420p',
+                vcodec='libx264',
+                loglevel='error',
+                acodec='copy')
+        else:
+            output = video_in.output(
+                video_save_path, pix_fmt='yuv420p', vcodec='libx264', loglevel='error')
+        self.stream_writer = output.overwrite_output().run_async(
+            pipe_stdin=True, pipe_stdout=True, cmd='ffmpeg')
+
+    def write_frame(self, frame):
+        """Cast ``frame`` to uint8 and write its bytes to ffmpeg's stdin."""
+        try:
+            frame = frame.astype(np.uint8).tobytes()
+            self.stream_writer.stdin.write(frame)
+        except BrokenPipeError:
+            print('Please re-install ffmpeg and libx264 by running\n',
+                  '\t$ conda install -c conda-forge ffmpeg\n',
+                  '\t$ conda install -c conda-forge x264')
+            # The pipe to ffmpeg is gone: exit non-zero so the failure is visible.
+            sys.exit(1)
+
+    def close(self):
+        """Close ffmpeg's stdin, then wait for the process to exit."""
+        self.stream_writer.stdin.close()
+        self.stream_writer.wait()
+
+
+def create_test_video(output_path, duration=5, fps=30, width=640, height=480):
+    """Encode ``duration`` seconds of a moving, color-cycling circle to ``output_path``.
+
+    Prints and re-raises any exception from the encoding step, and raises
+    ``RuntimeError`` when ffmpeg exits with a non-zero status.
+    """
+    writer = VideoWriter(output_path, height, width, fps, audio=None)
+    total_frames = duration * fps
+    # The pixel grid is the same for every frame, so build it once.
+    xx, yy = np.meshgrid(np.arange(width), np.arange(height))
+
+    try:
+        for i in range(total_frames):
+            phase = i * 2 * np.pi / total_frames
+            # The circle center orbits the middle of the frame once per clip.
+            cx = int(width / 2 + width / 4 * np.sin(phase))
+            cy = int(height / 2 + height / 4 * np.cos(phase))
+            color = (
+                int(255 * np.sin(phase) ** 2),
+                int(255 * np.cos(phase) ** 2),
+                int(255 * np.sin(2 * phase) ** 2),
+            )
+
+            frame = np.zeros((height, width, 3), dtype=np.uint8)
+            frame[(xx - cx) ** 2 + (yy - cy) ** 2 < 50 ** 2] = color
+            writer.write_frame(frame)
+
+        writer.close()
+        # close() discards ffmpeg's exit status; check it here so a failed
+        # encode is not reported as success.
+        if writer.stream_writer.returncode != 0:
+            raise RuntimeError(f"ffmpeg exited with status {writer.stream_writer.returncode}")
+        print(f"ビデオが正常にエンコードされ、{output_path}に保存されました")
+    except Exception as e:
+        print(f"エラーが発生しました: {str(e)}")
+        raise
+
+
+if __name__ == "__main__":
+    output_file = "test_video_standalone.mp4"
+    create_test_video(output_file)
+    print(f"テスト完了: {output_file}")