diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..61e85c3
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,78 @@
+FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04
+
+# Install conda
+# copy-pasted from https://github.com/anaconda/docker-images/blob/main/anaconda3/debian/Dockerfile
+ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
+ENV PATH=/opt/conda/bin:$PATH
+
+# renovate: datasource=custom.anaconda_installer
+ARG INSTALLER_URL_LINUX64="https://repo.anaconda.com/archive/Anaconda3-2024.10-1-Linux-x86_64.sh"
+ARG SHA256SUM_LINUX64="3ba0a298155c32fbfd80cbc238298560bf69a2df511783054adfc151b76d80d8"
+# renovate: datasource=custom.anaconda_installer
+ARG INSTALLER_URL_S390X="https://repo.anaconda.com/archive/Anaconda3-2024.10-1-Linux-s390x.sh"
+ARG SHA256SUM_S390X="e00bd5e6c275695e8050a45aa85790315f504c95243dfe3632f505284310f3c4"
+# renovate: datasource=custom.anaconda_installer
+ARG INSTALLER_URL_AARCH64="https://repo.anaconda.com/archive/Anaconda3-2024.10-1-Linux-aarch64.sh"
+ARG SHA256SUM_AARCH64="489c608e8bddd2cf29dfbdd811cf99087cd6b6a0615d41c6f9058ce340594b65"
+
+# hadolint ignore=DL3008
+RUN set -x && \
+    apt-get update --fix-missing && \
+    apt-get install -y --no-install-recommends \
+        bzip2 \
+        ca-certificates \
+        git \
+        libglib2.0-0 \
+        libsm6 \
+        libxcomposite1 \
+        libxcursor1 \
+        libxdamage1 \
+        libxext6 \
+        libxfixes3 \
+        libxi6 \
+        libxinerama1 \
+        libxrandr2 \
+        libxrender1 \
+        mercurial \
+        openssh-client \
+        procps \
+        subversion \
+        wget \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* && \
+    UNAME_M="$(uname -m)" && \
+    if [ "${UNAME_M}" = "x86_64" ]; then \
+        INSTALLER_URL=${INSTALLER_URL_LINUX64}; \
+        SHA256SUM=${SHA256SUM_LINUX64}; \
+    elif [ "${UNAME_M}" = "s390x" ]; then \
+        INSTALLER_URL=${INSTALLER_URL_S390X}; \
+        SHA256SUM=${SHA256SUM_S390X}; \
+    elif [ "${UNAME_M}" = "aarch64" ]; then \
+        INSTALLER_URL=${INSTALLER_URL_AARCH64}; \
+        SHA256SUM=${SHA256SUM_AARCH64}; \
+    else \
+        echo "Unsupported architecture: ${UNAME_M}" >&2 && exit 1; \
+    fi && \
+    wget "${INSTALLER_URL}" -O anaconda.sh -q && \
+    echo "${SHA256SUM} anaconda.sh" > shasum && \
+    sha256sum --check --status shasum && \
+    /bin/bash anaconda.sh -b -p /opt/conda && \
+    rm anaconda.sh shasum && \
+    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
+    echo "conda activate" >> ~/.bashrc && \
+    find /opt/conda/ -follow -type f -name '*.a' -delete && \
+    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
+    /opt/conda/bin/conda clean -afy
+
+# Install Meta perception_models
+# NOTE(review): the clone is unpinned (HEAD of default branch) — consider pinning a release tag or commit for reproducible builds.
+# NOTE(review): the cu124 wheels bundle CUDA 12.4 user-space libraries while the base image is CUDA 12.1 — confirm the target host driver is new enough.
+RUN git clone https://github.com/facebookresearch/perception_models.git \
+    && cd perception_models \
+    && conda create -y --name perception_models python=3.12 \
+    && conda run --no-capture-output -n perception_models pip install --no-cache-dir torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 xformers --index-url https://download.pytorch.org/whl/cu124 \
+    && conda install -n perception_models -y ffmpeg -c conda-forge \
+    && conda run --no-capture-output -n perception_models pip install --no-cache-dir torchcodec==0.1 --index-url=https://download.pytorch.org/whl/cu124 \
+    && conda run --no-capture-output -n perception_models pip install --no-cache-dir -e . \
+    && conda clean -afy
diff --git a/README.md b/README.md
index c67c719..e1b94ff 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,38 @@ pip install -e .
 
 This will install an editable version of repo, allowing you to make changes to the code without needing to reinstall the package every time.
 
+## Run as Docker Container
+
+### Build Docker Container:
+```bash
+docker build -t facebookresearch/perception_models .
+```
+
+### Run Example:
+
+To run the model using Docker, first copy your video to `/tmp/demo.mp4`. Replace `HF_TOKEN` with your Hugging Face token, and adjust `MODEL` and `QUESTION` according to your needs:
+
+```bash
+HF_TOKEN=hf_REPLACE_ME
+QUESTION='What is happening in the video?'
+MEDIA='/tmp/demo.mp4' +MODEL='facebook/Perception-LM-1B' + +docker run -it --gpus all \ + -v /tmp:/tmp \ + -e HF_TOKEN="${HF_TOKEN}" \ + facebookresearch/perception_models \ + conda run -n perception_models --no-capture-output \ + python3 /perception_models/apps/plm/generate.py \ + --ckpt "${MODEL}" \ + --media_type video \ + --question "${QUESTION}" \ + --media_path "${MEDIA}" +``` + +This command runs the Docker container with available Nvidia GPUs attached. + + ## 🙏 Acknowledgement We are thankful to [Meta Lingua](https://github.com/facebookresearch/lingua) for releasing their code as open-source contributions. The code structure and code implementation of the LLM is directly forked from [Meta Lingua](https://github.com/facebookresearch/lingua). We are also thankful to [Open_CLIP](https://github.com/mlfoundations/open_clip) for open-source contributions in CLIP training, and [CLIP_benchmark](https://github.com/LAION-AI/CLIP_benchmark) for CLIP model evaluation.