diff --git a/modal/README.md b/modal/README.md new file mode 100644 index 000000000..ac6dbb4f3 --- /dev/null +++ b/modal/README.md @@ -0,0 +1,29 @@ +# OpenPCDet on Modal + +This is a guide to running OpenPCDet on [Modal](https://modal.com/), a platform for running machine learning workflows in the cloud. + +In this example we will train a PointPillars model on the nuScenes mini dataset. Credit to [ies0411](https://github.com/ies0411), who contributed the Dockerfile in https://github.com/open-mmlab/OpenPCDet/pull/1513. + +## Install Modal + +* `pip install modal` +* `modal setup` + +See https://modal.com/docs/guide for more information. + +## Prepare data + +* Download the nuScenes mini dataset. +* Create a shared Modal volume: `modal volume create nuscenes`. +* Optionally, delete all image and radar folders to speed up the next step. +* Upload the data to the volume: `cd path-to-downloaded-data`, then `modal volume put nuscenes v1.0-mini`. + +## Format the data + +Build the nuScenes `infos` used by OpenPCDet. This is a one-time operation. 
# Image used to run OpenPCDet on Modal: CUDA 11.6 development base with
# PyTorch 1.13.1 and OpenPCDet installed in develop mode.
FROM nvidia/cuda:11.6.2-devel-ubuntu20.04
ENV DEBIAN_FRONTEND=noninteractive

# Get all dependencies (each package listed once)
RUN apt-get update && apt-get install -y \
    git zip build-essential cmake libssl-dev python3-dev python3-pip ninja-build wget ca-certificates ffmpeg libsm6 libxext6 &&\
    rm -rf /var/lib/apt/lists/*

# Expose `python` and put the user-level pip install location on PATH.
RUN ln -sv /usr/bin/python3 /usr/bin/python
ENV PATH="/root/.local/bin:${PATH}"
RUN wget -O /root/get-pip.py https://bootstrap.pypa.io/get-pip.py && python3 /root/get-pip.py --user

# PyTorch for CUDA 11.6
RUN pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
ENV TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX;Kepler;Kepler+Tesla;Maxwell;Maxwell+Tegra;Pascal;Volta;Turing;8.6"

RUN pip install numpy==1.23.0 llvmlite numba opencv-python tensorboardX easydict pyyaml scikit-image tqdm SharedArray open3d mayavi av2 pyquaternion kornia==0.6.8 nuscenes-devkit==1.0.5 spconv-cu116
RUN python -m pip install --user jupyter
RUN git clone https://github.com/open-mmlab/OpenPCDet.git
# Absolute path: the Modal scripts refer to /OpenPCDet/tools and
# /OpenPCDet/data/nuscenes, so do not depend on the base image's working
# directory (assumed to be / -- TODO confirm).
WORKDIR /OpenPCDet
RUN python setup.py develop
RUN pip install torch-scatter==2.0.9 -f https://data.pyg.org/whl/torch-1.13.1+cu116.html
import modal

# Container image shared by the Modal functions in this example. It is built
# from the Dockerfile that sits next to this script, then the local tools/
# directory is copied to /OpenPCDet/tools inside the image.
# The file name must match the on-disk name exactly ("current.Dockerfile"):
# path lookups are case-sensitive on Linux, so "current.dockerfile" is not
# found there.
# NOTE(review): both local paths are relative, so `modal run` presumably has
# to be started from the modal/ directory -- confirm.
pcdet_cuda_image = (
    modal.Image.from_dockerfile("current.Dockerfile")
    .copy_local_dir("../tools", "/OpenPCDet/tools")
)

# Named Modal volume that holds the uploaded nuScenes data.
volume = modal.Volume.from_name("nuscenes")
# Path inside the container at which the volume is mounted.
VOL_MOUNT_PATH = "/OpenPCDet/data/nuscenes"

# Mount-path -> volume mapping, passed as `volumes=` to `@app.function`.
volumes = {VOL_MOUNT_PATH: volume}
def DataSetClassFactory(name):
    """Return the dataset class registered under ``name``.

    Classes listed in the module-level ``__all__`` registry are returned
    directly. ``Argo2Dataset`` is deliberately kept out of that registry and
    imported only on request: argo2_dataset.py does a global import of av2,
    which is not part of the standard install, so importing it eagerly would
    force every user to install av2.

    Args:
        name: Dataset class name (``build_dataloader`` passes
            ``dataset_cfg.DATASET``).

    Returns:
        The dataset class matching ``name``.

    Raises:
        KeyError: If ``name`` is neither in the registry nor
            ``'Argo2Dataset'``. This matches what a plain ``__all__[name]``
            lookup raises, instead of silently returning ``Argo2Dataset``
            for a misspelled or unknown dataset name.
    """
    if name in __all__:
        return __all__[name]
    if name == 'Argo2Dataset':
        # Lazy import: only pay the av2 dependency when Argo2 is requested.
        from .argo2.argo2_dataset import Argo2Dataset
        return Argo2Dataset
    raise KeyError(name)