diff --git a/.github/actions/build-and-test/action.yml b/.github/actions/build-and-test/action.yml new file mode 100644 index 00000000..217b444f --- /dev/null +++ b/.github/actions/build-and-test/action.yml @@ -0,0 +1,92 @@ +name: 'Build and Test' +description: 'Build nvblox Docker image and run tests' +inputs: + platform: + description: 'Platform to build for' + required: true + cuda-arch: + description: 'CUDA architecture' + required: true + cuda-version: + description: 'CUDA version' + required: true + ubuntu-version: + description: 'Ubuntu version' + required: true + gcc-sanitizer: + description: 'Build in debug mode with gcc sanitizers enabled' + required: false + default: 0 + ngc-api-key: + description: 'NGC API Key for authentication' + required: true + run-cpp-tests: + description: 'Run C++ unit tests' + required: false + default: 'true' + run-python-tests: + description: 'Run Python unit tests' + required: false + default: 'true' + run-cuda-sanitizer: + description: 'Run CUDA sanitizer tests' + required: false + default: 'true' + run-realsense-tests: + description: 'Run Realsense example tests' + required: false + default: 'true' +runs: + using: 'composite' + steps: + - name: NGC Login + uses: ./.github/actions/ngc-login + with: + ngc-api-key: ${{ inputs.ngc-api-key }} + - name: Set COMMON_PREMERGE_ARGS env + shell: bash + run: | + # Store arguments used for all jobs in an environment variable. + echo "COMMON_PREMERGE_ARGS=--platform ${{ inputs.platform }} --cuda-arch ${{ inputs.cuda-arch }} --cuda-version ${{ inputs.cuda-version }} --ubuntu-version ${{ inputs.ubuntu-version }} --gcc-sanitizer ${{ inputs.gcc-sanitizer }}" >> $GITHUB_ENV + # Note that the two build-image steps could be omitted since the build-and-test steps also build necessary images. + # However, we separate the steps to get cleaner logs and better timing granularity. 
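+ # For illustration (hypothetical inputs), the resulting variable expands to: + # COMMON_PREMERGE_ARGS=--platform x86_64 --cuda-arch native --cuda-version 12 --ubuntu-version 22 --gcc-sanitizer 0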
+ - name: Build dependency image + shell: bash + run: | + echo "::group::BUILD DEPENDENCY IMAGE" + time python3 ci/premerge.py $COMMON_PREMERGE_ARGS --build-image deps + echo "::endgroup::" + - name: Build build image + shell: bash + run: | + echo "::group::BUILD BINARIES IMAGE" + time python3 ci/premerge.py $COMMON_PREMERGE_ARGS --build-image build + echo "::endgroup::" + - name: Run CPP unit tests + if: inputs.run-cpp-tests == 'true' + shell: bash + run: | + echo "::group::RUN CPP UNIT TESTS" + time python3 ci/premerge.py $COMMON_PREMERGE_ARGS --build-and-test cpp + echo "::endgroup::" + - name: Run Python unit tests + if: inputs.run-python-tests == 'true' + shell: bash + run: | + echo "::group::RUN PYTHON UNIT TESTS" + time python3 ci/premerge.py $COMMON_PREMERGE_ARGS --build-and-test python + echo "::endgroup::" + - name: Run CUDA Sanitizer + if: inputs.run-cuda-sanitizer == 'true' + shell: bash + run: | + echo "::group::RUN CUDA SANITIZER" + time python3 ci/premerge.py $COMMON_PREMERGE_ARGS --build-and-test cuda-sanitizer + echo "::endgroup::" + - name: Run Realsense example test + if: inputs.run-realsense-tests == 'true' + shell: bash + run: | + echo "::group::RUN REALSENSE EXAMPLE TEST" + time python3 ci/premerge.py $COMMON_PREMERGE_ARGS --build-and-test realsense + echo "::endgroup::" diff --git a/.github/actions/ngc-login/action.yml b/.github/actions/ngc-login/action.yml new file mode 100644 index 00000000..b795b4b5 --- /dev/null +++ b/.github/actions/ngc-login/action.yml @@ -0,0 +1,21 @@ +name: 'NGC Login' +description: 'Login to NVIDIA NGC Container Registry' +inputs: + ngc-api-key: + description: 'NGC API Key for authentication' + required: false +runs: + using: 'composite' + steps: + - name: NGC Login + shell: bash + run: | + # Only attempt NGC login if API key is available + if [ -n "${{ inputs.ngc-api-key }}" ]; then + echo "Logging into NGC registry..." 
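+ # Feed the key via stdin so it does not end up in the process list or shell trace.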
+ echo "${{ inputs.ngc-api-key }}" | docker login -u \$oauthtoken --password-stdin nvcr.io + echo "✅ Successfully logged into NGC registry" + else + echo "⚠️ NGC_API_KEY not available - skipping NGC login" + echo "This is normal for PRs from forks or when secrets are not configured" + fi diff --git a/.github/workflows/premerge.yml b/.github/workflows/premerge.yml new file mode 100644 index 00000000..73dacb5a --- /dev/null +++ b/.github/workflows/premerge.yml @@ -0,0 +1,132 @@ +name: nvblox premerge +on: + pull_request: +jobs: + ## ------------------------------------------ + ## Linting and formatting + ## ------------------------------------------ + lint_precommit: + name: pre-commit + runs-on: ubuntu-latest + steps: + - name: Checkout Code + uses: actions/checkout@v4 + with: + lfs: false + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Install pre-commit + run: | + python -m pip install --upgrade pip + pip install pre-commit clang-format==14.0.6 + - name: Run pre-commit + run: pre-commit run --all-files --show-diff-on-failure + ## ------------------------------------------ + ## Build and test x86/CU11/U22 + ## ------------------------------------------ + premerge_x86_cu11_u22: + name: Build&Test x86-CU11-U22 + needs: [lint_precommit] + runs-on: [self-hosted, gpu] # GPU jobs will run on AWS + steps: + - name: Checkout Code + uses: actions/checkout@v4 + with: + lfs: true + - name: premerge_x86_cu11_u22 - Unit tests + uses: ./.github/actions/build-and-test + with: + platform: x86_64 + cuda-arch: 'native' + cuda-version: '11' + ubuntu-version: '22' + ngc-api-key: ${{ secrets.NGC_API_KEY }} + ## ------------------------------------------ + ## Build and test x86/CU12/U22 + ## ------------------------------------------ + premerge_x86_cu12_u22: + name: Build&Test x86-CU12-U22 + needs: [lint_precommit] + runs-on: [self-hosted, gpu] # GPU jobs will run on AWS + steps: + - name: Checkout Code + uses: actions/checkout@v4 + with: + lfs: true + - name: premerge_x86_cu12_u22 - Unit tests + uses: ./.github/actions/build-and-test + with: + platform: x86_64 + cuda-arch: 'native' + cuda-version: '12' + ubuntu-version: '22' + ngc-api-key: ${{ secrets.NGC_API_KEY }} + ## ------------------------------------------ + ## Build and test x86/CU13/U24 + ## ------------------------------------------ + premerge_x86_cu13_u24: + name: Build&Test x86-CU13-U24 + needs: [lint_precommit] + runs-on: [self-hosted, gpu] # GPU jobs will run on AWS + steps: + - name: Checkout Code + uses: actions/checkout@v4 + with: + lfs: true + - name: premerge_x86_cu13_u24 - Unit tests + uses: ./.github/actions/build-and-test + with: + platform: x86_64 + cuda-arch: 'native' + cuda-version: '13' + ubuntu-version: '24' + ngc-api-key: ${{ secrets.NGC_API_KEY }} + run-realsense-tests: false # TODO(dtingdahl) make realsense docker build for cuda13 + ## -------------------------------------------------------- + ## Build and test x86/CU12/U22/gcc-sanitizer + ## -------------------------------------------------------- + premerge_x86_cu12_u22_debug: + name: Build&Test x86-gcc-sanitizer + needs: [lint_precommit] + runs-on: [self-hosted, gpu] # GPU jobs will run on AWS + steps: + - name: Checkout Code + uses: actions/checkout@v4 + with: + lfs: true + - name: premerge_x86_cu12_u22_debug - Unit tests + uses: ./.github/actions/build-and-test + with: + platform: x86_64 + cuda-arch: 'native' + cuda-version: '12' + ubuntu-version: '22' + gcc-sanitizer: 1 + ngc-api-key: ${{ secrets.NGC_API_KEY }} + run-python-tests: false #
TODO(dtingdahl) Enable these tests + run-realsense-tests: false # + run-cuda-sanitizer: false # + ## ------------------------------------------ + ## Build and test Orin + ## ------------------------------------------ + premerge_orin_jetpack6: + name: Build&Test orin-jetpack6 + needs: [lint_precommit] + runs-on: [self-hosted, jetson-orin, jetpack-6.2] + steps: + - name: Checkout Code + uses: actions/checkout@v4 + with: + lfs: true + - name: premerge_orin_jetpack6 - Unit tests + uses: ./.github/actions/build-and-test + with: + platform: jetpack6 + cuda-version: '12' # unused + ubuntu-version: '22' # unused + cuda-arch: '87' # Native detection of sm-arch not supported on Orin. + ngc-api-key: ${{ secrets.NGC_API_KEY }} + run-realsense-tests: false + run-cuda-sanitizer: false diff --git a/.github/workflows/sonar.yml b/.github/workflows/sonar.yml new file mode 100644 index 00000000..cf4a6340 --- /dev/null +++ b/.github/workflows/sonar.yml @@ -0,0 +1,28 @@ +name: SonarQube SA +on: + push: + branches: + - dtingdahl/ci_script2 + - main + pull_request: + types: [opened, synchronize, reopened] +jobs: + build: + name: Build and analyze + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis + - uses: SonarSource/sonarqube-scan-action@v4 + env: + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} + SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }} + # If you wish to fail your job when the Quality Gate is red, uncomment the + # following lines. This would typically be used to fail a deployment. + # We do not recommend using this in a pull request. Prefer using pull request + # decoration instead. + # - uses: SonarSource/sonarqube-quality-gate-action@v1 + # timeout-minutes: 5 + # env: + # SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b73628cb..9012968c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,8 +20,7 @@ repos: - repo: https://github.com/doublify/pre-commit-clang-format rev: 62302476d0da01515660132d76902359bed0f782 hooks: - - id: clang-format - entry: clang-format-14 -i + - id: clang-format # See premerge.yml for the clang-format version used. files: \.(c|cc|cpp|cxx|cu|cuh|h|hh|hpp|hxx|inl|proto|pb\.h|pb\.cc)$ - repo: https://github.com/pylint-dev/pylint rev: v3.0.3 diff --git a/ci/blossom-development.jenkinsfile b/ci/blossom-development.jenkinsfile new file mode 100644 index 00000000..c5fed15a --- /dev/null +++ b/ci/blossom-development.jenkinsfile @@ -0,0 +1,406 @@ +// NVBLOX Development Pipeline +// +// This pipeline builds and runs basic tests for merge requests. +// It gives developers faster feedback and helps code reviewers +// verify the compliance of a commit. +// +// Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +// +// NVIDIA CORPORATION and its licensors retain all intellectual property +// and proprietary rights in and to this software, related documentation +// and any modifications thereto. Any use, reproduction, disclosure or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA CORPORATION is strictly prohibited.
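+// Pipeline outline, for orientation: lint image/checks -> dependency and docs base images -> parallel source builds (release, debug+sanitizer, docs, realsense) -> parallel test stages.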
+ +@Library('ci-lib@dab5e635315c343114eca42af8734edd1043340b') +import com.nvidia.isaac_ros.ci.Notify +import com.nvidia.isaac.ci.utils.WorkflowScriptUtil + +// Environment variables +env.REPO_NAME = 'nvblox' +env.DOCKER_URL = 'urm.nvidia.com' +env.DOCKER_REGISTERY = 'https://urm.nvidia.com' +env.DOCKER_REPO = 'sw-isaac-public-docker' +env.DOCKER_BASE = "${env.DOCKER_URL}/${env.DOCKER_REPO}" + +// Replace url-encoded '/' with '_' to make docker happy +env.BUILD_TAG_SANITIZED = "${env.BUILD_TAG}".replaceAll("%2F", "_") + +// Docker images produced by this pipeline +env.DOCKER_IMAGE_LINT_X86 = "${env.DOCKER_BASE}/lint:${env.BUILD_TAG_SANITIZED}" +env.DOCKER_IMAGE_DOCS_X86 = "${env.DOCKER_BASE}/docs:${env.BUILD_TAG_SANITIZED}" +env.DOCKER_IMAGE_DEPS_X86 = "${env.DOCKER_BASE}/deps-x86_64:${env.BUILD_TAG_SANITIZED}" +env.DOCKER_IMAGE_DEPS_X86_CU13 = "${env.DOCKER_BASE}/deps-x86_64-cu13:${env.BUILD_TAG_SANITIZED}" +env.DOCKER_IMAGE_RELEASE_X86 = "${env.DOCKER_BASE}/release-x86_64:${env.BUILD_TAG_SANITIZED}" +env.DOCKER_IMAGE_RELEASE_X86_CU13 = "${env.DOCKER_BASE}/release-x86_64-cu13:${env.BUILD_TAG_SANITIZED}" +env.DOCKER_IMAGE_DEBUG_X86 = "${env.DOCKER_BASE}/debug-x86_64:${env.BUILD_TAG_SANITIZED}" +env.DOCKER_IMAGE_DEPS_AARCH_JP5 = "${env.DOCKER_BASE}/deps-aarch64-jp5:${env.BUILD_TAG_SANITIZED}" +env.DOCKER_IMAGE_DEPS_AARCH_JP6 = "${env.DOCKER_BASE}/deps-aarch64-jp6:${env.BUILD_TAG_SANITIZED}" +env.DOCKER_IMAGE_RELEASE_AARCH_JP5 = "${env.DOCKER_BASE}/release-aarch64-jp5:${env.BUILD_TAG_SANITIZED}" +env.DOCKER_IMAGE_RELEASE_AARCH_JP6 = "${env.DOCKER_BASE}/release-aarch64-jp6:${env.BUILD_TAG_SANITIZED}" +env.DOCKER_IMAGE_REALSENSE_X86 = "${env.DOCKER_BASE}/realsense-example-x86:${env.BUILD_TAG_SANITIZED}" + +// CUDA architectures used in CI +env.CMAKE_CUDA_ARCHITECTURES_X86="120;100;90;89;86;80;75" + +// CUDA architectures for the realsense example. +// TODO(dtingdahl) make the example work for Blackwell +env.CMAKE_CUDA_ARCHITECTURES_REALSENSE_X86="90;89;86;80;75" + +// CUDA architectures for Jetson +env.CMAKE_CUDA_ARCHITECTURES_AARCH64="87" + + +String driver_version = '535.104.05' +String image_pull_secret = 'vault-artifactory' +String arm_builder_label = 'arm-image-builder' + +def gpu_list = [ "A10", // 8.6 + "A40", // 8.6 + "A100_PCIE_40GB", // 8.0 + "Tesla_T4", // 7.5 + "QUADRO_RTX_8000", // 7.5 + + ] + +def pod_cpu = '10000m' // 10 cores (unit is millicores) +def pod_memory = '64Gi' +def pod_storage = '100Gi' + + +blossom.init() +common.init() + +Notify notify = new Notify(this) + +// Build, tag and push a docker image +def buildAndPushX86Image(dockerfile_path, docker_tag, docker_build_args="") { + script { + blossom.run_container_with_docker('docker:20.10.15', 'urm') { + checkout scm + withCredentials([ + usernamePassword(credentialsId: 'vault-ngc', usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD'), + usernamePassword(credentialsId: 'vault-artifactory', usernameVariable: 'URM_USERNAME', passwordVariable: 'URM_PASSWORD') + ]) { + sh """ + docker login -u '$USERNAME' -p $PASSWORD https://nvcr.io && \ + docker login -u '${URM_USERNAME}' -p $URM_PASSWORD https://urm.nvidia.com && \ + docker build .
\ + ${docker_build_args} \ + --tag ${docker_tag} \ + --file ${dockerfile_path} \ + --network host && \ + + docker push ${docker_tag} + """ + } + } + } +} + +// Build, tag and push a docker image +def buildAndPushARMImage(arm_builder_label, dockerfile_path, docker_tag, docker_build_args="") { + script { + blossom.run_on_static_node(arm_builder_label) { + checkout scm + withCredentials([ + usernamePassword(credentialsId: 'vault-ngc', usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD'), + usernamePassword(credentialsId: 'vault-artifactory', usernameVariable: 'URM_USERNAME', passwordVariable: 'URM_PASSWORD') + ]) { + sh """ + docker login -u '$USERNAME' -p $PASSWORD https://nvcr.io && \ + docker login -u '${URM_USERNAME}' -p $URM_PASSWORD https://urm.nvidia.com && \ + docker build . \ + ${docker_build_args} \ + --tag ${docker_tag} \ + --file ${dockerfile_path} \ + --network host && \ + + docker push ${docker_tag} + """ + } + } + } +} + +// Main pipeline +pipeline { + agent any + options { + gitLabConnection('gitlab-master') + } + triggers { + gitlab(triggerOnMergeRequest: true, branchFilterType: 'All') + } + stages { + // Get details about the commit. Used when sending slack notifications + stage('Get Commit Details') { + steps { + script { + updateGitlabCommitStatus name: 'premerge', state: 'running' + env.GIT_COMMIT_MSG = sh (script: 'git log -1 --pretty=%B ${GIT_COMMIT}', returnStdout: true).trim() + env.GIT_AUTHOR = sh (script: 'git log -1 --format=%ae ${GIT_COMMIT}', returnStdout: true).trim() + } + } + } + + // Stage which checks for lint errors + stage('Build and Push lint docker') { + steps { + buildAndPushX86Image("docker/Dockerfile.lint", + "${env.DOCKER_IMAGE_LINT_X86}") + } + } + stage('Lint checker') { + steps { + script { + blossom.run_container("${env.DOCKER_IMAGE_LINT_X86}", image_pull_secret, driver_version) { + sh ''' + cd /nvblox + bash ci/lint_nvblox_h.sh + pre-commit run --all-files + ''' + + } + } + } + } + // Stage that creates docker images containing dependencies for the build + stage('Build base images') { + parallel { + stage('X86: Build deps') { + steps { + buildAndPushX86Image("docker/Dockerfile.deps", + "${env.DOCKER_IMAGE_DEPS_X86}") + } + } + stage('X86+CU13: Build deps') { + steps { + // Pytorch is not available for cuda13, so we disable the wrapper. 
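+ // (BUILD_PYTORCH_WRAPPER=0 is assumed to make Dockerfile.deps skip the pytorch install for this image.)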
+ buildAndPushX86Image("docker/Dockerfile.deps", + "${env.DOCKER_IMAGE_DEPS_X86_CU13}", + "--build-arg BASE_IMAGE=nvcr.io/nvidia/cuda:13.0.0-devel-ubuntu24.04 --build-arg BUILD_PYTORCH_WRAPPER=0") + + } + } + stage('AARCH-jp6: build deps') { + steps { + buildAndPushARMImage(arm_builder_label, + "docker/Dockerfile.jetson_deps", + "${env.DOCKER_IMAGE_DEPS_AARCH_JP6}", + "--build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.3.0") + } + } + stage('AARCH-jp5: build deps') { + steps { + buildAndPushARMImage(arm_builder_label, + "docker/Dockerfile.jetson_deps", + "${env.DOCKER_IMAGE_DEPS_AARCH_JP5}", + "--build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r35.4.1 --build-arg SKIP_PYTORCH_INSTALL=1") + } + } + stage('x86: Build Docs Image') { + steps { + buildAndPushX86Image("docker/Dockerfile.docs", + "${env.DOCKER_IMAGE_DOCS_X86}") + } + } + } + } + // Stage for building the source code into images used for testing + stage('Build source') { + steps { + script { + parallel ( + 'X86: build release': { + buildAndPushX86Image("docker/Dockerfile.build", + "${env.DOCKER_IMAGE_RELEASE_X86}", + "--build-arg CMAKE_ARGS=\"-DCMAKE_BUILD_TYPE=RELEASE -DWARNING_AS_ERROR=1 -DCMAKE_CUDA_ARCHITECTURES=\'${env.CMAKE_CUDA_ARCHITECTURES_X86}\'\" " + + "--build-arg BASE_IMAGE=" + "${env.DOCKER_IMAGE_DEPS_X86}") + }, + 'X86+CU13: build release': { + buildAndPushX86Image("docker/Dockerfile.build", + "${env.DOCKER_IMAGE_RELEASE_X86_CU13}", + "--build-arg CMAKE_ARGS=\"-DCMAKE_BUILD_TYPE=RELEASE -DBUILD_PYTORCH_WRAPPER=0 -DWARNIN_AS_ERROR=1 -DCMAKE_CUDA_ARCHITECTURES=\'${env.CMAKE_CUDA_ARCHITECTURES_X86}\'\" " + + "--build-arg BASE_IMAGE=" + "${env.DOCKER_IMAGE_DEPS_X86_CU13}") + }, + 'X86: build debug + sanitizer': { + buildAndPushX86Image("docker/Dockerfile.build", + "${env.DOCKER_IMAGE_DEBUG_X86}", + "--build-arg CMAKE_ARGS=\"-DCMAKE_BUILD_TYPE=DEBUG -DUSE_SANITIZER=yes -DBUILD_PYTORCH_WRAPPER=0 -DWARNING_AS_ERROR=1 -DCMAKE_CUDA_ARCHITECTURES=\'${env.CMAKE_CUDA_ARCHITECTURES_X86}\'\" " + + "--build-arg BASE_IMAGE=" + "${env.DOCKER_IMAGE_DEPS_X86}") + }, + 'AARCH-jp6: build release': { + buildAndPushARMImage(arm_builder_label, + "docker/Dockerfile.build", + "${env.DOCKER_IMAGE_RELEASE_AARCH_JP6}", + "--build-arg CMAKE_ARGS=\"-DCMAKE_BUILD_TYPE=RELEASE -DWARNING_AS_ERROR=1 -DCMAKE_CUDA_ARCHITECTURES=\"${env.CMAKE_CUDA_ARCHITECTURES_AARCH64}\"\" " + + "--build-arg BASE_IMAGE=" + "${env.DOCKER_IMAGE_DEPS_AARCH_JP6}") + }, + 'AARCH-jp5: build release': { + buildAndPushARMImage(arm_builder_label, + "docker/Dockerfile.build", + "${env.DOCKER_IMAGE_RELEASE_AARCH_JP5}", + "--build-arg CMAKE_ARGS=\"-DCMAKE_BUILD_TYPE=RELEASE -DWARNING_AS_ERROR=1 -DBUILD_PYTORCH_WRAPPER=0 -DCMAKE_CUDA_ARCHITECTURES=\"${env.CMAKE_CUDA_ARCHITECTURES_AARCH64}\"\" " + + "--build-arg BASE_IMAGE=" + "${env.DOCKER_IMAGE_DEPS_AARCH_JP5}") + }, + 'x86: Build docs': { + blossom.run_container("${env.DOCKER_IMAGE_DOCS_X86}", image_pull_secret, driver_version) { + checkout scm + sh '''cd docs && make html SPHINXOPTS="-W --keep-going" && make linkcheck''' + } + }, + 'x86: Build Realsense example': { + script { + // We use the ubuntu version of the earthly dind (docker-in-docker) image since we need Python + blossom.run_container_with_docker('earthly/dind:ubuntu-24.04-docker-27.3.1-1', 'urm') { + checkout scm + withCredentials([ + usernamePassword(credentialsId: 'vault-ngc', usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD'), + usernamePassword(credentialsId: 'vault-artifactory', usernameVariable: 'URM_USERNAME', passwordVariable: 'URM_PASSWORD') + ]) 
{ // TODO(dtingdahl) use build_images.py for other builds as well + sh """ + docker login -u '$USERNAME' -p $PASSWORD https://nvcr.io && \ + docker login -u '${URM_USERNAME}' -p $URM_PASSWORD https://urm.nvidia.com && \ + python3 docker/build_images.py --build-realsense-example --cuda-arch=\"${env.CMAKE_CUDA_ARCHITECTURES_REALSENSE_X86}\" --max-num-build-jobs=8 && \ + docker tag nvblox_realsense_example_cu12_u22 ${env.DOCKER_IMAGE_REALSENSE_X86} && \ + docker push ${env.DOCKER_IMAGE_REALSENSE_X86} + """ + } + } + } + } + ) + } + } + } + // Various testing stages that execute in parallel + stage('Tests') { + steps { + script { + parallel( + 'Unit tests: X86 debug + sanitizer': { + blossom.run_container("${env.DOCKER_IMAGE_DEBUG_X86}", image_pull_secret, driver_version, pod_cpu, pod_memory, pod_storage, gpu_list) { + sh '''cd /nvblox/build/nvblox/tests && nvidia-smi && ctest -j8 --verbose -T test --no-compress-output''' + } + }, + 'Unit tests: AARCH-jp6': { + blossom.run_on_static_node("${env.DOCKER_REGISTERY}", "${env.DOCKER_IMAGE_RELEASE_AARCH_JP6}", image_pull_secret, + 'jp-6.0', '-u root') { + + // Test nvblox_torch + sh '''. /opt/venv/bin/activate && pip install /nvblox/nvblox_torch''' + sh '''. /opt/venv/bin/activate && pip install pytest && cd /nvblox/nvblox_torch/internal_tests && pytest --capture=no''' + sh '''. /opt/venv/bin/activate && pip install pytest && cd /nvblox/nvblox_torch/nvblox_torch/tests && pytest --capture=no''' + + // Run unit tests + sh '''cd /nvblox/build/nvblox/tests && nvidia-smi && ctest -j8 --verbose -T test --no-compress-output''' + } + }, + 'Realsense example': { + blossom.run_container("${env.DOCKER_IMAGE_REALSENSE_X86}", image_pull_secret, driver_version, pod_cpu, pod_memory, pod_storage, gpu_list) { + // Note(dtingdahl) The Realsense test is disabled by default since + // it requires a dedicated docker image. We pass --runxfail to + // pytest in order to enable it. + sh '''. /opt/venv/bin/activate && pip install pytest && cd /nvblox/nvblox_torch/internal_tests && pytest --runxfail test_realsense_example.py --capture=no''' + } + }, + // TODO(dtingdahl) enable these tests when the JP5 node is in a usable state + 'Unit tests: AARCH-jp5': { + blossom.run_on_static_node("${env.DOCKER_REGISTERY}", "${env.DOCKER_IMAGE_RELEASE_AARCH_JP5}", image_pull_secret, + 'jp-5.1.2', '-u root') { + sh '''cd /nvblox/build/nvblox/tests && ctest -j8 --verbose -T test --no-compress-output''' + } + }, + 'Tests in the Release image': { + blossom.run_container("${env.DOCKER_IMAGE_RELEASE_X86}", image_pull_secret, driver_version, pod_cpu, pod_memory, pod_storage, gpu_list) { + // Run Pytest. We need to install pytest after the venv is activated for it to be found. Reasons unclear. Some more info: + // https://stackoverflow.com/questions/35045038/how-do-i-use-pytest-with-virtualenv + // Note also that we need to specify the full path to the tests. Otherwise there might be conflicts with an installed + // nvblox_torch that also contains the same tests. + echo "Running Pytest" + + sh '''. /opt/venv/bin/activate && pip install /nvblox/python/evaluation /nvblox/python/scripts /nvblox/python/common /nvblox/nvblox_torch''' + // TODO(dtingdahl) Investigate how we can run all tests from the root dir with a single command. We're getting conflicts with the installed tests + // NOTE(dtingdahl) nvblox/python tests require cwd=/nvblox + sh '''. /opt/venv/bin/activate && pip install pytest && cd /nvblox && pytest /nvblox/python --capture=no''' + sh '''.
/opt/venv/bin/activate && pip install pytest && cd /nvblox/nvblox_torch/internal_tests && pytest --capture=no''' + sh '''. /opt/venv/bin/activate && pip install pytest && cd /nvblox/nvblox_torch/nvblox_torch/tests && pytest --capture=no''' + + // Run Cuda Compute-sanitizer + echo "Running Cuda Compute-sanitizer" + sh '''bash /nvblox/ci/compute_sanitizer.sh''' + // Run Unit Tests + echo "Running Unit Tests" + sh '''cd /nvblox/build/nvblox/tests && nvidia-smi && ctest -j8 --verbose -T test --no-compress-output''' + sh '''cd /nvblox/build/nvblox_torch/cpp/tests && ctest -j8 --verbose -T test --no-compress-output''' + // Run Stability tests + echo "Running Stability tests" + sh '''bash /nvblox/ci/fuser_redwood_apartment.sh''' + } + }, + 'Tests in the Release + CUDA13 image': { + blossom.run_container("${env.DOCKER_IMAGE_RELEASE_X86_CU13}", image_pull_secret, driver_version, pod_cpu, pod_memory, pod_storage, gpu_list) { + // Run Unit Tests + echo "Running Unit Tests" + sh '''cd /nvblox/build/nvblox/tests && nvidia-smi && ctest -j8 --verbose -T test --no-compress-output''' + } + }, + ) + } + } + } + } + + post { + always { + cleanWs() + } + failure { + script { + if (!(BRANCH_NAME ==~ /^(main|release-.*)$/)) { + notify.slack(GIT_AUTHOR, """ + ❌❌ Build failed for job #$BUILD_NUMBER of $JOB_NAME. ❌❌ + See build at $BUILD_URL. + For reference, the commit message for this job was "$GIT_COMMIT_MSG" + """.stripIndent().trim(), + 'failure', 'danger', false) + } + updateGitlabCommitStatus name: 'premerge', state: 'failed' + } + } + unstable { + script { + if (!(BRANCH_NAME ==~ /^(main|release-.*)$/)) { + notify.slack(GIT_AUTHOR, """ + ❌❌ Build failed for job #$BUILD_NUMBER of $JOB_NAME. ❌❌ + See build at $BUILD_URL. + For reference, the commit message for this job was "$GIT_COMMIT_MSG" + """.stripIndent().trim(), + 'failure', 'danger', false) + } + updateGitlabCommitStatus name: 'premerge', state: 'failed' + } + } + success { + script { + if (!(BRANCH_NAME ==~ /^(main|release-.*)$/)) { + notify.slack(GIT_AUTHOR, + """ + 🐸🐸 Build successful for job #$BUILD_NUMBER of $JOB_NAME. 🐸🐸 + See build at $BUILD_URL. + For reference, the commit message for this job was "$GIT_COMMIT_MSG" + """.stripIndent().trim(), + 'success', 'good', false) + } + updateGitlabCommitStatus name: 'premerge', state: 'success' + } + } + aborted { + script { + updateGitlabCommitStatus name: 'premerge', state: 'canceled' + } + } + } +} diff --git a/ci/blossom-nightly.jenkinsfile b/ci/blossom-nightly.jenkinsfile new file mode 100644 index 00000000..3d711b1c --- /dev/null +++ b/ci/blossom-nightly.jenkinsfile @@ -0,0 +1,231 @@ +// NVBLOX deployment Pipeline +// +// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// +// NVIDIA CORPORATION and its licensors retain all intellectual property +// and proprietary rights in and to this software, related documentation +// and any modifications thereto. Any use, reproduction, disclosure or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA CORPORATION is strictly prohibited. 
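+// Per-platform flow (see runDeploymentPipeline below): build the deps image -> build the release image -> publish the py wheel to staging -> install and test the wheel in a clean test image -> publish the wheel to the release area.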
+ +@Library('ci-lib@dab5e635315c343114eca42af8734edd1043340b') +import com.nvidia.isaac.ci.Notify +import com.nvidia.isaac.ci.utils.WorkflowScriptUtil + +// Environment variables +env.REPO_NAME = 'nvblox' +env.DOCKER_URL = 'urm.nvidia.com' +env.DOCKER_REGISTERY = 'https://urm.nvidia.com' +env.DOCKER_REPO = 'sw-isaac-public-docker' +env.DOCKER_BASE = "${env.DOCKER_URL}/${env.DOCKER_REPO}" + +// Replace url-encoded '/' with '_' to make docker happy +env.BUILD_TAG_SANITIZED = "${env.BUILD_TAG}".replaceAll("%2F", "_") + +// Cuda architectures supported for released artifacts +env.CMAKE_CUDA_ARCHITECTURES_X86="90;89;86;80;75;70;61" + +// Initialize blossom and common libraries +blossom.init() +common.init() + +// Users that will be notified via Slack when the nightly pipeline fails or succeeds +// TODO(dtingdahl): Figure out if we can send notifications to a slack channel instead of a user +USERS_TO_NOTIFY = ["dtingdahl@nvidia.com", "amillane@nvidia.com", "remos@nvidia.com", "vramasamy@nvidia.com", "cvolk@nvidia.com"] + +// Slack notifications +Notify notify = new Notify(this) +// Build, tag and push a docker image +def buildAndPushX86Image(dockerfile_path, docker_tag, docker_build_args="") { + script { + blossom.run_container_with_docker('docker:20.10.15', 'urm') { + checkout scm + withCredentials([ + usernamePassword(credentialsId: 'vault-ngc', usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD'), + usernamePassword(credentialsId: 'vault-artifactory', usernameVariable: 'URM_USERNAME', passwordVariable: 'URM_PASSWORD') + ]) { + sh """ + # BUILD AND PUSH IMAGE + docker login -u '$USERNAME' -p $PASSWORD https://nvcr.io && \ + docker login -u '${URM_USERNAME}' -p $URM_PASSWORD https://urm.nvidia.com && \ + docker build . \ + ${docker_build_args} \ + --tag ${docker_tag} \ + --file ${dockerfile_path} \ + --network host && \ + + docker push ${docker_tag} + """ + } + } + } +} + +// Build, test and deploy for the given base image and platform postfix +def runDeploymentPipeline(String base_image, String platform_postfix) { + script { + // Image names + // N.B: don't forget to add "def" when declaring local variables. Otherwise they will become static + def deps_image = "${env.DOCKER_BASE}/deps-x86_64:${env.BUILD_TAG_SANITIZED}${platform_postfix}" + def build_image = "${env.DOCKER_BASE}/release-x86_64:${env.BUILD_TAG_SANITIZED}${platform_postfix}" + def test_image = "${env.DOCKER_BASE}/test-x86_64:${env.BUILD_TAG_SANITIZED}${platform_postfix}" + def image_pull_secret = 'vault-artifactory' + + + echo "Deps image: ${deps_image}" + echo "Build image: ${build_image}" + echo "Test image: ${test_image}" + + // GPUs and drivers used during testing + def String driver_version = '535.104.05' + def gpu_list = [ "A10", "A40","A100_PCIE_40GB", + "Tesla_T4", "Tesla_V100_PCIE_32GB", + "QUADRO_GV100", "QUADRO_RTX_8000", + ] + + // Build deps image + buildAndPushX86Image("docker/Dockerfile.deps", + deps_image, + "--build-arg BASE_IMAGE=" + base_image) + + // Build binary image + buildAndPushX86Image("docker/Dockerfile.build", + build_image, + "--build-arg CMAKE_ARGS=\"-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CUDA_ARCHITECTURES=\'${env.CMAKE_CUDA_ARCHITECTURES_X86}\'\" " + + "--build-arg BASE_IMAGE=" + deps_image + " " + + "--build-arg MAX_NUM_JOBS=2" // We tend to run out of memory with too many simultaneous jobs. 
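+ // docker/Dockerfile.build runs make with min(nproc, MAX_NUM_JOBS) jobs, so this caps the build at 2 parallel jobs.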
+ ) + + // Publish py wheel to staging area + blossom.run_container("${build_image}", "${image_pull_secret}") { + withCredentials([ + usernamePassword(credentialsId: 'vault-artifactory', usernameVariable: 'URM_USERNAME', passwordVariable: 'URM_PASSWORD') + ]) { + sh """ + # PUBLISH PY WHEEL TO STAGING AREA + . /opt/venv/bin/activate && \ + pip install --upgrade setuptools && \ + python /nvblox/ci/ship_it.py --build-number ${env.BUILD_NUMBER} --publish-to-staging --username '${URM_USERNAME}' --password '${URM_PASSWORD}' + """ + + } + } + + + // Build image we're running the test in. It's just the base image with some basic deps installed + buildAndPushX86Image("docker/Dockerfile.test_nvblox_torch", + test_image, + "--build-arg BASE_IMAGE=" + base_image) + + + // Test the py wheel in the test image + // Note that we also checkout the source code. This is needed to run the deploy script which gives us an URL to the wheel. + // Nothing else in the test image depends on the source code. + blossom.run_container(test_image, image_pull_secret, driver_version, '10000m', '20Gi', '100Gi', gpu_list) { + checkout scm + sh """ + # TEST PY WHEEL IN TEST IMAGE + # Get the URL of the wheel in the staging area + . /opt/venv/bin/activate && \ + STAGING_URL=\$(python3 ci/ship_it.py --build-number ${env.BUILD_NUMBER} --print-staging-url) + + # Install the wheel + . /opt/venv/bin/activate && python3 -m pip install \$STAGING_URL + + # Run the tests + NVBLOX_TORCH_INSTALL_DIR=\$(. /opt/venv/bin/activate && python3 -c "import site; print(site.getsitepackages()[0])")/nvblox_torch + . /opt/venv/bin/activate && pip install pytest && pytest -s \$NVBLOX_TORCH_INSTALL_DIR + """ + + } + + // Publish to release area + blossom.run_container(build_image, image_pull_secret) { + withCredentials([ + usernamePassword(credentialsId: 'vault-artifactory', usernameVariable: 'URM_USERNAME', passwordVariable: 'URM_PASSWORD') + ]) { + sh """ + # PUBLISH PY WHEEL TO RELEASE AREA + . /opt/venv/bin/activate && \ + python /nvblox/ci/ship_it.py --build-number ${env.BUILD_NUMBER} --publish-to-release --username '${URM_USERNAME}' --password '${URM_PASSWORD}' + """ + } + } + } +} // end runDeploymentPipeline + + +pipeline { + agent any + options { + gitLabConnection('gitlab-master') + } // end options + triggers { + gitlab(triggerOnMergeRequest: true, branchFilterType: 'All') + } // end triggers + + + stages { + stage('Clean Workspace') { + steps { + sh 'rm -f .git/refs/remotes/origin/feature/weighting.lock' + cleanWs() + } + } + + stage ('Cuda12 / Ubuntu24') + { + steps { + runDeploymentPipeline("nvcr.io/nvidia/cuda:12.8.0-devel-ubuntu24.04", "cu12_u24") + } + } + + stage ('Cuda12 / Ubuntu22') + { + steps { + runDeploymentPipeline("nvcr.io/nvidia/cuda:12.6.1-devel-ubuntu22.04", "cu12_u22") + } + } + + stage ('Cuda11 / Ubuntu22') + { + steps { + runDeploymentPipeline("nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04", "cu11_u22") + } + } + + + } //end stages + + post { + always { + cleanWs() + } + failure { + script { + for (user in USERS_TO_NOTIFY) { + notify.slack(user, """ + ❌❌ NVBLOX deployment pipeline failed ❌❌ + ❌❌ Build: #$BUILD_NUMBER Job: $JOB_NAME. ❌❌ + See build at $BUILD_URL. + """.stripIndent().trim(), + 'failure', 'danger', false) + } //end fo + } // end script + } //end failure + + success { + script { + for (user in USERS_TO_NOTIFY) { + notify.slack(user, """ + ✅✅ NVBLOX deployment pipeline succeeded ✅✅ + ✅✅ Build: #$BUILD_NUMBER Job: $JOB_NAME. ✅✅ + See build at $BUILD_URL. 
+ Artifact URL: https://urm.nvidia.com/ui/repos/tree/General/hw-nvblox-alpine-local/pypi/release/nvblox_torch + """.stripIndent().trim(), + 'success', 'good', false) + } //end fo + } // end script + } //end success + } // end post +} // end pipeline diff --git a/ci/ci_utils.py b/ci/ci_utils.py new file mode 100644 index 00000000..b7e60b0b --- /dev/null +++ b/ci/ci_utils.py @@ -0,0 +1,382 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. +# +"""Base classes and abstractions for Docker image management. + +This module provides abstract base classes for building and testing Docker images, +along with common enumerations used across different CI systems. +""" + +import argparse +import subprocess +from abc import ABC, abstractmethod +from enum import Enum +from typing import List, Optional, Tuple +import re +import pprint + + +class Platform(Enum): + X86_64 = 'x86_64' + JETPACK_5 = 'jetpack5' + JETPACK_6 = 'jetpack6' + + +class CudaVersion(Enum): + CUDA_11 = '11' + CUDA_12 = '12' + CUDA_13 = '13' + + +class UbuntuVersion(Enum): + UBUNTU_22 = '22' + UBUNTU_24 = '24' + + +class CudaSmArchitectures(Enum): + SM_X86_CI_SUPPORTED = '120;100;90;89;86;80;75' + SM_JETPACK_ORIN = '87' + SM_NATIVE = 'native' + + +MAX_CONSECUTIVE_IDENTICAL_LOG_LINES = 100 + + +def _try_parse_gcc_output_line(line: str) -> Tuple[Optional[str], Optional[int]]: + """Try to extract file path and line number from gcc/clang/cmake-style output""" + # Example: /path/to/file.cpp:LINE: + match = re.search(r'([^\s:]+):(\d+)(?::\d+)?[ :]', line) + if match: + return match.group(1), int(match.group(2)) + return None, None + + +def _try_parse_nvcc_output_line(line: str) -> Tuple[Optional[str], Optional[int]]: + """Try to extract file path and line number from nvcc output""" + # Example: /path/to/file.cu(LINE): + match = re.search(r'([^\s:()]+)\((\d+)\)[ :]', line) + if match: + return match.group(1), int(match.group(2)) + return None, None + + +def _maybe_print_github_annotation(line: str) -> None: + """Print a line as a GitHub Actions annotation if a warning/error keyword is found.""" + + error_keywords = [ + # gcc/nvcc + 'error:', + 'fatal error:', + # cmake + 'cmake error', + 'cmake fatal error', + ] + warning_keywords = [ + # gcc + 'warning:', + # nvcc + 'warning #', + # Cmake + 'cmake warning', + 'cmake deprecation warning', + # various + 'permission denied', + ] + + title = None + if any(keyword in line.lower() for keyword in error_keywords): + title = '::error' + if any(keyword in line.lower() for keyword in warning_keywords): + title = '::warning' + + # If warning or error found, print as GitHub Actions annotation. + if title is not None: + # Try to extract file path and line number from output. + file_path, line_number = _try_parse_gcc_output_line(line) + if file_path is None or line_number is None: + file_path, line_number = _try_parse_nvcc_output_line(line) + + if file_path is None or line_number is None: + print(f'{title} ::{line.strip()}') + else: + # Make file path relative to nvblox root. 
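+ # (CI builds copy the repo to /nvblox inside the image, so compiler paths carry that prefix.)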
+ file_path = file_path.replace('/nvblox/', '') + print(f'{title} file={file_path},line={line_number} ::{line.strip()}') + + +def _run_and_parse_log(cmd: List[str]) -> None: + """Run a command and parse its log. + - The log is printed to console unmodified. + - Errors and warnings are captured and annotated for GitHub Actions. + - If there are too many identical lines in a row, the output is truncated. + """ + # Run the subprocess and redirect stderr to stdout + with subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True, + ) as process: + + # Count successive identical lines. + num_identical = 0 + last_line = None + + # Parse output line by line. + if process.stdout is not None: + for line in process.stdout: + num_identical = num_identical + 1 if line == last_line else 0 + last_line = line + + # Only print if there are not too many identical lines in a row. + if num_identical < MAX_CONSECUTIVE_IDENTICAL_LOG_LINES: + # Print live output + print(line, end='') + + # Print errors and warnings as GitHub Actions annotations. + _maybe_print_github_annotation(line) + elif num_identical == MAX_CONSECUTIVE_IDENTICAL_LOG_LINES: + print( + '::warning :: Truncating output due to too many identical lines in a row.') + + assert process.wait() == 0, 'Command failed' + + +class DockerImage(ABC): + """Abstract base class for Docker images. + + Wraps a dockerfile + build args. + Support a base image if "FROM ${BASE_IMAGE}" is used in the Dockerfile. + """ + + def __init__(self, args: argparse.Namespace): + self.args = args + + @abstractmethod + def image_name_root(self) -> str: + """Root name for the image (without suffix)""" + pass + + @abstractmethod + def dockerfile_path(self) -> str: + """Path to the Dockerfile""" + pass + + @abstractmethod + def parent_image(self) -> Optional['DockerImage']: + """Image can have a single parent image. + + Returns a DockerImage or OSImage. + """ + pass + + @abstractmethod + def build_args(self) -> List[str]: + """Build arguments for the docker build command""" + return [] + + def image_name_suffix(self) -> str: + """Platform/arch dependent suffix for the image name""" + suffix = (f'{self.args.platform.value}_cu{self.args.cuda_version.value}' + f'_u{self.args.ubuntu_version.value}') + if self.args.gcc_sanitizer == 1: + suffix += '_gsan' + return suffix + + def image_name(self) -> str: + """Full image name with suffix""" + return self.image_name_root() + '_' + self.image_name_suffix() + + def build(self) -> None: + """Build a docker image from a Dockerfile. 
First builds the parent image if it exists.""" + + parent = self.parent_image() + if parent is not None: + parent.build() + + image_name = self.image_name() + + # Print build information + print('=' * 80) + print(f'BUILDING: {image_name}') + print('=' * 80) + print(f'Dockerfile: {self.dockerfile_path()}') + if parent is not None: + print(f'Parent image: {parent.image_name()}') + print(f'Platform: {self.args.platform.value}') + print(f'CUDA version: {self.args.cuda_version.value}') + print(f'CUDA architecture: {self.args.cuda_arch.value}') + print(f'Ubuntu version: {self.args.ubuntu_version.value}') + print(f'Max number of jobs: {self.args.max_num_jobs}') + build_args_str = ', '.join(self.build_args() or []) + print(f'Build arguments: {build_args_str}') + user_build_args_str = ', '.join(self.args.user_build_args or []) + print(f'User build arguments: {user_build_args_str}') + print('=' * 80) + print('', flush=True) + + cmd = [ + 'docker', + 'build', + '-f', + self.dockerfile_path(), + '-t', + image_name, + '--network=host', + '--progress=plain', + ] + + if parent is not None: + parent_name = parent.image_name() + cmd += ['--build-arg', f'BASE_IMAGE={parent_name}'] + + if self.build_args() is not None: + for arg in self.build_args(): + cmd += ['--build-arg', arg] + + # Add extra docker args from args if provided + if self.args.user_build_args is not None: + cmd += self.args.user_build_args + + cmd += ['.'] + + print(' '.join(cmd)) + + _run_and_parse_log(cmd) + + self._validate() + + def _validate(self) -> None: + """Validate that the correct cuda/ubuntu version was built""" + + # Check ubuntu version + lsb_release_result = subprocess.run( + ['docker', 'run', '--rm', + self.image_name(), 'lsb_release', '-a'], + check=True, + capture_output=True, + text=True, + ) + expected_ubuntu = f'Ubuntu {self.args.ubuntu_version.value}' + assert expected_ubuntu in lsb_release_result.stdout, ( + f'Failed to find the correct ubuntu version. ' + f'Stdout: {lsb_release_result.stdout}') + + # Check cuda version + cuda_version_result = subprocess.run( + ['docker', 'run', '--rm', + self.image_name(), 'nvcc', '--version'], + check=True, + capture_output=True, + text=True, + ) + expected_cuda = f'cuda_{self.args.cuda_version.value}' + assert expected_cuda in cuda_version_result.stdout, ( + f'Failed to find the correct cuda version. ' + f'Stdout: {cuda_version_result.stdout}') + + print(f'Successfully validated image: {self.image_name()}') + + +class TestBase(ABC): + """Base class for running unit tests in a container""" + + def __init__(self, args: argparse.Namespace): + self.args = args + + @abstractmethod + def image(self) -> DockerImage: + """Get the image to run the test on""" + pass + + @abstractmethod + def get_command(self) -> str: + """Get the command to run in the test""" + pass + + @abstractmethod + def get_cwd(self) -> str: + """Get the current working directory""" + pass + + def run(self) -> None: + """Build image and run command inside it""" + self.image().build() + docker_cmd = ['docker', 'run', '--privileged', '--rm', self.image().image_name()] + cwd = self.get_cwd() + cmd = self.get_command() + full_cmd = docker_cmd + ['bash', '-c'] + [f'cd {cwd} && {cmd}'] + + print(' '.join(full_cmd)) + _run_and_parse_log(full_cmd) + + +class OsImage(DockerImage): + """External cuda or jetpack OS base image. 
Used as a parent image for other images.""" + + AVAILABLE_OS_IMAGES = { + Platform.X86_64: { + CudaVersion.CUDA_11: { + UbuntuVersion.UBUNTU_22: 'nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04', + }, + CudaVersion.CUDA_12: { + UbuntuVersion.UBUNTU_22: 'nvcr.io/nvidia/cuda:12.8.0-devel-ubuntu22.04', + UbuntuVersion.UBUNTU_24: 'nvcr.io/nvidia/cuda:12.8.0-devel-ubuntu24.04', + }, + CudaVersion.CUDA_13: { + UbuntuVersion.UBUNTU_22: 'nvcr.io/nvidia/cuda:13.0.0-devel-ubuntu22.04', + UbuntuVersion.UBUNTU_24: 'nvcr.io/nvidia/cuda:13.0.0-devel-ubuntu24.04', + }, + }, + Platform.JETPACK_5: { + CudaVersion.CUDA_11: { + UbuntuVersion.UBUNTU_22: 'nvcr.io/nvidia/l4t-jetpack:r35.4.1' + } + }, + Platform.JETPACK_6: { + CudaVersion.CUDA_12: { + UbuntuVersion.UBUNTU_22: 'nvcr.io/nvidia/l4t-jetpack:r36.4.0' + } + }, + } + + def get_os_image_name(self) -> str: + platform_images = self.AVAILABLE_OS_IMAGES.get(self.args.platform, {}) + cuda_images = platform_images.get(self.args.cuda_version, {}) + os_image = cuda_images.get(self.args.ubuntu_version) + if os_image is None: + raise ValueError(f'No OS image available for platform {self.args.platform}, ' + f'cuda version {self.args.cuda_version}, ' + f'and ubuntu version {self.args.ubuntu_version}.\n' + f'Available images:\n' + f'{pprint.pformat(self.AVAILABLE_OS_IMAGES, indent=2)}') + return os_image + + def image_name(self) -> str: + return self.get_os_image_name() + + def image_name_root(self) -> str: + raise NotImplementedError('OsImage does not have a base name') + + def dockerfile_path(self) -> str: + raise NotImplementedError('OsImage does not have a dockerfile') + + def parent_image(self) -> None: + return None + + def build_args(self) -> List[str]: + return [] + + def build(self) -> None: + """OS images are external and do not need to be built""" + pass diff --git a/ci/compute_sanitizer.sh b/ci/compute_sanitizer.sh new file mode 100755 index 00000000..a3b8857b --- /dev/null +++ b/ci/compute_sanitizer.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Run selected tests under CUDA's compute-sanitizer +# TODO(dtingdahl) integrate with ctest + +set -exo pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +TEST_DIR=$SCRIPT_DIR/../build/nvblox/tests + +# Logging for debug reasons +compute-sanitizer --version +nvidia-smi +ls /usr/local/cuda/lib64 +dpkg -l | grep cuda || true + +# List of tests to run under compute-sanitizer +# Add tests that are CUDA-intensive and lightweight +TESTS_TO_RUN=("nvblox_benchmark --benchmark_filter=benchmarkAll" + "test_color_integrator --gtest_filter=ColorIntegrationTest.IntegrateColorToGroundTruthDistanceField" + "test_esdf_integrator --gtest_filter=ParameterizedEsdfTests/EsdfIntegratorTest.OccupancySingleEsdfTestGPU/0" + "test_freespace_integrator" + "test_gpu_hash_interface" + "test_gpu_layer_view" + "test_layer_serializer_gpu" + "test_lidar_integration" + "test_mesh_serializer" + "test_occupancy_integrator --gtest_filter=*ReconstructPlane*" + "test_occupancy_decay" + "test_tsdf_integrator --gtest_filter=*ReconstructPlane*" + "test_tsdf_decay" + ) + +# Create a txt file with all commands to run +JOB_FILE=$(mktemp) +for test_cmd in "${TESTS_TO_RUN[@]}" +do + echo "compute-sanitizer --error-exitcode=1 --tool memcheck $test_cmd" >> $JOB_FILE + echo "compute-sanitizer --error-exitcode=1 --tool initcheck $test_cmd" >> $JOB_FILE + echo "compute-sanitizer --error-exitcode=1 --tool racecheck $test_cmd" >> $JOB_FILE + echo "compute-sanitizer --error-exitcode=1 --tool synccheck $test_cmd" >> $JOB_FILE +done + +#
Launch all commands in parallel, 4 at a time + ( + cd $TEST_DIR + cat $JOB_FILE | parallel --halt-on-error now,fail=1 -j4 + rm $JOB_FILE +) diff --git a/ci/fuser_redwood_apartment.sh b/ci/fuser_redwood_apartment.sh new file mode 100755 index 00000000..a0cf96ad --- /dev/null +++ b/ci/fuser_redwood_apartment.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Stability test that runs through the redwood apartment dataset (~30k frames). +set -exo pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +EXEC_DIR=$SCRIPT_DIR/../build/nvblox/executables + +# Download the test dataset +wget https://urm.nvidia.com/artifactory/sw-isaac-sdk-generic-local/dependencies/internal/data/redwood_apartment.tar +tar -xvf redwood_apartment.tar > /dev/null + +# Run the fuser over the dataset +$EXEC_DIR/fuse_redwood redwood/apartment diff --git a/ci/lint_nvblox_h.sh b/ci/lint_nvblox_h.sh new file mode 100755 index 00000000..025524d8 --- /dev/null +++ b/ci/lint_nvblox_h.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +echo -e "Linting nvblox.h" +echo -e "" + +set -exuo pipefail + +SUCCESS=1 + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +NC='\033[0m' # No Color + +# Get list of files expected in the nvblox.h header. +# These are files which are in include, but not in an internal subfolder. +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +HEADER_DIR=$SCRIPT_DIR/../nvblox/include +HEADER_FILE_LIST=$(find $HEADER_DIR -type f \( -iname "*.h" ! -iname "*nvblox.h" \) ! -path "*/internal/*" ! -path "*/experimental/*" -printf '%P\n') + +# Check that there are no impl files in public folder locations +for HEADER_FILE in $HEADER_FILE_LIST +do + if [[ $HEADER_FILE == *"/impl/"* ]] + then + echo -e "${RED}Implementation file found in public folder: $HEADER_FILE${NC}" + SUCCESS=0 + fi +done + + +# Search nvblox.h for each of these files. +NVBLOX_H_PATH=$HEADER_DIR/nvblox/nvblox.h +INCLUDES_STRING="" +AT_LEAST_ONE_HEADER_NOT_FOUND=0 +for HEADER_FILE in $HEADER_FILE_LIST +do + if ! grep -Fq $HEADER_FILE $NVBLOX_H_PATH + then + echo -e "${RED}Public header not in nvblox.h: $HEADER_FILE${NC}" + AT_LEAST_ONE_HEADER_NOT_FOUND=1 + fi + INCLUDES_STRING+="#include \"$HEADER_FILE\"\n" +done + + +# If not all headers are included, fail and suggest headers to add. +if [ $AT_LEAST_ONE_HEADER_NOT_FOUND == 1 ] +then + echo -e "" + echo -e "${RED}Lint failing: Not all public headers are found in nvblox.h${NC}" + echo -e "" + echo -e "Replace includes in nvblox.h with the following:" + echo -e "" + echo -e $INCLUDES_STRING + SUCCESS=0 +fi + +if [ $SUCCESS == 0 ] +then + echo -e "${RED}Lint of public includes in nvblox.h failed.${NC}" + exit 1 +else + echo -e "${GREEN}Lint of public includes in nvblox.h passed.${NC}" +fi diff --git a/ci/premerge.py b/ci/premerge.py new file mode 100755 index 00000000..cb8513ee --- /dev/null +++ b/ci/premerge.py @@ -0,0 +1,310 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. +# +"""Nvblox CI script for building Docker images and running tests. + +This script provides a command-line interface for building nvblox Docker images +and running various tests within those images.
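+ +Example invocations (values for illustration only): + + python3 ci/premerge.py --platform x86_64 --cuda-version 12 --ubuntu-version 22 --build-image deps + python3 ci/premerge.py --cuda-arch native --build-and-test cpp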
+""" + +import argparse +import os +import sys +from typing import Dict, List, Type + +from ci_utils import ( + CudaSmArchitectures, + CudaVersion, + DockerImage, + Platform, + TestBase, + UbuntuVersion, + OsImage, +) +from system_info import get_native_cuda_sm_architecture, print_system_info + + +class DependenciesImage(DockerImage): + """Nvblox dependencies (deps) image""" + + def build_args(self) -> List[str]: + return [] + + def image_name_root(self) -> str: + return 'nvblox_deps' + + def dockerfile_path(self) -> str: + """Deps are different for x86 and Jetson platforms.""" + if self.args.platform == Platform.X86_64: + return os.path.join('docker', 'Dockerfile.deps') + else: + return os.path.join('docker', 'Dockerfile.jetson_deps') + + def parent_image(self) -> OsImage: + return OsImage(self.args) + + +class BuildImage(DockerImage): + """Nvblox build image containing compiled binaries and installed python modules.""" + + def image_name_root(self) -> str: + return 'nvblox_build' + + def dockerfile_path(self) -> str: + return os.path.join('docker', 'Dockerfile.build') + + def parent_image(self) -> DockerImage: + return DependenciesImage(self.args) + + def get_cuda_sm_architecture(self) -> str: + """Get the CUDA SM architectures to build for, either from args or detect native""" + if self.args.cuda_arch == CudaSmArchitectures.SM_NATIVE: + return get_native_cuda_sm_architecture() + else: + return self.args.cuda_arch.value + + def build_args(self) -> List[str]: + cuda_arch = self.get_cuda_sm_architecture() + + # Setup args to cmake + cmake_args = f'-DCMAKE_CUDA_ARCHITECTURES={cuda_arch} -DWARNING_AS_ERROR=1' + if self.args.gcc_sanitizer == 1: + cmake_args += ' -DCMAKE_BUILD_TYPE=Debug -DUSE_SANITIZER=yes' + + # Setup args to docker build + args = [f'CMAKE_ARGS={cmake_args}'] + if self.args.max_num_jobs is not None: + args += [f'MAX_NUM_JOBS={self.args.max_num_jobs}'] + + return args + + +class RealsenseImage(DockerImage): + """Nvblox image for running the Realsense example test. + + Includes specialized dependencies on top of the build image.""" + + def image_name_root(self) -> str: + return 'nvblox_realsense_example' + + def dockerfile_path(self) -> str: + return os.path.join('docker', 'Dockerfile.realsense_example') + + def parent_image(self) -> DockerImage: + return BuildImage(self.args) + + def build_args(self) -> List[str]: + return [] + + +class DocsImage(DockerImage): + """Nvblox documentation builder image.""" + + def image_name_root(self) -> str: + return 'nvblox_docs' + + def dockerfile_path(self) -> str: + return os.path.join('docker', 'Dockerfile.docs') + + def parent_image(self) -> None: + return None + + def build_args(self) -> List[str]: + return [] + + +class CppUnitTests(TestBase): + """Run the C++ unit tests""" + + def get_command(self) -> str: + num_jobs = self.args.max_num_jobs + base_cmd = f'ctest -j{num_jobs} -T test --no-compress-output --output-on-failure' + + # When running tests with gcc sanitizers, we need to disable address space + # randomization due to bug in libgcc that appears on certain platforms. + # https://stackoverflow.com/questions/77894856/possible-bug-in-gcc-sanitizers + if self.args.gcc_sanitizer == 1: + return f'setarch $(uname -m) --addr-no-randomize {base_cmd}' + return base_cmd + + def image(self) -> DockerImage: + return BuildImage(self.args) + + def get_cwd(self) -> str: + return '/nvblox/build/nvblox/tests' + + +class PythonUnitTests(TestBase): + """Run the Python unit tests""" + + def get_command(self) -> str: + cmd = '. 
/opt/venv/bin/activate && ' + cmd += 'pytest --capture=no /opt/venv/lib/*/site-packages/nvblox_torch' + return cmd + + def image(self) -> DockerImage: + return BuildImage(self.args) + + def get_cwd(self) -> str: + return '/nvblox/' + + +class CudaSanitizer(TestBase): + """Run the CUDA Sanitizer tests""" + + def get_command(self) -> str: + return 'bash ci/compute_sanitizer.sh' + + def image(self) -> DockerImage: + return BuildImage(self.args) + + def get_cwd(self) -> str: + return '/nvblox/' + + +class StabilityTest(TestBase): + """Run the Stability tests""" + + def get_command(self) -> str: + return 'bash ci/fuser_redwood_apartment.sh' + + def image(self) -> DockerImage: + return BuildImage(self.args) + + def get_cwd(self) -> str: + return '/nvblox/' + + +class RealsenseTest(TestBase): + """Run the Realsense tests""" + + def get_command(self) -> str: + + # The realsense example test is disabled by default since + # it requires a dedicated docker image. Here we pass --runxfail to + # pytest in order to enable it. + cmd = '. /opt/venv/bin/activate && ' + cmd += 'pytest --runxfail test_realsense_example.py --capture=no' + return cmd + + def get_cwd(self) -> str: + return '/nvblox/nvblox_torch/internal_tests/' + + def image(self) -> DockerImage: + return RealsenseImage(self.args) + + +# Map cmd line docker image arg to image class. +ARG_TO_IMAGE: Dict[str, Type[DockerImage]] = { + 'deps': DependenciesImage, + 'build': BuildImage, + 'realsense': RealsenseImage, + 'docs': DocsImage, +} + +# Map cmd line test arg to test class. +ARG_TO_TEST: Dict[str, Type[TestBase]] = { + 'cpp': CppUnitTests, + 'python': PythonUnitTests, + 'cuda-sanitizer': CudaSanitizer, + 'stability': StabilityTest, + 'realsense': RealsenseTest, +} + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description='Build nvblox docker images and/or run tests inside them.') + parser.add_argument( + '--build-image', + type=str, + choices=ARG_TO_IMAGE.keys(), + required=False, + help='Docker image to build. Will build the image and then exit.', + ) + parser.add_argument( + '--build-and-test', + type=str, + choices=ARG_TO_TEST.keys(), + required=False, + help= + 'Test to run. Will also build the necessary image (no-op if the image is already built).', + ) + parser.add_argument( + '--cuda-version', + type=CudaVersion, + default=CudaVersion.CUDA_12, + help='CUDA version', + ) + parser.add_argument( + '--cuda-arch', + type=CudaSmArchitectures, + required=False, + default=CudaSmArchitectures.SM_NATIVE, + help='CUDA SM architecture.', + ) + parser.add_argument( + '--platform', + type=Platform, + default=Platform.X86_64, + help='Platform to build for.', + ) + parser.add_argument( + '--ubuntu-version', + type=UbuntuVersion, + required=False, + default=UbuntuVersion.UBUNTU_24, + help='Ubuntu version to build for.', + ) + parser.add_argument( + '--user-build-args', + type=str, + nargs='*', + required=False, + help='Additional user-provided docker build arguments.', + ) + parser.add_argument( + '--max-num-jobs', + type=int, + required=False, + default=8, + help='Maximum number of jobs to run in parallel (build and ctest).', + ) + parser.add_argument( + '--gcc-sanitizer', + type=int, + default=0, + help='Build in debug mode with gcc sanitizers enabled.
(1=yes, 0=no)', + ) + args = parser.parse_args() + + if args.build_image is None and args.build_and_test is None: + parser.error('Either image or test must be provided') + + return args + + +def main() -> int: + args = parse_args() + print_system_info() + + if args.build_image is not None: + image = ARG_TO_IMAGE[args.build_image](args) + image.build() + + if args.build_and_test is not None: + test = ARG_TO_TEST[args.build_and_test](args) + test.run() + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/ci/system_info.py b/ci/system_info.py new file mode 100644 index 00000000..ee3fa9b9 --- /dev/null +++ b/ci/system_info.py @@ -0,0 +1,227 @@ +# +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. +# +import os +import shutil +import platform +import subprocess +from typing import List, Optional + + +def get_native_cuda_sm_architecture() -> str: + """Get the cuda architecture from nvidia-smi""" + try: + command_output = subprocess.check_output( + ['nvidia-smi', '--query-gpu=compute_cap', '--format=csv']) + arch = command_output.decode('utf-8').split()[1].replace('.', '') + return arch + except FileNotFoundError: + print('::error :: nvidia-smi not found. Cannot detect native CUDA SM architecture.') + raise + + +def print_system_info() -> None: + """Print system information""" + + def _border(char: str = '=', width: int = 80) -> str: + return char * width + + def _print_section(title: str, lines: List[str]) -> None: + print(_border('=')) + print(f'[ {title} ]') + print(_border('-')) + for line in lines: + print(line) + + def _read_os_release() -> Optional[dict]: + os_release_path = '/etc/os-release' + if not os.path.exists(os_release_path): + return None + info = {} + try: + with open(os_release_path, 'r', encoding='utf-8') as f: + for raw_line in f: + line = raw_line.strip() + if not line or line.startswith('#') or '=' not in line: + continue + key, value = line.split('=', 1) + value = value.strip().strip('"').strip("'") + info[key] = value + return info + except (OSError, IOError): + return None + + def _human_bytes(num_bytes: int) -> str: + units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'] + size = float(num_bytes) + unit_idx = 0 + while size >= 1024.0 and unit_idx < len(units) - 1: + size /= 1024.0 + unit_idx += 1 + # Use at most 2 decimals for readability + if size >= 100 or unit_idx == 0: + return f'{int(size)} {units[unit_idx]}' + return f'{size:.2f} {units[unit_idx]}' + + def _read_meminfo() -> Optional[dict]: + meminfo_path = '/proc/meminfo' + if not os.path.exists(meminfo_path): + return None + info = {} + try: + with open(meminfo_path, 'r', encoding='utf-8') as f: + for raw_line in f: + line = raw_line.strip() + if ':' not in line: + continue + key, value = line.split(':', 1) + value = value.strip() + # Values are typically like "123456 kB" + parts = value.split() + if not parts: + continue + try: + amount_kb = int(parts[0]) + info[key] = amount_kb * 1024 # store in bytes + except ValueError: + # Non-integer field; ignore + pass + return info + except (OSError, IOError): + return None + + def _memory_lines() -> List[str]: + meminfo = _read_meminfo() 
+        total = meminfo.get('MemTotal')
+        available = meminfo.get('MemAvailable')
+        if available is None:
+            # Fallback heuristic for older kernels without MemAvailable
+            free = meminfo.get('MemFree', 0)
+            buffers = meminfo.get('Buffers', 0)
+            cached = meminfo.get('Cached', 0)
+            available = free + buffers + cached
+        used = None
+        if total is not None and available is not None:
+            used = max(total - available, 0)
+        lines: List[str] = []
+        if total is not None:
+            lines.append(f'Total: {_human_bytes(total)}')
+        if used is not None:
+            lines.append(f'Used: {_human_bytes(used)}')
+        if available is not None:
+            lines.append(f'Available: {_human_bytes(available)}')
+        if not lines:
+            lines.append('Memory information not available.')
+        return lines
+
+    def _disk_lines() -> List[str]:
+        try:
+            usage = shutil.disk_usage('/')
+            return [
+                'Mount: /',
+                f'Total: {_human_bytes(usage.total)}',
+                f'Used: {_human_bytes(usage.used)}',
+                f'Free: {_human_bytes(usage.free)}',
+            ]
+        except OSError as e:
+            return [f'Disk information not available: {e}']
+
+    def _nvidia_smi_lines() -> List[str]:
+        try:
+            result = subprocess.run(['nvidia-smi'],
+                                    check=False,
+                                    capture_output=True,
+                                    text=True,
+                                    timeout=10)
+            if result.returncode == 0 and result.stdout:
+                # Limit very long outputs to keep CI logs readable
+                stdout = result.stdout.strip()
+                max_chars = 8000
+                if len(stdout) > max_chars:
+                    truncated = stdout[:max_chars] + '\n... (truncated) ...'
+                    return truncated.splitlines()
+                return stdout.splitlines()
+            # If stderr contains something useful, show it; otherwise, generic message
+            err = (result.stderr or '').strip()
+            if err:
+                return ['nvidia-smi returned non-zero exit code:', err]
+            return ['nvidia-smi returned non-zero exit code with no output.']
+        except FileNotFoundError:
+            return ['nvidia-smi not found on this system.']
+        except subprocess.TimeoutExpired:
+            return ['nvidia-smi timed out.']
+        except (OSError, subprocess.SubprocessError, ValueError) as e:
+            return [f'Failed to run nvidia-smi: {e}']
+
+    def _uname_lines() -> List[str]:
+        try:
+            result = subprocess.run(
+                ['uname', '-a'],
+                check=False,
+                capture_output=True,
+                text=True,
+                timeout=5,
+            )
+            if result.stdout:
+                return [result.stdout.strip()]
+            err = (result.stderr or '').strip()
+            return [err or 'uname -a returned no output.']
+        except subprocess.TimeoutExpired:
+            return ['uname -a timed out.']
+        except (OSError, subprocess.SubprocessError, ValueError) as e:
+            return [f'Failed to run uname -a: {e}']
+
+    def _l4t_lines() -> List[str]:
+        nv_path = '/etc/nv_tegra_release'
+        if os.path.exists(nv_path):
+            try:
+                with open(nv_path, 'r', encoding='utf-8') as f:
+                    lines = [line.strip() for line in f if line.strip()]
+                return lines if lines else [f'{nv_path} is present but empty.']
+            except (OSError, IOError) as e:
+                return [f'Found {nv_path} but failed to read: {e}']
+        return [f'{nv_path} not present (likely non-Jetson).']
+
+    # System section
+    os_info = _read_os_release() or {}
+    pretty_name = os_info.get('PRETTY_NAME')
+    if not pretty_name:
+        name = os_info.get('NAME')
+        version = os_info.get('VERSION')
+        if name and version:
+            pretty_name = f'{name} {version}'
+        elif name:
+            pretty_name = name
+        else:
+            pretty_name = 'Unknown'
+    system_lines = [
+        f'Platform: {platform.platform()}',
+        f'OS: {pretty_name}',
+        f'Kernel: {platform.release()}',
+        f'Machine: {platform.machine()}',
+        f'Num CPUs: {os.cpu_count()}',
+    ]
+    _print_section('System Information', system_lines)
+
+    # Kernel / L4T sections
+    _print_section('Kernel (uname -a)', _uname_lines())
+    _print_section('Jetson L4T (nv_tegra_release)', _l4t_lines())
+
+    # Memory section
+    _print_section('Memory', _memory_lines())
+
+    # Disk section
+    _print_section('Disk (root filesystem)', _disk_lines())
+
+    # NVIDIA SMI section
+    _print_section('NVIDIA SMI', _nvidia_smi_lines())
+
+    # Ensure it is flushed to the console
+    print('', flush=True)
diff --git a/docker/Dockerfile.build b/docker/Dockerfile.build
index 7df84df4..3167fa3c 100644
--- a/docker/Dockerfile.build
+++ b/docker/Dockerfile.build
@@ -22,7 +22,8 @@ RUN cd nvblox && mkdir build && cd build && \
     # To avoid out-of-memory, we limit the number of build jobs to min(num_cpu_cores, MAX_NUM_JOBS).
     NUM_JOBS=$(echo "$(nproc) $MAX_NUM_JOBS" | awk '{print ($1<$2)?$1:$2}') && \
     echo "Number of build jobs: $NUM_JOBS" && \
-    bash -ic "cmake .. ${CMAKE_ARGS} -DCMAKE_VERBOSE_MAKEFILE=1 && make -j $NUM_JOBS"
+    bash -ic "cmake .. ${CMAKE_ARGS} && make -j $NUM_JOBS"

-# Copy the complete source tree
+# Copy the complete source tree and install nvblox_torch. Note that we need the .git dir to read the SHA in CI.
 COPY . /nvblox
+RUN . /opt/venv/bin/activate && pip install /nvblox/nvblox_torch/
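The awk expression above computes min(nproc, MAX_NUM_JOBS). For illustration only, an equivalent of the same job-count clamping in Python, assuming MAX_NUM_JOBS mirrors the --max-num-jobs default of 8:

    import os

    MAX_NUM_JOBS = 8  # assumption: matches the --max-num-jobs CLI default
    num_jobs = min(os.cpu_count() or 1, MAX_NUM_JOBS)
    print(f'Number of build jobs: {num_jobs}')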
diff --git a/docker/Dockerfile.deps b/docker/Dockerfile.deps
index 7b5d679f..04056104 100644
--- a/docker/Dockerfile.deps
+++ b/docker/Dockerfile.deps
@@ -8,7 +8,7 @@ ENV DEBIAN_FRONTEND noninteractive
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 RUN apt-get update && apt-get install -y tzdata

-# Install apt dependencies.
+# Install apt dependencies and clean up afterwards.
 RUN apt update && apt-get --no-install-recommends install -y \
     # Source development \
     git jq build-essential \
@@ -22,20 +22,22 @@ RUN apt update && apt-get --no-install-recommends install -y \
     # NOTE(alexmillane): Taken from: http://www.open3d.org/docs/release/docker.html \
     libegl1 libgl1 libgomp1 \
     # Tools used in tests \
-    cuda-nsight-systems-12-6 parallel
+    cuda-nsight-systems-12-6 parallel \
+    && rm -rf /var/lib/apt/lists/* && apt-get clean

 # Install pytorch.
 # Note that all python deps are installed inside a venv. This is required by ubuntu >= 24.
-RUN python3 -m venv /opt/venv
-RUN . /opt/venv/bin/activate && \
-    python3 -m pip install --ignore-installed --upgrade pip && \
-    python3 -m pip install \
+# umask 000 is needed to make the python libs readable and writable by all users.
+RUN umask 000 && python3 -m venv /opt/venv
+RUN umask 000 && . /opt/venv/bin/activate && \
+    python3 -m pip install --ignore-installed --upgrade pip --no-cache-dir && \
+    python3 -m pip install --no-cache-dir \
     # Pytorch. Note that this version is only compatible with pre-cxx11 ABI binaries. \
     torch==2.4

 # Install python deps.
-RUN . /opt/venv/bin/activate && \
-    python3 -m pip install --upgrade \
+RUN umask 000 && . /opt/venv/bin/activate && \
+    python3 -m pip install --upgrade --no-cache-dir \
     # Deployment tools \
     wheel requests setuptools \
     # Testing \
@@ -45,11 +47,11 @@ RUN . /opt/venv/bin/activate && \

 # Install cmake.
 COPY docker/install_cmake.sh /
-RUN /install_cmake.sh
+RUN bash /install_cmake.sh

 # Install ccache. We need a recent version for improved nvcc support.
 COPY docker/install_ccache.sh /
-RUN /install_ccache.sh
+RUN bash /install_ccache.sh

 # Make sure we're using a text-based interactive terminal.
 ENV DEBIAN_FRONTEND teletype
@@ -65,6 +67,3 @@ RUN echo 'PATH=$PATH:/usr/local/cuda/bin' | tee --append /etc/nvblox_env.sh && \
     # Always activate the venv upon login \
     echo 'source /opt/venv/bin/activate' | tee --append /etc/nvblox_env.sh
 RUN echo 'source /etc/nvblox_env.sh' | tee --append /etc/bash.bashrc
-
-# Ensure that a local user can access the python venv
-RUN chmod -R a+rw /opt/venv
diff --git a/docker/Dockerfile.jetson_deps b/docker/Dockerfile.jetson_deps
index 2944dffb..bd316a0a 100644
--- a/docker/Dockerfile.jetson_deps
+++ b/docker/Dockerfile.jetson_deps
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.3.0
+ARG BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.4.0
 FROM ${BASE_IMAGE}

 # Argument for skipping pytorch installation.
diff --git a/docker/maybe_install_cuda_pytorch.sh b/docker/maybe_install_cuda_pytorch.sh
index 45b7d9db..d90d9584 100755
--- a/docker/maybe_install_cuda_pytorch.sh
+++ b/docker/maybe_install_cuda_pytorch.sh
@@ -6,13 +6,15 @@ set -e
 if [ $SKIP_PYTORCH_INSTALL -eq 0 ]; then
     echo "Installing pytorch"
     . /opt/venv/bin/activate
-    python3 -m pip install --ignore-installed --upgrade pip
-    wget https://nvidia.box.com/shared/static/mp164asf3sceb570wvjsrezk1p4ftj8t.whl -O /torch-2.3.0-cp310-cp310-linux_aarch64.whl
-    pip install /torch-2.3.0-cp310-cp310-linux_aarch64.whl
-    rm /torch-2.3.0-cp310-cp310-linux_aarch64.whl
-    wget https://nvidia.box.com/shared/static/xpr06qe6ql3l6rj22cu3c45tz1wzi36p.whl -O /torchvision-0.18.0-cp310-cp310-linux_aarch64.whl
-    pip install /torchvision-0.18.0-cp310-cp310-linux_aarch64.whl
-    rm /torchvision-0.18.0-cp310-cp310-linux_aarch64.whl
+    python3 -m pip install --ignore-installed --upgrade pip wheel setuptools
+
+    # Install PyTorch/torchvision/torchaudio wheels built for JetPack 6 (JP6) with CUDA 12.6 (cu126),
+    # which are compatible with cuDNN 9.x shipped in JetPack 6.1/6.2 (L4T r36.4.x).
+    # Note: These wheels target aarch64 + Python 3.10.
+    pip uninstall -y torch torchvision torchaudio || true
+    pip install --no-cache-dir \
+        --index-url https://pypi.jetson-ai-lab.io/jp6/cu126 \
+        torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0
 else
     echo "Skipping pytorch installation"
 fi
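A quick way to sanity-check the wheel install inside the container (illustrative only, not part of the CI): import torch and confirm the version and that the GPU is visible.

    import torch

    print(torch.__version__)          # expected to start with '2.8.0' on JetPack 6 / cu126
    print(torch.version.cuda)         # CUDA version the wheel was built against
    print(torch.cuda.is_available())  # True when the container can see the GPU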
diff --git a/docs/pages/torch_examples_realsense.rst b/docs/pages/torch_examples_realsense.rst
index 267febfa..cedf1573 100644
--- a/docs/pages/torch_examples_realsense.rst
+++ b/docs/pages/torch_examples_realsense.rst
@@ -95,7 +95,7 @@
 repository run:

 .. code-block:: bash

-   python3 docker/build_images.py --build-realsense-example
+   python3 ci/premerge.py --build-image realsense

 Then launch the container:
diff --git a/nvblox/tests/CMakeLists.txt b/nvblox/tests/CMakeLists.txt
index fceba55e..f47a202e 100644
--- a/nvblox/tests/CMakeLists.txt
+++ b/nvblox/tests/CMakeLists.txt
@@ -197,7 +197,7 @@ if(NOT USE_SANITIZER)
     add_nvblox_executable(
         run_memcpy_on_default_cuda_stream
         SOURCE_FILES run_memcpy_on_default_cuda_stream.cpp
         LINK_LIBRARIES_PUBLIC nvblox_lib)
-    add_test(test_default_stream_utilization
+    add_test(test_default_stream_utilization
              bash ${CMAKE_CURRENT_SOURCE_DIR}/check_default_stream_utilization.sh)
 endif(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64")
 endif(NOT USE_SANITIZER)
diff --git a/nvblox_torch/internal_tests/test_deployment.py b/nvblox_torch/internal_tests/test_deployment.py
new file mode 100644
index 00000000..a2fb725c
--- /dev/null
+++ b/nvblox_torch/internal_tests/test_deployment.py
@@ -0,0 +1,60 @@
+#
+# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+#
+
+import sys
+import os
+import argparse
+import io
+import contextlib
+
+# Dig up the module that we're testing
+CI_PATH = os.path.join(os.path.dirname(__file__), '..', '..', 'ci')
+sys.path.insert(0, CI_PATH)
+# pylint: disable=wrong-import-position
+import ship_it
+
+
+def make_args(build_number: str = '1234',
+              print_staging_url: bool = False,
+              print_release_url: bool = False,
+              build_package: bool = True) -> argparse.Namespace:
+    args = argparse.Namespace()
+    args.build_number = build_number
+    args.print_staging_url = print_staging_url
+    args.print_release_url = print_release_url
+    args.build_package = build_package
+    return args
+
+
+def test_build_package() -> None:
+    _run_and_check_output(make_args(build_package=True), expected_output='Finished')
+
+
+def test_print_staging_url() -> None:
+    _run_and_check_output(make_args(print_staging_url=True), expected_output='urm.nvidia.com')
+
+
+def test_print_release_url() -> None:
+    _run_and_check_output(make_args(print_release_url=True), expected_output='urm.nvidia.com')
+
+
+def _run_and_check_output(args: argparse.Namespace, expected_output: str) -> None:
+    """Run ship_it.main, capture its output, and check for the expected string."""
+    stdout_buffer = io.StringIO()
+    stderr_buffer = io.StringIO()
+
+    with contextlib.redirect_stdout(stdout_buffer), contextlib.redirect_stderr(stderr_buffer):
+        ship_it.main(args)
+
+    assert expected_output in stdout_buffer.getvalue(), (
+        f'ship_it output is missing the expected string: {expected_output}\n'
+        f'STDOUT:\n{stdout_buffer.getvalue()}\n'
+        f'STDERR:\n{stderr_buffer.getvalue()}')
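The `_run_and_check_output` helper above uses the standard redirect-and-assert pattern. A self-contained sketch of the same idea, with a hypothetical stand-in for ship_it.main:

    import io
    import contextlib

    def announce() -> None:  # hypothetical function under test
        print('Finished')

    buffer = io.StringIO()
    with contextlib.redirect_stdout(buffer):
        announce()
    assert 'Finished' in buffer.getvalue()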
diff --git a/nvblox_torch/internal_tests/test_mapper_params.py b/nvblox_torch/internal_tests/test_mapper_params.py
new file mode 100644
index 00000000..82f3740c
--- /dev/null
+++ b/nvblox_torch/internal_tests/test_mapper_params.py
@@ -0,0 +1,208 @@
+#
+# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+#
+#%%
+
+from typing import List, Any, Callable, Set
+import inspect
+import pathlib
+import re
+
+from nvblox_torch.mapper import Mapper
+from nvblox_torch.mapper_params import (ProjectiveIntegratorParams, MeshIntegratorParams,
+                                        DecayIntegratorBaseParams, TsdfDecayIntegratorParams,
+                                        OccupancyDecayIntegratorParams, EsdfIntegratorParams,
+                                        MapperParams, BlockMemoryPoolParams, ViewCalculatorParams)
+from nvblox_torch.projective_integrator_types import ProjectiveIntegratorType
+
+NVBLOX_TORCH_DIR = pathlib.Path(pathlib.Path(__file__).parents[1])
+NVBLOX_CORE_DIR = NVBLOX_TORCH_DIR.parents[0] / 'nvblox'
+PROJECTIVE_INTEGRATOR_PARAMS_PATH = NVBLOX_CORE_DIR \
+    / 'include' / 'nvblox' / 'integrators' / 'projective_integrator_params.h'
+MESH_INTEGRATOR_PARAMS_PATH = NVBLOX_CORE_DIR \
+    / 'include' / 'nvblox' / 'mesh' / 'mesh_integrator_params.h'
+DECAY_INTEGRATOR_BASE_PARAMS_PATH = NVBLOX_CORE_DIR \
+    / 'include' / 'nvblox' / 'integrators' / 'internal' / 'decay_integrator_base_params.h'
+TSDF_DECAY_INTEGRATOR_PARAMS_PATH = NVBLOX_CORE_DIR \
+    / 'include' / 'nvblox' / 'integrators' / 'tsdf_decay_integrator_params.h'
+OCCUPANCY_DECAY_INTEGRATOR_PARAMS_PATH = NVBLOX_CORE_DIR \
+    / 'include' / 'nvblox' / 'integrators' / 'occupancy_decay_integrator_params.h'
+ESDF_INTEGRATOR_PARAMS_PATH = NVBLOX_CORE_DIR \
+    / 'include' / 'nvblox' / 'integrators' / 'esdf_integrator_params.h'
+VIEW_CALCULATOR_PARAMS_PATH = NVBLOX_CORE_DIR \
+    / 'include' / 'nvblox' / 'integrators' / 'view_calculator_params.h'
+
+
+def get_attributes(mapper_params: Any) -> List[str]:
+    attribute_names = [
+        attribute for attribute in dir(mapper_params) if not attribute.startswith('_')
+    ]
+    non_method_attribute_names = []
+    for attribute_name in attribute_names:
+        attr = getattr(mapper_params, attribute_name)
+        if not inspect.ismethod(attr):
+            non_method_attribute_names.append(attribute_name)
+    return non_method_attribute_names
+
+
+# These are values we have to hardcode because they're enums which can't
+# simply be set to a generated integer value.
+special_test_values = {
+    'projective_integrator_weighting_mode': 'kInverseSquareTsdfDistancePenalty',
+    'workspace_bounds_type': 'kUnbounded'
+}
+
+
+def get_test_value(attribute_name: str, idx: int, params: Any) -> Any:
+    if attribute_name in special_test_values:
+        return special_test_values[attribute_name]
+    else:
+        type_converter = type(getattr(params, attribute_name))
+        return type_converter(idx)
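To make the casting step in get_test_value concrete, a small standalone illustration with hypothetical attribute names: the index is converted to the current attribute's type, so a float-typed parameter receives 3.0 rather than the raw int 3.

    class Demo:  # hypothetical parameter object
        def __init__(self) -> None:
            self.max_weight = 1.0  # float-typed
            self.enabled = True    # bool-typed

    demo = Demo()
    for idx, name in enumerate(['max_weight', 'enabled']):
        caster = type(getattr(demo, name))
        setattr(demo, name, caster(idx))
    assert demo.max_weight == 0.0  # float(0)
    assert demo.enabled is True    # bool(1)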
+
+
+def assert_getting_and_setting(parameter_class: Callable) -> None:
+    params = parameter_class()
+    # Gather the attributes from the object
+    attribute_names = get_attributes(params)
+    print(f'Parameter class has attributes: {attribute_names}')
+
+    # Loop through all the attributes and set each one to a deterministic test
+    # value (its index). We also convert the index into the type of the
+    # attribute (we got errors when using the raw index).
+    for idx, attribute_name in enumerate(attribute_names):
+        test_value = get_test_value(attribute_name, idx, params)
+        print(f'Setting attribute {attribute_name} = {test_value}')
+        setattr(params, attribute_name, test_value)
+
+    # Loop through all the attributes and check them
+    for idx, attribute_name in enumerate(attribute_names):
+        value = getattr(params, attribute_name)
+        print(f'Getting attribute {attribute_name} = {value}')
+        test_value = get_test_value(attribute_name, idx, params)
+        assert value == test_value
+
+
+def get_parameter_names_from_cpp(header_file_path: pathlib.Path) -> List[str]:
+    param_names = []
+    with open(header_file_path, 'r', encoding='utf-8') as file:
+        for line in file.readlines():
+            if 'Param<' in line and 'constexpr' not in line:
+                param_name = re.split(r'[{>]+', line)[1].strip()
+                param_names.append(param_name)
+    return param_names
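To illustrate what the regex in get_parameter_names_from_cpp extracts, assuming a declaration shaped roughly like the nvblox `Param<...>` members (the exact C++ syntax here is an assumption):

    import re

    line = '  Param<float> projective_integrator_max_weight{100.0f};'
    # Splitting on runs of '>' or '{' yields:
    # ['  Param<float', ' projective_integrator_max_weight', '100.0f};']
    name = re.split(r'[{>]+', line)[1].strip()
    assert name == 'projective_integrator_max_weight'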
+
+
+def get_unwrapped_parameters(parameter_class: Callable, header_file_path: pathlib.Path) -> Set[str]:
+    parameter_names_cpp = get_parameter_names_from_cpp(header_file_path)
+    mapper_params = parameter_class()
+    attribute_names = get_attributes(mapper_params)
+    unwrapped = set(parameter_names_cpp) - set(attribute_names)
+    return unwrapped
+
+
+def assert_wrapped_exist(parameter_class: Callable, header_file_path: pathlib.Path) -> None:
+    # Test that each of the parameters/attributes that exists in python
+    # also exists in the underlying cpp struct.
+    parameter_names_cpp = get_parameter_names_from_cpp(header_file_path)
+    mapper_params = parameter_class()
+    attribute_names = get_attributes(mapper_params)
+    wrapped_non_existing = set(attribute_names) - set(parameter_names_cpp)
+    assert len(
+        wrapped_non_existing
+    ) == 0, f'Some attributes correspond to non-existing parameters: {wrapped_non_existing}'
+    # Print the correctly wrapped for encouragement
+    correctly_wrapped = set(parameter_names_cpp).intersection(set(attribute_names))
+    print(f'Correctly wrapped the following params: {list(correctly_wrapped)}')
+    unwrapped = get_unwrapped_parameters(parameter_class, header_file_path)
+    print(f'The following parameters are unwrapped: {list(unwrapped)}')
+
+
+def assert_no_unwrapped_params(parameter_class: Callable, header_file_path: pathlib.Path) -> None:
+    unwrapped = get_unwrapped_parameters(parameter_class, header_file_path)
+    print(f'The following parameters are unwrapped: {list(unwrapped)}')
+    assert len(unwrapped) == 0, 'Some MapperParams are unwrapped in python'
+
+
+def test_getting_and_setting() -> None:
+    assert_getting_and_setting(ProjectiveIntegratorParams)
+    assert_getting_and_setting(MeshIntegratorParams)
+    assert_getting_and_setting(DecayIntegratorBaseParams)
+    assert_getting_and_setting(TsdfDecayIntegratorParams)
+    assert_getting_and_setting(EsdfIntegratorParams)
+    assert_getting_and_setting(ViewCalculatorParams)
+    assert_getting_and_setting(BlockMemoryPoolParams)
+
+
+def test_all_wrapped() -> None:
+
+    assert_wrapped_exist(ProjectiveIntegratorParams, PROJECTIVE_INTEGRATOR_PARAMS_PATH)
+    assert_no_unwrapped_params(ProjectiveIntegratorParams, PROJECTIVE_INTEGRATOR_PARAMS_PATH)
+
+    assert_wrapped_exist(MeshIntegratorParams, MESH_INTEGRATOR_PARAMS_PATH)
+    assert_no_unwrapped_params(MeshIntegratorParams, MESH_INTEGRATOR_PARAMS_PATH)
+
+    assert_wrapped_exist(DecayIntegratorBaseParams, DECAY_INTEGRATOR_BASE_PARAMS_PATH)
+    assert_no_unwrapped_params(DecayIntegratorBaseParams, DECAY_INTEGRATOR_BASE_PARAMS_PATH)
+
+    assert_wrapped_exist(TsdfDecayIntegratorParams, TSDF_DECAY_INTEGRATOR_PARAMS_PATH)
+    assert_no_unwrapped_params(TsdfDecayIntegratorParams, TSDF_DECAY_INTEGRATOR_PARAMS_PATH)
+
+    assert_wrapped_exist(OccupancyDecayIntegratorParams, OCCUPANCY_DECAY_INTEGRATOR_PARAMS_PATH)
+    assert_no_unwrapped_params(OccupancyDecayIntegratorParams,
+                               OCCUPANCY_DECAY_INTEGRATOR_PARAMS_PATH)
+
+    assert_wrapped_exist(EsdfIntegratorParams, ESDF_INTEGRATOR_PARAMS_PATH)
+    assert_no_unwrapped_params(EsdfIntegratorParams, ESDF_INTEGRATOR_PARAMS_PATH)
+
+    assert_wrapped_exist(ViewCalculatorParams, VIEW_CALCULATOR_PARAMS_PATH)
+    assert_no_unwrapped_params(ViewCalculatorParams, VIEW_CALCULATOR_PARAMS_PATH)
+
+
+def test_mapper_params_sub_params() -> None:
+    mapper_params = MapperParams()
+
+    # Projective integrator sub-params test: set a value and check that it
+    # survives the round trip through MapperParams.
+    projective_integrator_params = mapper_params.get_projective_integrator_params()
+    projective_integrator_params.projective_integrator_max_weight = 6.0
+    mapper_params.set_projective_integrator_params(projective_integrator_params)
+    projective_integrator_params_2 = mapper_params.get_projective_integrator_params()
+    assert projective_integrator_params_2.projective_integrator_max_weight == 6.0
+
+    # Get and set all subparams
+    getter_names = [method for method in dir(mapper_params) if method.startswith('get_')]
+    for getter_name in getter_names:
+        # Get
+        sub_params = getattr(mapper_params, getter_name)()
+        # Set
+        setter_name = 'set_' + getter_name[len('get_'):]
+        getattr(mapper_params, setter_name)(sub_params)
+
+
+def test_parameter_setting_in_mapper() -> None:
+    test_value = 1.0
+
+    # Create a new parameter struct
+    projective_integrator_params = ProjectiveIntegratorParams()
+    projective_integrator_params.projective_integrator_max_integration_distance_m = test_value
+    mapper_params = MapperParams()
+    mapper_params.set_projective_integrator_params(projective_integrator_params)
+
+    # Recreate a mapper with modified/non-default params
+    voxel_size_m = 0.1
+    new_mapper = Mapper(voxel_sizes_m=[voxel_size_m],
+                        integrator_types=[ProjectiveIntegratorType.TSDF],
+                        mapper_parameters=mapper_params)
+    new_mapper_params = new_mapper.params()
+
+    new_projective_integrator_params = new_mapper_params.get_projective_integrator_params()
+    new_value = new_projective_integrator_params.projective_integrator_max_integration_distance_m
+    assert new_value == test_value
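The sub-params loop in test_mapper_params_sub_params above relies purely on the get_*/set_* naming convention. The same reflection pattern in isolation, using a hypothetical class:

    class Params:  # hypothetical object following the get_*/set_* convention
        def __init__(self) -> None:
            self._speed = 1.0

        def get_speed(self) -> float:
            return self._speed

        def set_speed(self, value: float) -> None:
            self._speed = value

    params = Params()
    for getter_name in [m for m in dir(params) if m.startswith('get_')]:
        sub_value = getattr(params, getter_name)()
        setter_name = 'set_' + getter_name[len('get_'):]
        getattr(params, setter_name)(sub_value)  # round-trip each value
    assert params.get_speed() == 1.0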
diff --git a/nvblox_torch/internal_tests/test_realsense_example.py b/nvblox_torch/internal_tests/test_realsense_example.py
new file mode 100644
index 00000000..99b8db50
--- /dev/null
+++ b/nvblox_torch/internal_tests/test_realsense_example.py
@@ -0,0 +1,130 @@
+#
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+#
+
+import numpy as np
+from unittest.mock import patch
+import io
+import sys
+import pytest
+
+from contextlib import redirect_stdout
+from typing import Any
+import torch
+
+xfail = pytest.mark.xfail
+
+MAX_STEPS = 10
+WIDTH = 640
+HEIGHT = 640
+
+
+class MockIntrinsics:
+    """Mock class for realsense intrinsics."""
+
+    def __init__(self, _: Any = None) -> None:
+        self.fx = 320.0
+        self.fy = 320.0
+        self.ppx = 160.0
+        self.ppy = 160.0
+        self.width = WIDTH
+        self.height = HEIGHT
+
+
+class MockExtrinsics:
+    """Mock class for realsense extrinsics."""
+
+    def __init__(self, _: Any = None) -> None:
+        self.rotation = np.eye(3, 3)
+        self.translation = np.zeros(3)
+
+
+class MockRealsenseDataloader:
+    """Mock class for realsense dataloader."""
+
+    def __init__(self, max_steps: int) -> None:
+        assert max_steps == MAX_STEPS
+        self.max_steps = max_steps
+        self.current_step = 0
+
+    def __len__(self) -> int:
+        return self.max_steps
+
+    def __iter__(self) -> 'MockRealsenseDataloader':
+        return self
+
+    def __next__(self) -> dict:
+        self.current_step += 1
+        print(f'current_step: {self.current_step}')
+        if self.current_step >= self.max_steps:
+            raise StopIteration
+
+        return {
+            'left_infrared_image': np.random.randint(0, 256, (HEIGHT, WIDTH), dtype=np.uint8),
+            'right_infrared_image': np.random.randint(0, 256, (HEIGHT, WIDTH), dtype=np.uint8),
+            'depth': torch.tensor(np.random.rand(HEIGHT, WIDTH), dtype=torch.float32,
+                                  device='cuda'),
+            'rgb': torch.randint(low=0,
+                                 high=256,
+                                 size=(HEIGHT, WIDTH, 3),
+                                 dtype=torch.uint8,
+                                 device='cuda'),
+            'timestamp': self.current_step
+        }
+
+    def left_infrared_intrinsics(self) -> MockIntrinsics:
+        return MockIntrinsics()
+
+    def right_infrared_intrinsics(self) -> MockIntrinsics:
+        return MockIntrinsics()
+
+    def depth_intrinsics(self) -> MockIntrinsics:
+        return MockIntrinsics()
+
+    def color_intrinsics(self) -> MockIntrinsics:
+        return MockIntrinsics()
+
+    # pylint: disable=C0103
+    def T_C_left_infrared_C_color(self) -> MockExtrinsics:
+        return MockExtrinsics()
+
+    def T_C_left_infrared_C_right_infrared(self) -> MockExtrinsics:
+        return MockExtrinsics()
+
+
+@patch('nvblox_torch.examples.realsense.run_realsense_mapper.RealsenseDataloader',
+       MockRealsenseDataloader)
+@patch('nvblox_torch.examples.realsense.realsense_dataloader.rs.pyrealsense2.intrinsics',
+       MockIntrinsics)
+@patch('nvblox_torch.examples.realsense.realsense_dataloader.rs.pyrealsense2.extrinsics',
+       MockExtrinsics)
+@xfail(run=False,
+       reason='''This test requires a dedicated docker image and is therefore disabled by default.
+       To enable it, run "pytest --runxfail".''')
+def test_realsense_example() -> None:
+    # This import will fail outside the realsense docker. Since the test is marked with "xfail" it
+    # will not break the non-realsense test suite.
+    # pylint: disable=import-outside-toplevel
+    from nvblox_torch.examples.realsense import run_realsense_mapper
+
+    # We pass CLI input and capture output as follows:
+    # - Pass CLI args by using unittest.mock.patch on sys.argv.
+    # - Redirect stdout to a buffer for inspection.
+    test_args = [
+        'run_realsense_mapper.py',
+        '--max_frames',
+        str(MAX_STEPS),
+    ]
+    buffer = io.StringIO()
+    with patch.object(sys, 'argv', test_args):
+        with redirect_stdout(buffer):
+            assert run_realsense_mapper.main() == 0
+    assert 'Done' in buffer.getvalue()
diff --git a/nvblox_torch/sonar-project.properties b/nvblox_torch/sonar-project.properties
new file mode 100644
index 00000000..c5db4c3a
--- /dev/null
+++ b/nvblox_torch/sonar-project.properties
@@ -0,0 +1 @@
+sonar.projectKey=GPUSW_nvblox_nvblox_nvblox