From ea865a420acb816236e5b2d64f6bd779216f50ea Mon Sep 17 00:00:00 2001
From: Jonathan Giannuzzi
Date: Fri, 8 Sep 2023 00:32:20 +0100
Subject: [PATCH] Add prebuilt devcontainer (#369)

* Use vcpkg.json only

This is necessary to be able to build ParquetSharpNative offline with a populated vcpkg cache.

* Ensure vcpkg builtin registry is up-to-date in CI runners

If we reference a baseline that is too recent, some CI runner images will not have it. With this step we ensure it won't be the case.

This used to be done via vcpkg-configuration.json, but we couldn't rely on it for offline devcontainer usage.

* Improve Powershell build script

Format via Powershell VS Code extension and fix linting issues.

* Build native lib in both Debug and Release by default (unless in the CI)

* Add devcontainer

* Add devcontainer workflow

* Update documentation about building ParquetSharp

* Add solution to .gitignore
---
NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. The line structure was rebuilt from the hunk
headers (every hunk matches its @@ counts). Indentation follows convention and,
on context lines of pre-existing files, may not match the target files
byte-for-byte; use `git apply --ignore-whitespace` if context fails to match.
Two typos in the added README text were corrected, so the post-image blob id on
the README `index` line no longer reflects the content.

 .devcontainer/Dockerfile           |  74 +++++++++++++++++++
 .devcontainer/devcontainer.json    |  72 ++++++++++++++++++
 .dockerignore                      |   3 +
 .github/workflows/ci.yml           |   7 ++
 .github/workflows/devcontainer.yml | 113 +++++++++++++++++++++++++++++
 .github/workflows/nudge.yml        |   2 +-
 .gitignore                         |   4 +
 README.md                          |  73 +++++++++++++++----
 build_unix.sh                      |  13 +++-
 build_windows.ps1                  |  28 ++++---
 vcpkg-configuration.json           |   7 --
 vcpkg.json                         |   3 +-
 12 files changed, 362 insertions(+), 37 deletions(-)
 create mode 100644 .devcontainer/Dockerfile
 create mode 100644 .devcontainer/devcontainer.json
 create mode 100644 .dockerignore
 create mode 100644 .github/workflows/devcontainer.yml
 delete mode 100644 vcpkg-configuration.json

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 00000000..debcc9ab
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,74 @@
+FROM mcr.microsoft.com/devcontainers/dotnet:0-7.0-bullseye-slim AS dotnet
+
+#====================================================================
+
+FROM dotnet AS nuget
+
+USER vscode
+
+# Copy our projects
+COPY --chown=vscode:vscode . /tmp/build/
+
+# Populate the nuget cache with all of our dependencies
+RUN for project in /tmp/build/csharp*; do \
+        dotnet restore $project; \
+    done
+
+#====================================================================
+
+FROM dotnet AS cpp
+
+# Install the C++ dev tools
+RUN echo "deb http://deb.debian.org/debian bullseye-backports main" >> /etc/apt/sources.list \
+    && apt-get update \
+    && export DEBIAN_FRONTEND=noninteractive \
+    && apt-get -y install --no-install-recommends \
+        bison \
+        build-essential \
+        cmake/bullseye-backports \
+        cppcheck \
+        flex \
+        gdb \
+        ninja-build \
+        pkg-config \
+        valgrind \
+&& apt-get autoremove -y \
+&& apt-get clean -y \
+&& rm -rf /var/lib/apt/lists/*
+
+# Set vcpkg environment variables
+ENV VCPKG_ROOT=/opt/vcpkg \
+    VCPKG_FORCE_SYSTEM_BINARIES=1
+
+#====================================================================
+
+FROM cpp AS vcpkg
+
+USER vscode
+
+# Install vcpkg
+RUN sudo mkdir -p $VCPKG_ROOT \
+    && sudo chown vscode:vscode $VCPKG_ROOT \
+    && git clone https://github.com/microsoft/vcpkg.git $VCPKG_ROOT \
+    && cd $VCPKG_ROOT \
+    && ./bootstrap-vcpkg.sh -disableMetrics
+
+# Copy our vcpkg manifest
+COPY --chown=vscode:vscode vcpkg.json /tmp/build/
+
+# Populate the vcpkg binary cache with all of our dependencies
+RUN cd /tmp/build \
+    && $VCPKG_ROOT/vcpkg install --clean-after-build \
+    && bash -c 'rm -rf $VCPKG_ROOT/{buildtrees,downloads/temp,packages}' \
+    && rm -rf *
+
+#====================================================================
+
+FROM cpp AS devcontainer
+
+# Copy the nuget cache
+COPY --from=nuget --chown=vscode:vscode /home/vscode/.nuget/packages /home/vscode/.nuget/packages
+
+# Copy the installed vcpkg and its binary cache
+COPY --from=vcpkg --chown=vscode:vscode $VCPKG_ROOT $VCPKG_ROOT
+COPY --from=vcpkg --chown=vscode:vscode /home/vscode/.cache/vcpkg /home/vscode/.cache/vcpkg
\ No newline at end of file
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 00000000..c8f30967
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,72 @@
+// For format details, see https://aka.ms/devcontainer.json.
+{
+  "name": "ParquetSharp",
+
+  // Use the prebuilt image. Comment this out if you want to make changes to it.
+  "image": "ghcr.io/g-research/parquetsharp/devcontainer:latest",
+
+  // Uncomment the following lines to build the container locally. You will also need
+  // to comment out the "image" line above.
+  // "build": {
+  //   "dockerfile": "./Dockerfile",
+  //   "context": ".."
+  // },
+
+  // Necessary for C++ debugger to work.
+  "capAdd": [
+    "SYS_PTRACE"
+  ],
+  "securityOpt": [
+    "seccomp=unconfined"
+  ],
+
+  // Configure tool-specific properties.
+  "customizations": {
+    // Configure properties specific to VS Code.
+    "vscode": {
+      // Set *default* container specific settings.json values on container create.
+      "settings": {
+        // Use vcpkg.
+        "cmake.configureEnvironment": {
+          "CMAKE_TOOLCHAIN_FILE": "/opt/vcpkg/scripts/buildsystems/vcpkg.cmake"
+        },
+
+        // Run cmake configure on open.
+        "cmake.configureOnOpen": true,
+
+        // Remove some cmake elements from the status bar.
+        "cmake.statusbar.advanced": {
+          "buildTarget": {
+            "visibility": "hidden"
+          },
+          "kit": {
+            "visibility": "hidden"
+          },
+          "ctest": {
+            "visibility": "hidden"
+          }
+        }
+      },
+
+      // Add the IDs of extensions you want installed when the container is created.
+      "extensions": [
+        "ms-dotnettools.csdevkit",
+        "ms-dotnettools.csharp",
+        "ms-vscode.cpptools",
+        "ms-vscode.cmake-tools"
+      ]
+    }
+  },
+
+  // Features to add to the dev container. More info: https://containers.dev/features.
+  // "features": {},
+
+  // Use 'forwardPorts' to make a list of ports inside the container available locally.
+  // "forwardPorts": [],
+
+  // Use 'postCreateCommand' to run commands after the container is created.
+  // "postCreateCommand": "",
+
+  // Set `remoteUser` to `root` to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
+  // "remoteUser": "vscode"
+}
\ No newline at end of file
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..7d0de582
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,3 @@
+**
+!vcpkg.json
+!*/*.csproj
\ No newline at end of file
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5f218f00..f6ccab7e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -117,6 +117,13 @@ jobs:
           vcpkg-${{ steps.vcpkg-info.outputs.triplet }}-cmake:${{ steps.cmake-info.outputs.version }}
           vcpkg-${{ steps.vcpkg-info.outputs.triplet }}
 
+    # Ensure vcpkg builtin registry is up-to-date
+    - name: Update vcpkg builtin registry
+      working-directory: ${{ steps.vcpkg-info.outputs.root }}
+      run: |
+        git reset --hard
+        git pull
+
     # Setup a CentOS 7 container to build on Linux x64 for backwards compatibility.
     - name: Start CentOS container and install toolchain
       if: runner.os == 'Linux' && matrix.arch == 'x64'
diff --git a/.github/workflows/devcontainer.yml b/.github/workflows/devcontainer.yml
new file mode 100644
index 00000000..e1ae6233
--- /dev/null
+++ b/.github/workflows/devcontainer.yml
@@ -0,0 +1,113 @@
+name: Build devcontainer image
+
+on:
+  push:
+    branches: [master]
+    paths:
+      - ".devcontainer/**"
+      - ".dockerignore"
+      - "vcpkg.json"
+      - "*/*.csproj"
+      - ".github/workflows/devcontainer.yml"
+  pull_request:
+    branches: [master]
+    paths:
+      - ".devcontainer/**"
+      - ".dockerignore"
+      - "vcpkg.json"
+      - "*/*.csproj"
+      - ".github/workflows/devcontainer.yml"
+  # Run once a week
+  schedule:
+    - cron: "34 2 * * 2"
+
+permissions:
+  contents: read
+  packages: write
+
+jobs:
+  build:
+    name: Build devcontainer image
+    strategy:
+      fail-fast: false
+      matrix:
+        runner: [ubuntu-latest, ubuntu-20.04-arm64]
+    runs-on: ${{ matrix.runner }}
+    steps:
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+      - name: Compute image info
+        id: image
+        run: |
+          echo "name=ghcr.io/$(echo ${{ github.repository }} | tr A-Z a-z)/devcontainer" >> "$GITHUB_OUTPUT"
+          echo "push=${{ (github.event_name == 'push' && github.ref == 'refs/heads/master') || github.event_name == 'schedule' }}" >> "$GITHUB_OUTPUT"
+      - name: Compute image labels
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          images: ${{ steps.image.outputs.name }}
+          tags: latest
+          labels: |
+            org.opencontainers.image.title=ParquetSharp devcontainer
+            org.opencontainers.image.description=devcontainer for ParquetSharp
+      - if: fromJson(steps.image.outputs.push)
+        name: Login to GitHub Container Registry
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Build image${{ fromJson(steps.image.outputs.push) && ' and push it by digest' || ''}}
+        id: build
+        uses: docker/build-push-action@v4
+        with:
+          file: .devcontainer/Dockerfile
+          labels: ${{ steps.meta.outputs.labels }}
+          outputs: type=image,name=${{ steps.image.outputs.name }},push-by-digest=true,name-canonical=true,push=${{ steps.image.outputs.push }}
+          cache-from: type=gha,scope=${{ github.ref }}-${{ matrix.runner }}
+          cache-to: type=gha,scope=${{ github.ref }}-${{ matrix.runner }},mode=max
+      - if: fromJson(steps.image.outputs.push)
+        name: Export digest
+        run: |
+          mkdir -p /tmp/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "/tmp/digests/${digest#sha256:}"
+      - if: fromJson(steps.image.outputs.push)
+        name: Upload digest
+        uses: actions/upload-artifact@v3
+        with:
+          name: digests
+          path: /tmp/digests/*
+          if-no-files-found: error
+          retention-days: 1
+    outputs:
+      image_name: ${{ steps.image.outputs.name }}
+      image_push: ${{ steps.image.outputs.push }}
+
+  merge:
+    name: Merge platforms
+    if: fromJson(needs.build.outputs.image_push)
+    runs-on: ubuntu-latest
+    needs:
+      - build
+    steps:
+      - name: Download digests
+        uses: actions/download-artifact@v3
+        with:
+          name: digests
+          path: /tmp/digests
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Create manifest list and push
+        working-directory: /tmp/digests
+        run: |
+          docker buildx imagetools create -t ${{ needs.build.outputs.image_name }}:latest \
+            $(printf '${{ needs.build.outputs.image_name }}@sha256:%s ' *)
+      - name: Inspect image
+        run: docker buildx imagetools inspect ${{ needs.build.outputs.image_name }}:latest
diff --git a/.github/workflows/nudge.yml b/.github/workflows/nudge.yml
index 1424bb85..7bff6c9c 100644
--- a/.github/workflows/nudge.yml
+++ b/.github/workflows/nudge.yml
@@ -2,7 +2,7 @@ name: Nudge
 
 on:
   workflow_run:
-    workflows: [CI]
+    workflows: ["CI", "Build devcontainer image"]
     types: [completed]
     branches: [master]
 
diff --git a/.gitignore b/.gitignore
index 7524e530..18dd5418 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,7 @@ build
 obj
 nuget
 BenchmarkDotNet.Artifacts
+
+# The solution files get generated by vcpkg on Windows
+# and by the C# Dev Kit within a dev container.
+*.sln
\ No newline at end of file
diff --git a/README.md b/README.md
index 7d3ce6b8..ec3f77ce 100644
--- a/README.md
+++ b/README.md
@@ -123,27 +123,68 @@ As only 64-bit runtimes are available, ParquetSharp cannot be referenced by a 32
 
 ## Building
 
-Building ParquetSharp for Windows requires the following dependencies:
-- Visual Studio 2022 (17.0 or higher)
-- Apache Arrow (13.0.0)
+### Dev Container
 
-For building Arrow (including Parquet) and its dependencies, we recommend using Microsoft's [vcpkg](https://github.com/Microsoft/vcpkg).
-The build scripts will use an existing vcpkg installation if either of the `VCPKG_INSTALLATION_ROOT` or `VCPKG_ROOT` environment variables are defined,
-otherwise vcpkg will be downloaded into the build directory.
-Note that the Windows build needs to be done in a Visual Studio Developer PowerShell for the build script to succeed.
+ParquetSharp can be built and tested within a [dev container](https://containers.dev). This is probably the easiest way to get started, as all the C++ dependencies are prebuilt into the container image.
 
-**Windows (Visual Studio 2022 Win64 solution)**
-```
-> build_windows.ps1
-> dotnet build csharp.test --configuration=Release
+#### GitHub Codespaces
+
+If you have a GitHub account, you can simply open ParquetSharp in a new GitHub Codespace by clicking on the green "Code" button at the top of this page.
+
+Choose the "unspecified" CMake kit when prompted and let the C++ configuration run. Once done, you can build the C++ code via the "Build" button in the status bar at the bottom.
+
+You can then build the C# code by right-clicking the ParquetSharp solution in the Solution Explorer on the left and choosing "Build". The Test Explorer will then get populated with all the C# tests too.
+
+#### Visual Studio Code
+
+If you want to work locally in [Visual Studio Code](https://code.visualstudio.com), all you need is to have [Docker](https://docs.docker.com/get-docker/) and the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) installed.
+
+Simply open up your copy of ParquetSharp in VS Code and click "Reopen in container" when prompted. Once the project has been opened, you can follow the GitHub Codespaces instructions above.
+
+#### CLI
+
+If the CLI is how you roll, then you can install the [Dev Container CLI](https://github.com/devcontainers/cli) tool and issue the following command in your copy of ParquetSharp to get up and running:
+
+```bash
+devcontainer up
 ```
-**Linux and macOS (Makefile)**
+
+Build the C++ code and run the C# tests with:
+
+```bash
+devcontainer exec ./build_unix.sh
+devcontainer exec dotnet test csharp.test
 ```
-> ./build_unix.sh
-> dotnet build csharp.test --configuration=Release
+
+### Native
+
+Building ParquetSharp natively requires the following dependencies:
+- A modern C++ compiler toolchain
+- .NET SDK 7.0
+- Apache Arrow (13.0.0)
+
+For building Arrow (including Parquet) and its dependencies, we recommend using Microsoft's [vcpkg](https://vcpkg.io).
+The build scripts will use an existing vcpkg installation if either of the `VCPKG_INSTALLATION_ROOT` or `VCPKG_ROOT` environment variables are defined, otherwise vcpkg will be downloaded into the build directory.
+
+#### Windows
+
+Building ParquetSharp on Windows requires Visual Studio 2022 (17.0 or higher).
+
+Open a Visual Studio Developer PowerShell and run the following commands to build the C++ code and run the C# tests:
+
+```pwsh
+build_windows.ps1
+dotnet test csharp.test
 ```
 
-We have had to write our own `FindPackage` macros for most of the dependencies to get us going - it clearly needs more love and attention and is likely to be redundant with some vcpkg helper tools.
+#### Unix
+
+Build the C++ code and run the C# tests with:
+
+```bash
+./build_unix.sh
+dotnet test csharp.test
+```
 
 ## Contributing
 
@@ -151,7 +192,7 @@ We welcome new contributors! We will happily receive PRs for bug fixes or small
 
 ## License
 
-Copyright 2018-2021 G-Research
+Copyright 2018-2023 G-Research
 
 Licensed under the Apache License, Version 2.0 (the "License"); you may not use these files except in compliance with the License.
 You may obtain a copy of the License at
diff --git a/build_unix.sh b/build_unix.sh
index 86b8bd79..fb68c026 100755
--- a/build_unix.sh
+++ b/build_unix.sh
@@ -61,9 +61,13 @@ if [ -z "$VCPKG_INSTALLATION_ROOT" ]; then
   fi
 fi
 
+# Cmake build types
+build_types="Debug Release"
+
 # Only build release configuration in CI
 if [ "$GITHUB_ACTIONS" = "true" ]
 then
+  build_types="Release"
   custom_triplets_dir=$PWD/build/custom-triplets
   mkdir -p "$custom_triplets_dir"
   for vcpkg_triplet_file in $VCPKG_INSTALLATION_ROOT/triplets/{,community/}$triplet.cmake
@@ -77,5 +81,10 @@ then
   options+=" -D VCPKG_OVERLAY_TRIPLETS=$custom_triplets_dir"
 fi
 
-cmake -B build/$triplet -S . -D VCPKG_TARGET_TRIPLET=$triplet -D CMAKE_TOOLCHAIN_FILE=$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake $options
-cmake --build build/$triplet -j
+for build_type in $build_types
+do
+  echo ">> Building ParquetSharpNative $build_type for $triplet"
+  build_dir=build/$triplet-$(tr A-Z a-z <<<$build_type)
+  cmake -B $build_dir -S . -D VCPKG_TARGET_TRIPLET=$triplet -D CMAKE_TOOLCHAIN_FILE=$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake -D CMAKE_BUILD_TYPE=$build_type $options
+  cmake --build $build_dir -j
+done
\ No newline at end of file
diff --git a/build_windows.ps1 b/build_windows.ps1
index 5def545c..a853907d 100644
--- a/build_windows.ps1
+++ b/build_windows.ps1
@@ -2,15 +2,17 @@ Set-StrictMode -Version 3
 $ErrorActionPreference = "Stop"
 
 # Find vcpkg or download it if required
-if ($Env:VCPKG_INSTALLATION_ROOT -ne $null) {
+if ($null -ne $Env:VCPKG_INSTALLATION_ROOT) {
   $vcpkgDir = $Env:VCPKG_INSTALLATION_ROOT
-  echo "Using vcpkg at $vcpkgDir from VCPKG_INSTALLATION_ROOT"
-} elseif ($Env:VCPKG_ROOT -ne $null) {
+  Write-Output "Using vcpkg at $vcpkgDir from VCPKG_INSTALLATION_ROOT"
+}
+elseif ($null -ne $Env:VCPKG_ROOT) {
   $vcpkgDir = $Env:VCPKG_ROOT
-  echo "Using vcpkg at $vcpkgDir from VCPKG_ROOT"
-} else {
-  $vcpkgDir = "$(pwd)/build/vcpkg"
-  echo "Using local vcpkg at $vcpkgDir"
+  Write-Output "Using vcpkg at $vcpkgDir from VCPKG_ROOT"
+}
+else {
+  $vcpkgDir = "$(Get-Location)/build/vcpkg"
+  Write-Output "Using local vcpkg at $vcpkgDir"
   if (-not (Test-Path $vcpkgDir)) {
     git clone https://github.com/microsoft/vcpkg.git $vcpkgDir
     if (-not $?) { throw "git clone failed" }
@@ -21,9 +23,12 @@ if ($Env:VCPKG_INSTALLATION_ROOT -ne $null) {
 
 $triplet = "x64-windows-static"
 
+$build_types = @("Debug", "Release")
+
 $options = @()
 if ($Env:GITHUB_ACTIONS -eq "true") {
-  $customTripletsDir = "$(pwd)/build/custom-triplets"
+  $build_types = @("Release")
+  $customTripletsDir = "$(Get-Location)/build/custom-triplets"
   New-Item -Path $customTripletsDir -ItemType "directory" -Force > $null
   $sourceTripletFile = "$vcpkgDir/triplets/$triplet.cmake"
   $customTripletFile = "$customTripletsDir/$triplet.cmake"
@@ -35,5 +40,8 @@ if ($Env:GITHUB_ACTIONS -eq "true") {
 
 cmake -B build/$triplet -S . -D VCPKG_TARGET_TRIPLET=$triplet -D CMAKE_TOOLCHAIN_FILE=$vcpkgDir/scripts/buildsystems/vcpkg.cmake -G "Visual Studio 17 2022" -A "x64" $options
 if (-not $?) { throw "cmake failed" }
-msbuild build/$triplet/ParquetSharp.sln -t:ParquetSharpNative:Rebuild -p:Configuration=Release
-if (-not $?) { throw "msbuild failed" }
+
+foreach ($build_type in $build_types) {
+  msbuild build/$triplet/ParquetSharp.sln -t:ParquetSharpNative:Rebuild -p:Configuration=$build_type
+  if (-not $?) { throw "msbuild failed" }
+}
\ No newline at end of file
diff --git a/vcpkg-configuration.json b/vcpkg-configuration.json
deleted file mode 100644
index 75bcc35d..00000000
--- a/vcpkg-configuration.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  "default-registry": {
-    "kind": "git",
-    "repository": "https://github.com/microsoft/vcpkg.git",
-    "baseline": "2cf957350da28ad032178a974607f59f961217d9"
-  }
-}
diff --git a/vcpkg.json b/vcpkg.json
index 706e55f2..e901354b 100644
--- a/vcpkg.json
+++ b/vcpkg.json
@@ -1,7 +1,8 @@
 {
-  "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg/master/scripts/vcpkg.schema.json",
+  "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg-tool/main/docs/vcpkg.schema.json",
   "name": "parquetsharp",
   "version-string": "undefined",
+  "builtin-baseline": "2cf957350da28ad032178a974607f59f961217d9",
   "dependencies": [
     "arrow"
   ],