diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml
new file mode 100644
index 0000000000000..519741abc66dd
--- /dev/null
+++ b/.github/workflows/codspeed.yml
@@ -0,0 +1,141 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Continuous performance tracking with CodSpeed. Three jobs are defined:
+#   * codspeed        - Criterion benchmarks, CPU simulation mode
+#   * codspeed-memory - Criterion benchmarks, memory profiling mode
+#   * codspeed-macro  - dfbench TPC-H / ClickBench queries, driven by
+#                       benchmarks/codspeed_macro.sh
+
+name: CodSpeed
+
+on:
+  push:
+    branches:
+      - "main"
+  pull_request:
+  # `workflow_dispatch` allows CodSpeed to trigger backtest
+  # performance analysis in order to generate initial data.
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  # NOTE(review): presumably required for OIDC authentication with CodSpeed - confirm.
+  id-token: write
+
+jobs:
+  codspeed:
+    name: Criterion CPU Simulation
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          submodules: true
+          fetch-depth: 1
+
+      - name: Setup Rust toolchain
+        run: rustup toolchain install stable
+
+      # Refresh the package index first so the install does not fail on stale package lists.
+      - name: Install protobuf compiler
+        run: sudo apt-get update && sudo apt-get install -y protobuf-compiler
+
+      # `--locked` matches the tpchgen-cli install below and keeps the tool build reproducible.
+      - name: Install cargo-codspeed
+        run: cargo install cargo-codspeed --version 4.3.0 --locked
+
+      - name: Build the benchmark target(s)
+        run: cargo codspeed build -m simulation
+
+      - name: Run the benchmarks
+        uses: CodSpeedHQ/action@4deb3275dd364fb96fb074c953133d29ec96f80f # v4.10.6
+        with:
+          mode: simulation
+          run: cargo codspeed run
+
+  codspeed-memory:
+    name: Criterion Memory Profiling
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          submodules: true
+          fetch-depth: 1
+
+      - name: Setup Rust toolchain
+        run: rustup toolchain install stable
+
+      # Refresh the package index first so the install does not fail on stale package lists.
+      - name: Install protobuf compiler
+        run: sudo apt-get update && sudo apt-get install -y protobuf-compiler
+
+      # `--locked` matches the tpchgen-cli install below and keeps the tool build reproducible.
+      - name: Install cargo-codspeed
+        run: cargo install cargo-codspeed --version 4.3.0 --locked
+
+      - name: Build benchmarks (memory mode)
+        run: cargo codspeed build -m memory
+
+      - name: Run memory benchmarks
+        uses: CodSpeedHQ/action@4deb3275dd364fb96fb074c953133d29ec96f80f # v4.10.6
+        with:
+          mode: memory
+          run: cargo codspeed run
+
+  codspeed-macro:
+    name: Macro Benchmarks (Simulation)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          submodules: true
+          fetch-depth: 1
+
+      - name: Setup Rust toolchain
+        run: rustup toolchain install stable
+
+      # Refresh the package index first so the install does not fail on stale package lists.
+      - name: Install protobuf compiler
+        run: sudo apt-get update && sudo apt-get install -y protobuf-compiler
+
+      # NOTE(review): the installer script is fetched unpinned and piped straight into bash;
+      # consider pinning a CLI version or verifying a checksum if the installer supports it.
+      - name: Install CodSpeed CLI
+        run: curl -fsSL https://codspeed.io/install.sh | bash
+
+      - name: Install tpchgen-cli
+        run: cargo install tpchgen-cli --locked
+
+      - name: Build dfbench binary
+        run: cargo build --release --bin dfbench
+
+      - name: Generate TPC-H SF1 data
+        run: |
+          mkdir -p benchmarks/data/tpch_sf1
+          tpchgen-cli --scale-factor 1 --format parquet --parquet-compression='ZSTD(1)' --parts=1 --output-dir benchmarks/data/tpch_sf1
+
+      # Fetches hits_0.parquet .. hits_99.parquet, up to 10 downloads in parallel.
+      - name: Download ClickBench partitioned data
+        run: |
+          mkdir -p benchmarks/data/hits_partitioned
+          seq 0 99 | xargs -P10 -I{} bash -c 'wget -q --continue https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet -O benchmarks/data/hits_partitioned/hits_{}.parquet'
+
+      - name: Run macro benchmarks
+        uses: CodSpeedHQ/action@4deb3275dd364fb96fb074c953133d29ec96f80f # v4.10.6
+        with:
+          mode: simulation
+          run: benchmarks/codspeed_macro.sh
diff --git a/Cargo.lock b/Cargo.lock
index f011844f40f82..71cffde455ebe 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -94,15 +94,6 @@ dependencies = [
  "alloc-no-stdlib",
 ]
 
-[[package]]
-name = "alloca"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4"
-dependencies = [
- "cc",
-]
-
 [[package]]
 name = "allocator-api2"
 version = "0.2.21"
@@ -209,6 +200,15 @@ dependencies = [
  "zstd",
 ]
 
+[[package]]
+name = "approx"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6"
+dependencies = [
+ "num-traits",
+]
+
 [[package]]
 name = "ar_archive_writer"
 version = "0.2.0"
@@ -1378,12 +1378,84 @@ dependencies = [
  "cc",
 ]
 
+[[package]]
+name = "codspeed"
+version = "4.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38c2eb3388ebe26b5a0ab6bf4969d9c4840143d7f6df07caa3cc851b0606cef6"
+dependencies = [
+ "anyhow",
+ "cc",
+ "colored",
+ "getrandom 0.2.16",
+ "glob",
+ "libc",
+ "nix 0.30.1",
+ "serde",
+ "serde_json",
+ "statrs",
+]
+
+[[package]]
+name = "codspeed-criterion-compat"
+version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1e270597a1d1e183f86d1cc9f94f0133654ee3daf201c17903ee29363555dd7" +dependencies = [ + "clap", + "codspeed", + "codspeed-criterion-compat-walltime", + "colored", + "futures", + "regex", + "tokio", +] + +[[package]] +name = "codspeed-criterion-compat-walltime" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c2613d2fac930fe34456be76f9124ee0800bb9db2e7fd2d6c65b9ebe98a292" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "codspeed", + "criterion-plot", + "futures", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "tokio", + "walkdir", +] + [[package]] name = "colorchoice" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "colored" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" +dependencies = [ + "lazy_static", + "windows-sys 0.59.0", +] + [[package]] name = "comfy-table" version = "7.2.1" @@ -1534,41 +1606,14 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "criterion" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" -dependencies = [ - "alloca", - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "futures", - "itertools 0.13.0", - "num-traits", - "oorandom", - "page_size", - "plotters", - "rayon", - "regex", - "serde", - "serde_json", - "tinytemplate", - "tokio", - "walkdir", -] - [[package]] name = "criterion-plot" -version = "0.8.2" +version = "0.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", - "itertools 0.13.0", + "itertools 0.10.5", ] [[package]] @@ -1723,7 +1768,7 @@ dependencies = [ "bytes", "bzip2", "chrono", - "criterion", + "codspeed-criterion-compat", "ctor", "dashmap", "datafusion-catalog", @@ -1897,7 +1942,7 @@ dependencies = [ "arrow", "arrow-ipc", "chrono", - "criterion", + "codspeed-criterion-compat", "half", "hashbrown 0.16.1", "hex", @@ -1935,7 +1980,7 @@ dependencies = [ "bytes", "bzip2", "chrono", - "criterion", + "codspeed-criterion-compat", "datafusion-common", "datafusion-common-runtime", "datafusion-execution", @@ -2053,7 +2098,7 @@ dependencies = [ "async-trait", "bytes", "chrono", - "criterion", + "codspeed-criterion-compat", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", @@ -2224,7 +2269,7 @@ dependencies = [ "blake3", "chrono", "chrono-tz", - "criterion", + "codspeed-criterion-compat", "ctor", "datafusion-common", "datafusion-doc", @@ -2253,7 +2298,7 @@ version = "52.1.0" dependencies = [ "ahash", "arrow", - "criterion", + "codspeed-criterion-compat", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -2275,7 +2320,7 @@ version = "52.1.0" dependencies = [ "ahash", "arrow", - "criterion", + "codspeed-criterion-compat", "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", @@ -2288,7 +2333,7 @@ version = "52.1.0" dependencies = [ "arrow", "arrow-ord", - "criterion", + "codspeed-criterion-compat", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -2324,7 +2369,7 @@ name = "datafusion-functions-window" version = "52.1.0" dependencies = [ "arrow", - "criterion", + "codspeed-criterion-compat", "datafusion-common", "datafusion-doc", "datafusion-expr", @@ -2360,7 +2405,7 @@ dependencies = [ "arrow", "async-trait", 
"chrono", - "criterion", + "codspeed-criterion-compat", "ctor", "datafusion-common", "datafusion-expr", @@ -2386,7 +2431,7 @@ version = "52.1.0" dependencies = [ "ahash", "arrow", - "criterion", + "codspeed-criterion-compat", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -2464,7 +2509,7 @@ dependencies = [ "arrow-ord", "arrow-schema", "async-trait", - "criterion", + "codspeed-criterion-compat", "datafusion-common", "datafusion-common-runtime", "datafusion-execution", @@ -2579,8 +2624,8 @@ dependencies = [ "arrow", "bigdecimal", "chrono", + "codspeed-criterion-compat", "crc32fast", - "criterion", "datafusion", "datafusion-catalog", "datafusion-common", @@ -3323,6 +3368,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -3750,12 +3801,32 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -4348,16 +4419,6 @@ version = "4.2.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52" -[[package]] -name = "page_size" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "parking_lot" version = "0.12.5" @@ -5788,6 +5849,16 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "statrs" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a3fe7c28c6512e766b0874335db33c94ad7b8f9054228ae1c2abd47ce7d335e" +dependencies = [ + "approx", + "num-traits", +] + [[package]] name = "stringprep" version = "0.1.5" diff --git a/Cargo.toml b/Cargo.toml index 60904a70c9574..d286653db9424 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -109,7 +109,7 @@ bigdecimal = "0.4.8" bytes = "1.11" bzip2 = "0.6.1" chrono = { version = "0.4.43", default-features = false } -criterion = "0.8" +criterion = { version = "4.3.0", package = "codspeed-criterion-compat" } ctor = "0.6.3" dashmap = "6.0.1" datafusion = { path = "datafusion/core", version = "52.1.0", default-features = false } diff --git a/README.md b/README.md index 630d4295bd427..c07b6f80c85e2 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ [![Discord chat][discord-badge]][discord-url] [![Linkedin][linkedin-badge]][linkedin-url] ![Crates.io MSRV][msrv-badge] +[![CodSpeed][codspeed-badge]][codspeed-url] [crates-badge]: https://img.shields.io/crates/v/datafusion.svg [crates-url]: https://crates.io/crates/datafusion @@ -45,6 +46,8 @@ [linkedin-badge]: https://img.shields.io/badge/Follow-Linkedin-blue [linkedin-url]: https://www.linkedin.com/company/apache-datafusion/ [msrv-badge]: https://img.shields.io/crates/msrv/datafusion?label=Min%20Rust%20Version +[codspeed-badge]: https://img.shields.io/endpoint?url=https://codspeed.io/badge.json +[codspeed-url]: 
https://codspeed.io/pydantic/datafusion?utm_source=badge
 
 [Website](https://datafusion.apache.org/) |
 [API Docs](https://docs.rs/datafusion/latest/datafusion/) |
diff --git a/benchmarks/codspeed_macro.sh b/benchmarks/codspeed_macro.sh
new file mode 100755
index 0000000000000..135d257ef58bd
--- /dev/null
+++ b/benchmarks/codspeed_macro.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Runs the CodSpeed macro benchmarks: every TPC-H SF1 query (1-22) and every
+# ClickBench query (0-42) is executed once through `codspeed exec`.
+#
+# Expects, relative to the repository root:
+#   target/release/dfbench            - built with `cargo build --release --bin dfbench`
+#   benchmarks/data/tpch_sf1          - TPC-H SF1 parquet data
+#   benchmarks/data/hits_partitioned  - ClickBench partitioned parquet files
+
+# Abort on command failure, on unset variables and on failures inside pipelines.
+set -euo pipefail
+
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)
+DATA_DIR="${SCRIPT_DIR}/data"
+DFBENCH="./target/release/dfbench"
+
+# DFBENCH is relative to the repository root (the parent of this script's
+# directory), so run from there regardless of where the script was invoked.
+cd -- "${SCRIPT_DIR}/.."
+
+if [[ ! -x "${DFBENCH}" ]]; then
+    echo "error: ${DFBENCH} not found or not executable; run 'cargo build --release --bin dfbench' first" >&2
+    exit 1
+fi
+
+# TPC-H SF1 (22 queries)
+for q in $(seq 1 22); do
+    codspeed exec -- "${DFBENCH}" tpch --iterations 1 --path "${DATA_DIR}/tpch_sf1" --format parquet --query "${q}"
+done
+
+# ClickBench partitioned (43 queries: q0-q42, 100 parquet files)
+for q in $(seq 0 42); do
+    codspeed exec -- "${DFBENCH}" clickbench --iterations 1 --path "${DATA_DIR}/hits_partitioned" --queries-path "${SCRIPT_DIR}/queries/clickbench/queries" --query "${q}"
+done