From c470bb19340f4f3fa00751c2ba6313425bc5edd7 Mon Sep 17 00:00:00 2001 From: Oleks V Date: Mon, 20 Apr 2026 17:27:29 -0700 Subject: [PATCH 01/12] chore: backport version from `branch-53`, update some dependencies (#21708) ## Which issue does this PR close? - Closes #. ## Rationale for this change ## What changes are included in this PR? ## Are these changes tested? ## Are there any user-facing changes? --- Cargo.lock | 548 +++++++++++++++++------------- Cargo.toml | 76 ++--- docs/source/user-guide/configs.md | 2 +- 3 files changed, 344 insertions(+), 282 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 37b18c3048f30..02da8661eedea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -221,7 +221,7 @@ dependencies = [ "arrow-select", "arrow-string", "half", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -272,7 +272,7 @@ dependencies = [ "flate2", "indexmap 2.14.0", "liblzma", - "rand 0.9.2", + "rand 0.9.4", "serde", "serde_json", "snap", @@ -638,9 +638,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.16.2" +version = "1.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" +checksum = "0ec6fb3fe69024a75fa7e1bfb48aa6cf59706a101658ea01bfd33b2b248a038f" dependencies = [ "aws-lc-sys", "zeroize", @@ -648,9 +648,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.39.0" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa7e52a4c5c547c741610a2c6f123f3881e409b714cd27e6798ef020c514f0a" +checksum = "f50037ee5e1e41e7b8f9d161680a725bd1626cb6f8c7e901f91f942850852fe7" dependencies = [ "cc", "cmake", @@ -733,9 +733,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.101.0" +version = "1.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab41ad64e4051ecabeea802d6a17845a91e83287e1dd249e6963ea1ba78c428a" +checksum = "0fc35b7a14cabdad13795fbbbd26d5ddec0882c01492ceedf2af575aad5f37dd" dependencies = [ "aws-credential-types", "aws-runtime", @@ -769,11 +769,11 @@ dependencies = [ "bytes", "form_urlencoded", "hex", - "hmac", + "hmac 0.12.1", "http 0.2.12", "http 1.4.0", "percent-encoding", - "sha2", + "sha2 0.10.9", "time", "tracing", ] @@ -864,9 +864,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.10.3" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "028999056d2d2fd58a697232f9eec4a643cf73a71cf327690a7edad1d2af2110" +checksum = "0504b1ab12debb5959e5165ee5fe97dd387e7aa7ea6a477bfd7635dfe769a4f5" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -889,11 +889,12 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.11.6" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "876ab3c9c29791ba4ba02b780a3049e21ec63dabda09268b175272c3733a79e6" +checksum = "b71a13df6ada0aafbf21a73bdfcdf9324cfa9df77d96b8446045be3cde61b42e" dependencies = [ "aws-smithy-async", + "aws-smithy-runtime-api-macros", "aws-smithy-types", "bytes", "http 0.2.12", @@ -904,6 +905,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "aws-smithy-runtime-api-macros" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d7396fd9500589e62e460e987ecb671bad374934e55ec3b5f498cc7a8a8a7b7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "aws-smithy-types" version = "1.4.7" @@ -952,9 +964,9 @@ 
dependencies = [ [[package]] name = "axum" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" +checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" dependencies = [ "axum-core", "bytes", @@ -1030,9 +1042,9 @@ dependencies = [ [[package]] name = "bitflags" -version = "2.11.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" [[package]] name = "blake2" @@ -1101,7 +1113,7 @@ dependencies = [ "log", "num", "pin-project-lite", - "rand 0.9.2", + "rand 0.9.4", "rustls", "rustls-native-certs", "rustls-pki-types", @@ -1225,9 +1237,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.57" +version = "1.2.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" +checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20" dependencies = [ "find-msvc-tools", "jobserver", @@ -1255,7 +1267,7 @@ checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" dependencies = [ "cfg-if", "cpufeatures 0.3.0", - "rand_core 0.10.0", + "rand_core 0.10.1", ] [[package]] @@ -1311,9 +1323,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.6.0" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" dependencies = [ "clap_builder", "clap_derive", @@ -1333,9 +1345,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.6.0" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" dependencies = [ "heck", "proc-macro2", @@ -1360,13 +1372,19 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" dependencies = [ "cc", ] +[[package]] +name = "cmov" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f88a43d011fc4a6876cb7344703e297c71dda42494fee094d5f7c76bf13f746" + [[package]] name = "colorchoice" version = "1.0.5" @@ -1674,6 +1692,15 @@ version = "0.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7ab264ea985f1bd27887d7b21ea2bb046728e05d11909ca138d700c494730db" +[[package]] +name = "ctutils" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5515a3834141de9eafb9717ad39eea8247b5674e6066c404e8c4b365d2a29e" +dependencies = [ + "cmov", +] + [[package]] name = "cty" version = "0.2.2" @@ -1730,7 +1757,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "arrow-schema", @@ -1783,7 +1810,7 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand 
0.9.2", + "rand 0.9.4", "rand_distr", "recursive", "regex", @@ -1802,7 +1829,7 @@ dependencies = [ [[package]] name = "datafusion-benchmarks" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "async-trait", @@ -1818,7 +1845,7 @@ dependencies = [ "mimalloc", "object_store", "parquet", - "rand 0.9.2", + "rand 0.9.4", "regex", "serde", "serde_json", @@ -1829,7 +1856,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "async-trait", @@ -1852,7 +1879,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "async-trait", @@ -1874,7 +1901,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "async-trait", @@ -1905,7 +1932,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "arrow-ipc", @@ -1923,7 +1950,7 @@ dependencies = [ "log", "object_store", "parquet", - "rand 0.9.2", + "rand 0.9.4", "recursive", "sqlparser", "tokio", @@ -1933,7 +1960,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "53.0.0" +version = "53.1.0" dependencies = [ "futures", "log", @@ -1942,7 +1969,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "async-compression", @@ -1969,7 +1996,7 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand 0.9.2", + "rand 0.9.4", "tempfile", "tokio", "tokio-util", @@ -1979,7 +2006,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "arrow-ipc", @@ -2002,7 +2029,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "arrow-avro", @@ -2019,7 +2046,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "async-trait", @@ -2040,7 +2067,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "async-trait", @@ -2062,7 +2089,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "async-trait", @@ -2095,11 +2122,11 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "53.0.0" +version = "53.1.0" [[package]] name = "datafusion-examples" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "arrow-flight", @@ -2123,7 +2150,7 @@ dependencies = [ "nom", "object_store", "prost", - "rand 0.9.2", + "rand 0.9.4", "serde", "serde_json", "strum", @@ -2140,7 +2167,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "arrow-buffer", @@ -2156,14 +2183,14 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand 0.9.2", + "rand 0.9.4", "tempfile", "url", ] [[package]] name = "datafusion-expr" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "arrow-schema", @@ -2187,7 +2214,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "datafusion-common", @@ -2198,7 +2225,7 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "53.0.0" +version = "53.1.0" dependencies = [ 
"abi_stable", "arrow", @@ -2233,7 +2260,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "arrow-buffer", @@ -2254,19 +2281,19 @@ dependencies = [ "hex", "itertools 0.14.0", "log", - "md-5", + "md-5 0.10.6", "memchr", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "regex", - "sha2", + "sha2 0.10.9", "tokio", "uuid", ] [[package]] name = "datafusion-functions-aggregate" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "criterion", @@ -2282,24 +2309,24 @@ dependencies = [ "half", "log", "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "criterion", "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] name = "datafusion-functions-nested" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "arrow-ord", @@ -2319,12 +2346,12 @@ dependencies = [ "itoa", "log", "memchr", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] name = "datafusion-functions-table" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "async-trait", @@ -2337,7 +2364,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "criterion", @@ -2353,7 +2380,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "53.0.0" +version = "53.1.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2361,7 +2388,7 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "53.0.0" +version = "53.1.0" dependencies = [ "datafusion-doc", "quote", @@ -2370,7 +2397,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "async-trait", @@ -2397,7 +2424,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "criterion", @@ -2414,7 +2441,7 @@ dependencies = [ "itertools 0.14.0", "parking_lot", "petgraph", - "rand 0.9.2", + "rand 0.9.4", "recursive", "rstest", "tokio", @@ -2422,7 +2449,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "datafusion-common", @@ -2435,7 +2462,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "chrono", @@ -2446,12 +2473,12 @@ dependencies = [ "indexmap 2.14.0", "itertools 0.14.0", "parking_lot", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] name = "datafusion-physical-optimizer" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "datafusion-common", @@ -2472,7 +2499,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "arrow-data", @@ -2501,7 +2528,7 @@ dependencies = [ "num-traits", "parking_lot", "pin-project-lite", - "rand 0.9.2", + "rand 0.9.4", "rstest", "rstest_reuse", "tokio", @@ -2509,7 +2536,7 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "async-trait", @@ -2539,7 +2566,7 @@ dependencies = [ "pbjson 0.9.0", "pretty_assertions", "prost", - "rand 0.9.2", + "rand 0.9.4", "serde", "serde_json", "tokio", @@ -2547,7 +2574,7 @@ dependencies = [ [[package]] 
name = "datafusion-proto-common" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "datafusion-common", @@ -2559,7 +2586,7 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "datafusion-common", @@ -2577,7 +2604,7 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "53.0.0" +version = "53.1.0" dependencies = [ "async-trait", "datafusion-common", @@ -2589,7 +2616,7 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "bigdecimal", @@ -2607,16 +2634,16 @@ dependencies = [ "log", "num-traits", "percent-encoding", - "rand 0.9.2", + "rand 0.9.4", "serde_json", "sha1 0.11.0", - "sha2", + "sha2 0.10.9", "url", ] [[package]] name = "datafusion-sql" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "bigdecimal", @@ -2641,7 +2668,7 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "53.0.0" +version = "53.1.0" dependencies = [ "arrow", "async-trait", @@ -2673,7 +2700,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "53.0.0" +version = "53.1.0" dependencies = [ "async-recursion", "async-trait", @@ -2694,7 +2721,7 @@ dependencies = [ [[package]] name = "datafusion-wasmtest" -version = "53.0.0" +version = "53.1.0" dependencies = [ "bytes", "chrono", @@ -2751,6 +2778,7 @@ dependencies = [ "block-buffer 0.12.0", "const-oid", "crypto-common 0.2.1", + "ctutils", ] [[package]] @@ -2881,9 +2909,9 @@ dependencies = [ [[package]] name = "env_filter" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a1c3cc8e57274ec99de65301228b537f1e4eedc1b8e0f9411c6caac8ae7308f" +checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef" dependencies = [ "log", "regex", @@ -2948,18 +2976,18 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "fastrand" -version = "2.3.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" [[package]] name = "ferroid" -version = "0.8.9" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb330bbd4cb7a5b9f559427f06f98a4f853a137c8298f3bd3f8ca57663e21986" +checksum = "ee93edf3c501f0035bbeffeccfed0b79e14c311f12195ec0e661e114a0f60da4" dependencies = [ "portable-atomic", - "rand 0.9.2", + "rand 0.10.1", "web-time", ] @@ -3244,7 +3272,7 @@ dependencies = [ "cfg-if", "libc", "r-efi 6.0.0", - "rand_core 0.10.0", + "rand_core 0.10.1", "wasip2", "wasip3", ] @@ -3296,7 +3324,7 @@ dependencies = [ "cfg-if", "crunchy", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "rand_distr", "zerocopy", ] @@ -3365,6 +3393,15 @@ dependencies = [ "digest 0.10.7", ] +[[package]] +name = "hmac" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6303bc9732ae41b04cb554b844a762b4115a61bfaa81e3e83050991eeb56863f" +dependencies = [ + "digest 0.11.2", +] + [[package]] name = "home" version = "0.5.12" @@ -3449,18 +3486,18 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hybrid-array" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1a79f2aff40c18ab8615ddc5caa9eb5b96314aef18fe5823090f204ad988e813" +checksum = "3944cf8cf766b40e2a1a333ee5e9b563f854d5fa49d6a8ca2764e97c6eddb214" dependencies = [ "typenum", ] [[package]] name = "hyper" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" dependencies = [ "atomic-waker", "bytes", @@ -3473,7 +3510,6 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "pin-utils", "smallvec", "tokio", "want", @@ -3496,16 +3532,15 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.7" +version = "0.27.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ "http 1.4.0", "hyper", "hyper-util", "rustls", "rustls-native-certs", - "rustls-pki-types", "tokio", "tokio-rustls", "tower-service", @@ -3588,12 +3623,13 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" dependencies = [ "displaydoc", "potential_utf", + "utf8_iter", "yoke", "zerofrom", "zerovec", @@ -3601,9 +3637,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" dependencies = [ "displaydoc", "litemap", @@ -3614,9 +3650,9 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" dependencies = [ "icu_collections", "icu_normalizer_data", @@ -3628,15 +3664,15 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" [[package]] name = "icu_properties" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" dependencies = [ "icu_collections", "icu_locale_core", @@ -3648,15 +3684,15 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "2.1.2" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" [[package]] name = "icu_provider" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" dependencies = [ "displaydoc", "icu_locale_core", @@ -3777,9 +3813,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.10" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" dependencies = [ "memchr", "serde", @@ -3851,9 +3887,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.93" +version = "0.3.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "797146bb2677299a1eb6b7b50a890f4c361b29ef967addf5b2fa45dae1bb6d7d" +checksum = "2964e92d1d9dc3364cae4d718d93f227e3abb088e747d92e0395bfdedf1c12ca" dependencies = [ "cfg-if", "futures-util", @@ -3932,9 +3968,9 @@ dependencies = [ [[package]] name = "libbz2-rs-sys" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" +checksum = "b3a6a8c165077efc8f3a971534c50ea6a1a18b329ef4a66e897a7e3a1494565f" [[package]] name = "libc" @@ -3963,9 +3999,9 @@ dependencies = [ [[package]] name = "liblzma-sys" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f2db66f3268487b5033077f266da6777d057949b8f93c8ad82e441df25e6186" +checksum = "1a60851d15cd8c5346eca4ab8babff585be2ae4bc8097c067291d3ffe2add3b6" dependencies = [ "cc", "libc", @@ -3980,25 +4016,24 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libmimalloc-sys" -version = "0.1.44" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "667f4fec20f29dfc6bc7357c582d91796c169ad7e2fce709468aefeb2c099870" +checksum = "bc89deee4af0429081d2a518c0431ae068222a5a262a3bc6ff4d8535ec2e02fe" dependencies = [ "cc", "cty", - "libc", ] [[package]] name = "libredox" -version = "0.1.14" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" +checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" dependencies = [ "bitflags", "libc", "plain", - "redox_syscall 0.7.3", + "redox_syscall 0.7.4", ] [[package]] @@ -4027,9 +4062,9 @@ checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" [[package]] name = "lock_api" @@ -4077,6 +4112,16 @@ dependencies = [ "digest 0.10.7", ] +[[package]] +name = "md-5" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69b6441f590336821bb897fb28fc622898ccceb1d6cea3fde5ea86b090c4de98" +dependencies = [ + "cfg-if", + "digest 0.11.2", +] + [[package]] name = "memchr" version = "2.8.0" @@ -4085,9 +4130,9 @@ checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "mimalloc" -version = "0.1.48" +version = "0.1.49" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1ee66a4b64c74f4ef288bcbb9192ad9c3feaad75193129ac8509af543894fd8" +checksum = "aca3c01a711f395b4257b81674c0e90e8dd1f1e62c4b7db45f684cc7a4fcb18a" dependencies = [ "libmimalloc-sys", ] @@ -4218,9 +4263,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" [[package]] name = "num-integer" @@ -4319,7 +4364,7 @@ dependencies = [ "humantime", "hyper", "itertools 0.14.0", - "md-5", + "md-5 0.10.6", "parking_lot", "percent-encoding", "quick-xml", @@ -4639,9 +4684,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.32" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" [[package]] name = "plain" @@ -4685,18 +4730,18 @@ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" +checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618" dependencies = [ "portable-atomic", ] [[package]] name = "postgres-derive" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56df96f5394370d1b20e49de146f9e6c25aa9ae750f449c9d665eafecb3ccae6" +checksum = "ca1dad89d9ffdbf78502fde418eeede499b87772d88be780478f7f76dc8d471f" dependencies = [ "heck", "proc-macro2", @@ -4706,19 +4751,19 @@ dependencies = [ [[package]] name = "postgres-protocol" -version = "0.6.10" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ee9dd5fe15055d2b6806f4736aa0c9637217074e224bbec46d4041b91bb9491" +checksum = "56201207dac53e2f38e848e31b4b91616a6bb6e0c7205b77718994a7f49e70fc" dependencies = [ "base64 0.22.1", "byteorder", "bytes", "fallible-iterator", - "hmac", - "md-5", + "hmac 0.13.0", + "md-5 0.11.0", "memchr", - "rand 0.9.2", - "sha2", + "rand 0.10.1", + "sha2 0.11.0", "stringprep", ] @@ -4737,9 +4782,9 @@ dependencies = [ [[package]] name = "potential_utf" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" dependencies = [ "zerovec", ] @@ -4906,7 +4951,7 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand 0.9.2", + "rand 0.9.4", "ring", "rustc-hash", "rustls", @@ -4965,9 +5010,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" dependencies = [ "libc", "rand_chacha 0.3.1", @@ -4976,9 +5021,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.2" +version = "0.9.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.5", @@ -4992,7 +5037,7 @@ checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" dependencies = [ "chacha20", "getrandom 0.4.2", - "rand_core 0.10.0", + "rand_core 0.10.1", ] [[package]] @@ -5035,9 +5080,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" [[package]] name = "rand_distr" @@ -5046,14 +5091,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" dependencies = [ "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] name = "rayon" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" dependencies = [ "either", "rayon-core", @@ -5100,9 +5145,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" +checksum = "f450ad9c3b1da563fb6948a8e0fb0fb9269711c9c73d9ea1de5058c79c8d643a" dependencies = [ "bitflags", ] @@ -5290,15 +5335,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3a8fb4672e840a587a66fc577a5491375df51ddb88f2a2c2a792598c326fe14" dependencies = [ "quote", - "rand 0.8.5", + "rand 0.8.6", "syn 2.0.117", ] [[package]] name = "rustc-hash" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] name = "rustc_version" @@ -5324,9 +5369,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.37" +version = "0.23.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" +checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21" dependencies = [ "aws-lc-rs", "log", @@ -5683,6 +5728,17 @@ dependencies = [ "digest 0.10.7", ] +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.2", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -5710,9 +5766,9 @@ dependencies = [ [[package]] name = "simd-adler32" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" [[package]] name = "simdutf8" @@ -5792,9 +5848,9 @@ dependencies = [ "humantime", "itertools 0.13.0", "libtest-mimic", - "md-5", 
+ "md-5 0.10.6", "owo-colors", - "rand 0.8.5", + "rand 0.8.6", "regex", "similar", "subst", @@ -6022,14 +6078,14 @@ dependencies = [ "chrono-tz", "datafusion-common", "env_logger", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] name = "testcontainers" -version = "0.27.2" +version = "0.27.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bd36b06a2a6c0c3c81a83be1ab05fe86460d054d4d51bf513bc56b3e15bdc22" +checksum = "bfd5785b5483672915ed5fe3cddf9f546802779fc1eceff0a6fb7321fac81c1e" dependencies = [ "astral-tokio-tar", "async-trait", @@ -6147,9 +6203,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" dependencies = [ "displaydoc", "zerovec", @@ -6182,9 +6238,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.52.0" +version = "1.52.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a91135f59b1cbf38c91e73cf3386fca9bb77915c45ce2771460c9d92f0f3d776" +checksum = "b67dee974fe86fd92cc45b7a95fdd2f99a36a6d7b0d431a231178d3d670bbcc6" dependencies = [ "bytes", "libc", @@ -6271,18 +6327,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "1.0.1+spec-1.1.0" +version = "1.1.1+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b320e741db58cac564e26c607d3cc1fdc4a88fd36c879568c07856ed83ff3e9" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" dependencies = [ "serde_core", ] [[package]] name = "toml_edit" -version = "0.25.5+spec-1.1.0" +version = "0.25.11+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ca1a40644a28bce036923f6a431df0b34236949d111cc07cb6dca830c9ef2e1" +checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" dependencies = [ "indexmap 2.14.0", "toml_datetime", @@ -6292,9 +6348,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.10+spec-1.1.0" +version = "1.1.2+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7df25b4befd31c4816df190124375d5a20c6b6921e2cad937316de3fccd63420" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" dependencies = [ "winnow", ] @@ -6480,15 +6536,15 @@ checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" [[package]] name = "typenum" -version = "1.19.0" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" [[package]] name = "typewit" -version = "1.14.2" +version = "1.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c1ae7cc0fdb8b842d65d127cb981574b0d2b249b74d1c7a2986863dc134f71" +checksum = "214ca0b2191785cbc06209b9ca1861e048e39b5ba33574b3cedd58363d5bb5f6" [[package]] name = "typify" @@ -6608,9 +6664,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "3.2.1" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ab5172ab0c2b6d01a9bb4f9332f7c1211193ea002742188040d09ea4eafe867" +checksum = 
"dea7109cdcd5864d4eeb1b58a1648dc9bf520360d7af16ec26d0a9354bafcfc0" dependencies = [ "base64 0.22.1", "log", @@ -6623,9 +6679,9 @@ dependencies = [ [[package]] name = "ureq-proto" -version = "0.5.3" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d81f9efa9df032be5934a46a068815a10a042b494b6a58cb0a1a97bb5467ed6f" +checksum = "e994ba84b0bd1b1b0cf92878b7ef898a5c1760108fe7b6010327e274917a808c" dependencies = [ "base64 0.22.1", "http 1.4.0", @@ -6672,9 +6728,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.23.0" +version = "1.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" +checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -6735,11 +6791,11 @@ dependencies = [ [[package]] name = "wasip2" -version = "1.0.2+wasi-0.2.9" +version = "1.0.3+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.57.1", ] [[package]] @@ -6748,7 +6804,7 @@ version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.51.0", ] [[package]] @@ -6762,9 +6818,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.116" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dc0882f7b5bb01ae8c5215a1230832694481c1a4be062fd410e12ea3da5b631" +checksum = "0bf938a0bacb0469e83c1e148908bd7d5a6010354cf4fb73279b7447422e3a89" dependencies = [ "cfg-if", "once_cell", @@ -6775,9 +6831,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.66" +version = "0.4.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19280959e2844181895ef62f065c63e0ca07ece4771b53d89bfdb967d97cbf05" +checksum = "f371d383f2fb139252e0bfac3b81b265689bf45b6874af544ffa4c975ac1ebf8" dependencies = [ "js-sys", "wasm-bindgen", @@ -6785,9 +6841,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.116" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75973d3066e01d035dbedaad2864c398df42f8dd7b1ea057c35b8407c015b537" +checksum = "eeff24f84126c0ec2db7a449f0c2ec963c6a49efe0698c4242929da037ca28ed" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -6795,9 +6851,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.116" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91af5e4be765819e0bcfee7322c14374dc821e35e72fa663a830bbc7dc199eac" +checksum = "9d08065faf983b2b80a79fd87d8254c409281cf7de75fc4b773019824196c904" dependencies = [ "bumpalo", "proc-macro2", @@ -6808,18 +6864,18 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.116" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9bf0406a78f02f336bf1e451799cca198e8acde4ffa278f0fb20487b150a633" +checksum = "5fd04d9e306f1907bd13c6361b5c6bfc7b3b3c095ed3f8a9246390f8dbdee129" dependencies = [ 
"unicode-ident", ] [[package]] name = "wasm-bindgen-test" -version = "0.3.66" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea88331fc76766356287e79bb0bc032157feea8eff8f2c3f1d9ea4b94255ae1c" +checksum = "6bb55e2540ad1c56eec35fd63e2aea15f83b11ce487fd2de9ad11578dfc047ea" dependencies = [ "async-trait", "cast", @@ -6839,9 +6895,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.66" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92437fa87f58743befb3003c4f4e3e9010dd50c6935561be7645981c0de05dfd" +checksum = "caf0ca1bd612b988616bac1ab34c4e4290ef18f7148a1d8b7f31c150080e9295" dependencies = [ "proc-macro2", "quote", @@ -6850,9 +6906,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-shared" -version = "0.2.116" +version = "0.2.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10091e48e3231b0f567b098ddb9a107310eb2629ae0eaa7c98dd746d5e80ee78" +checksum = "23cda5ecc67248c48d3e705d3e03e00af905769b78b9d2a1678b663b8b9d4472" [[package]] name = "wasm-encoder" @@ -6903,9 +6959,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.93" +version = "0.3.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "749466a37ee189057f54748b200186b59a03417a117267baf3fd89cecc9fb837" +checksum = "4f2dfbb17949fa2088e5d39408c48368947b86f7834484e87b73de55bc14d97d" dependencies = [ "js-sys", "wasm-bindgen", @@ -7242,9 +7298,9 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a90e88e4667264a994d34e6d1ab2d26d398dcdca8b7f52bec8668957517fc7d8" +checksum = "09dac053f1cd375980747450bfc7250c264eaae0583872e845c0c7cd578872b5" dependencies = [ "memchr", ] @@ -7258,6 +7314,12 @@ dependencies = [ "wit-bindgen-rust-macro", ] +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + [[package]] name = "wit-bindgen-core" version = "0.51.0" @@ -7339,9 +7401,9 @@ dependencies = [ [[package]] name = "writeable" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" [[package]] name = "xattr" @@ -7367,9 +7429,9 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "yoke" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" dependencies = [ "stable_deref_trait", "yoke-derive", @@ -7378,9 +7440,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" dependencies = [ "proc-macro2", "quote", @@ -7390,18 +7452,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.47" +version = "0.8.48" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.47" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", @@ -7410,18 +7472,18 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ "proc-macro2", "quote", @@ -7437,9 +7499,9 @@ checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" dependencies = [ "displaydoc", "yoke", @@ -7448,9 +7510,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" dependencies = [ "yoke", "zerofrom", @@ -7459,9 +7521,9 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 8d90a11858a45..59707ba8e3f27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,7 +79,7 @@ repository = "https://github.com/apache/datafusion" # Define Minimum Supported Rust Version (MSRV) rust-version = "1.88.0" # Define DataFusion version -version = "53.0.0" +version = "53.1.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -117,43 +117,43 @@ chrono = { version = "0.4.44", default-features = false } criterion = "0.8" ctor = "0.10.0" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "53.0.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "53.0.0" } -datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "53.0.0" } -datafusion-common = { path = "datafusion/common", version = "53.0.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "53.0.0" } -datafusion-datasource = { path = "datafusion/datasource", 
version = "53.0.0", default-features = false } -datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "53.0.0", default-features = false } -datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "53.0.0", default-features = false } -datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "53.0.0", default-features = false } -datafusion-datasource-json = { path = "datafusion/datasource-json", version = "53.0.0", default-features = false } -datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "53.0.0", default-features = false } -datafusion-doc = { path = "datafusion/doc", version = "53.0.0" } -datafusion-execution = { path = "datafusion/execution", version = "53.0.0", default-features = false } -datafusion-expr = { path = "datafusion/expr", version = "53.0.0", default-features = false } -datafusion-expr-common = { path = "datafusion/expr-common", version = "53.0.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "53.0.0" } -datafusion-functions = { path = "datafusion/functions", version = "53.0.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "53.0.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "53.0.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "53.0.0", default-features = false } -datafusion-functions-table = { path = "datafusion/functions-table", version = "53.0.0" } -datafusion-functions-window = { path = "datafusion/functions-window", version = "53.0.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "53.0.0" } -datafusion-macros = { path = "datafusion/macros", version = "53.0.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "53.0.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "53.0.0", default-features = false } -datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "53.0.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "53.0.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "53.0.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "53.0.0" } -datafusion-proto = { path = "datafusion/proto", version = "53.0.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "53.0.0" } -datafusion-pruning = { path = "datafusion/pruning", version = "53.0.0" } -datafusion-session = { path = "datafusion/session", version = "53.0.0" } -datafusion-spark = { path = "datafusion/spark", version = "53.0.0" } -datafusion-sql = { path = "datafusion/sql", version = "53.0.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "53.0.0" } +datafusion = { path = "datafusion/core", version = "53.1.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "53.1.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "53.1.0" } +datafusion-common = { path = "datafusion/common", version = "53.1.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "53.1.0" } +datafusion-datasource = { path = "datafusion/datasource", version = "53.1.0", default-features = false } +datafusion-datasource-arrow = 
{ path = "datafusion/datasource-arrow", version = "53.1.0", default-features = false } +datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "53.1.0", default-features = false } +datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "53.1.0", default-features = false } +datafusion-datasource-json = { path = "datafusion/datasource-json", version = "53.1.0", default-features = false } +datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "53.1.0", default-features = false } +datafusion-doc = { path = "datafusion/doc", version = "53.1.0" } +datafusion-execution = { path = "datafusion/execution", version = "53.1.0", default-features = false } +datafusion-expr = { path = "datafusion/expr", version = "53.1.0", default-features = false } +datafusion-expr-common = { path = "datafusion/expr-common", version = "53.1.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "53.1.0" } +datafusion-functions = { path = "datafusion/functions", version = "53.1.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "53.1.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "53.1.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "53.1.0", default-features = false } +datafusion-functions-table = { path = "datafusion/functions-table", version = "53.1.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "53.1.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "53.1.0" } +datafusion-macros = { path = "datafusion/macros", version = "53.1.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "53.1.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "53.1.0", default-features = false } +datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "53.1.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "53.1.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "53.1.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "53.1.0" } +datafusion-proto = { path = "datafusion/proto", version = "53.1.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "53.1.0" } +datafusion-pruning = { path = "datafusion/pruning", version = "53.1.0" } +datafusion-session = { path = "datafusion/session", version = "53.1.0" } +datafusion-spark = { path = "datafusion/spark", version = "53.1.0" } +datafusion-sql = { path = "datafusion/sql", version = "53.1.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "53.1.0" } doc-comment = "0.3" env_logger = "0.11" diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 49c9eea29ef73..46039f3c99c27 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -101,7 +101,7 @@ The following configuration settings are available: | datafusion.execution.parquet.dictionary_page_size_limit | 1048576 | (writing) Sets best effort maximum dictionary page size, in bytes | | datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. 
If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 53.0.0 | (writing) Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 53.1.0 | (writing) Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | | datafusion.execution.parquet.statistics_truncate_length | 64 | (writing) Sets statistics truncate length. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page | From e524f4964344e949e232e08d6e51710685d50871 Mon Sep 17 00:00:00 2001 From: Harrison Crosse Date: Tue, 21 Apr 2026 01:35:47 -0400 Subject: [PATCH 02/12] fix: array_concat widens container variant for mixed List/LargeList inputs (#21704) ## Which issue does this PR close? - Closes #21702. ## Rationale for this change `array_concat` hit an internal cast error when given a mix of `List` and `LargeList` (or `FixedSizeList` and `LargeList`) arguments: ```sql > select array_concat(make_array(1, 2), arrow_cast([3, 4], 'LargeList(Int64)')); DataFusion error: Internal error: could not cast array of type List(Int64) to arrow_array::array::list_array::GenericListArray. ``` `ArrayConcat::coerce_types` was coercing only the base element type, leaving the outer container alone. When the resolved return type is `LargeList`, `array_concat_inner` later tries to downcast each arg to `GenericListArray`, which fails for any `List` argument that slipped through. ## What changes are included in this PR? In `ArrayConcat::coerce_types`, after coercing the base type, also promote each input's outermost `List` to `LargeList` when the return type is a `LargeList`. `FixedSizeList` inputs already go through `FixedSizedListToList` first and then get promoted too. Per-arg dimensionality is preserved, so nested cases keep working with `align_array_dimensions`. ## Are these changes tested? Yes, added sqllogictests in `array_concat.slt` covering: - `List` + `LargeList` - `LargeList` + `List` - `FixedSizeList` + `LargeList` - Three-way mix `List`, `LargeList`, `List` Each one also asserts `arrow_typeof(...) = LargeList(Int64)`. ## Are there any user-facing changes? Queries that previously returned an internal cast error now return the concatenated `LargeList` as expected. No API changes. 
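For reference, a minimal end-to-end sketch of observing the widened type programmatically. This is not part of this patch; it assumes the public `datafusion` crate API (`SessionContext::sql`) and a `tokio` runtime, while `array_concat`, `make_array`, `arrow_cast`, and `arrow_typeof` are the SQL functions exercised by the sqllogictests above:

```rust
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // Mixing a plain List with a LargeList argument; with this fix the result
    // widens to LargeList(Int64) instead of raising an internal cast error.
    let df = ctx
        .sql("select array_concat(make_array(1, 2), arrow_cast([3, 4], 'LargeList(Int64)')) as v, arrow_typeof(array_concat(make_array(1, 2), arrow_cast([3, 4], 'LargeList(Int64)'))) as t")
        .await?;
    // Expected: v = [1, 2, 3, 4], t = LargeList(Int64)
    df.show().await?;
    Ok(())
}
```

Before this fix, the same query returned the internal cast error shown in the rationale above.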
--- datafusion/functions-nested/src/concat.rs | 17 ++++++++-- .../test_files/array/array_concat.slt | 32 +++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-nested/src/concat.rs b/datafusion/functions-nested/src/concat.rs index c3dc4c67cf12c..8d06140889a55 100644 --- a/datafusion/functions-nested/src/concat.rs +++ b/datafusion/functions-nested/src/concat.rs @@ -317,10 +317,23 @@ impl ScalarUDFImpl for ArrayConcat { } fn coerce_types(&self, arg_types: &[DataType]) -> Result> { - let base_type = base_type(&self.return_type(arg_types)?); + let return_type = self.return_type(arg_types)?; + let base_type = base_type(&return_type); let coercion = Some(&ListCoercion::FixedSizedListToList); + // When the return type is a `LargeList`, the outer container of every + // input must be widened to `LargeList` as well. Otherwise + // `array_concat_inner` would later try to downcast a `List` argument + // to `GenericListArray` and fail. + let promote_to_large_list = matches!(return_type, DataType::LargeList(_)); let arg_types = arg_types.iter().map(|arg_type| { - coerced_type_with_base_type_only(arg_type, &base_type, coercion) + let coerced = + coerced_type_with_base_type_only(arg_type, &base_type, coercion); + match coerced { + DataType::List(field) if promote_to_large_list => { + DataType::LargeList(field) + } + other => other, + } }); Ok(arg_types.collect()) diff --git a/datafusion/sqllogictest/test_files/array/array_concat.slt b/datafusion/sqllogictest/test_files/array/array_concat.slt index 0f847811615c7..168b307a1e636 100644 --- a/datafusion/sqllogictest/test_files/array/array_concat.slt +++ b/datafusion/sqllogictest/test_files/array/array_concat.slt @@ -121,6 +121,38 @@ select ---- [1, 2, 3] List(Utf8View) +# Concatenating mixed list and large list — return type widens to LargeList +query ?T +select + array_concat(make_array(1, 2), arrow_cast([3, 4], 'LargeList(Int64)')), + arrow_typeof(array_concat(make_array(1, 2), arrow_cast([3, 4], 'LargeList(Int64)'))); +---- +[1, 2, 3, 4] LargeList(Int64) + +# Reverse argument order: LargeList first, plain list second +query ?T +select + array_concat(arrow_cast([1, 2], 'LargeList(Int64)'), make_array(3, 4)), + arrow_typeof(array_concat(arrow_cast([1, 2], 'LargeList(Int64)'), make_array(3, 4))); +---- +[1, 2, 3, 4] LargeList(Int64) + +# FixedSizeList combined with LargeList — also widens to LargeList +query ?T +select + array_concat(arrow_cast([1, 2], 'FixedSizeList(2, Int64)'), arrow_cast([3, 4], 'LargeList(Int64)')), + arrow_typeof(array_concat(arrow_cast([1, 2], 'FixedSizeList(2, Int64)'), arrow_cast([3, 4], 'LargeList(Int64)'))); +---- +[1, 2, 3, 4] LargeList(Int64) + +# Three-way mix: List, LargeList, List +query ?T +select + array_concat(make_array(1, 2), arrow_cast([3], 'LargeList(Int64)'), make_array(4, 5)), + arrow_typeof(array_concat(make_array(1, 2), arrow_cast([3], 'LargeList(Int64)'), make_array(4, 5))); +---- +[1, 2, 3, 4, 5] LargeList(Int64) + # array_concat with NULL elements inside arrays query ? select array_concat([1, NULL, 3], [NULL, 5]); From 9b5e43e5a8c7db5122ef0df4ca8020daba7f8dd8 Mon Sep 17 00:00:00 2001 From: Eren Avsarogullari Date: Mon, 20 Apr 2026 22:43:37 -0700 Subject: [PATCH 03/12] feat: Expose used `MemoryPool` details in `ResourcesExhausted` error messages (#20387) ## Which issue does this PR close? - Closes #20386. ## Rationale for this change `memory_limit` (`RuntimeEnvBuilder::new().with_memory_limit()`) configuration uses `greedy` memory pool as `default`. 
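For reference, a minimal sketch of how these builder options select the memory pool, covering the configurations discussed below as well. This is not part of this patch and assumes the public `RuntimeEnvBuilder`, `FairSpillPool`, and `SessionContext` APIs:

```rust
use std::sync::Arc;

use datafusion::error::Result;
use datafusion::execution::memory_pool::FairSpillPool;
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
use datafusion::prelude::{SessionConfig, SessionContext};

fn main() -> Result<()> {
    // Only a memory limit: the `greedy` pool is selected by default.
    let greedy_rt = Arc::new(
        RuntimeEnvBuilder::new()
            .with_memory_limit(10 * 1024 * 1024, 1.0)
            .build()?,
    );

    // An explicit pool overrides the default, e.g. the `fair` spill-aware pool.
    let fair_rt = Arc::new(
        RuntimeEnvBuilder::new()
            .with_memory_pool(Arc::new(FairSpillPool::new(10 * 1024 * 1024)))
            .build()?,
    );

    // Neither `memory_limit` nor `memory_pool` set: the `unbounded` pool is used.
    let unbounded_rt = Arc::new(RuntimeEnvBuilder::new().build()?);

    // Any of these runtimes can back a SessionContext; with this change the
    // selected pool is also named in ResourcesExhausted error messages.
    let _ctx = SessionContext::new_with_config_rt(SessionConfig::new(), greedy_rt);
    let _ = (fair_rt, unbounded_rt);
    Ok(())
}
```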
However, if `memory_pool` (`RuntimeEnvBuilder::new().with_memory_pool()`) is set, it overrides that default with the requested pool type, such as `fair`. If neither `memory_limit` nor `memory_pool` is set, the `unbounded` memory pool is used. It is therefore useful to expose the ultimately used/selected pool in the `ResourcesExhausted` error message, so the end user is aware of it and can switch the memory pool (`greedy`, `fair`, `unbounded`) if needed. - Also, [this comparison table](https://github.com/lance-format/lance/issues/3601#issuecomment-2752838168) compares the runtime behavior of the `greedy` and `fair` memory pools; exposing the used memory pool info as part of native logs makes it easier to build this kind of comparison. Please find the following example use-cases with `datafusion-cli`: **Case1**: datafusion-cli result when `memory-limit` and `top-memory-consumers > 0` are set: ``` eren.avsarogullari@AWGNPWVK961 debug % ./datafusion-cli --memory-limit 10M --command 'select * from generate_series(1,500000) as t1(v1) order by v1;' --top-memory-consumers 3 DataFusion CLI v53.0.0 Error: Not enough memory to continue external sort. Consider increasing the memory limit config: 'datafusion.runtime.memory_limit', or decreasing the config: 'datafusion.execution.sort_spill_reservation_bytes'. caused by Resources exhausted: Additional allocation failed for ExternalSorter[0] with top memory consumers (across reservations) as: ExternalSorterMerge[0]#2(can spill: false) consumed 10.0 MB, peak 10.0 MB, DataFusion-Cli#0(can spill: false) consumed 0.0 B, peak 0.0 B, ExternalSorter[0]#1(can spill: true) consumed 0.0 B, peak 0.0 B. Error: Failed to allocate additional 128.0 KB for ExternalSorter[0] with 0.0 B already allocated for this reservation - 0.0 B remain available for the total memory pool: greedy(used: 10.0 MB, pool_size: 10.0 MB) ``` **Case2**: datafusion-cli result when `memory-limit` and `top-memory-consumers = 0` (disabling top memory consumers logging) are set: ``` eren.avsarogullari@AWGNPWVK961 debug % ./datafusion-cli --memory-limit 10M --command 'select * from generate_series(1,500000) as t1(v1) order by v1;' --top-memory-consumers 0 DataFusion CLI v53.0.0 Error: Not enough memory to continue external sort. Consider increasing the memory limit config: 'datafusion.runtime.memory_limit', or decreasing the config: 'datafusion.execution.sort_spill_reservation_bytes'. caused by Resources exhausted: Failed to allocate additional 128.0 KB for ExternalSorter[0] with 0.0 B already allocated for this reservation - 0.0 B remain available for the total memory pool: greedy(used: 10.0 MB, pool_size: 10.0 MB) ``` **Case3**: datafusion-cli result when only `memory-limit`, `memory-pool` and `top-memory-consumers > 0` are set: ``` eren.avsarogullari@AWGNPWVK961 debug % ./datafusion-cli --memory-limit 10M --mem-pool-type fair --top-memory-consumers 3 --command 'select * from generate_series(1,500000) as t1(v1) order by v1;' DataFusion CLI v53.0.0 Error: Not enough memory to continue external sort. Consider increasing the memory limit config: 'datafusion.runtime.memory_limit', or decreasing the config: 'datafusion.execution.sort_spill_reservation_bytes'. 
caused by Resources exhausted: Additional allocation failed for ExternalSorter[0] with top memory consumers (across reservations) as: ExternalSorterMerge[0]#2(can spill: false) consumed 10.0 MB, peak 10.0 MB, ExternalSorter[0]#1(can spill: true) consumed 0.0 B, peak 0.0 B, DataFusion-Cli#0(can spill: false) consumed 0.0 B, peak 0.0 B. Error: Failed to allocate additional 128.0 KB for ExternalSorter[0] with 0.0 B already allocated for this reservation - 0.0 B remain available for the total memory pool: fair(pool_size: 10.0 MB) ``` ## What changes are included in this PR? - Add a name property to MemoryPool instances, - Expose the used MemoryPool info in ResourcesExhausted error messages ## Are these changes tested? Yes, and existing test cases are updated. ## Are there any user-facing changes? Yes, the ResourcesExhausted error messages are updated. --- datafusion-cli/tests/cli_integration.rs | 4 +- .../cli_top_memory_consumers@no_track.snap | 2 +- .../cli_top_memory_consumers@top2.snap | 2 +- ...cli_top_memory_consumers@top3_default.snap | 2 +- ...consumers_with_mem_pool_type@no_track.snap | 2 +- ...ory_consumers_with_mem_pool_type@top2.snap | 2 +- .../memory_pool_tracking.rs | 3 +- datafusion/execution/src/memory_pool/mod.rs | 8 +- datafusion/execution/src/memory_pool/pool.rs | 304 +++++++++++++----- 9 files changed, 243 insertions(+), 86 deletions(-) diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 3cecba75e21b0..be4a2ad4fe197 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -261,11 +261,11 @@ fn bind_to_settings(snapshot_name: &str) -> SettingsBindDropGuard { "Consumer(can spill: bool) consumed XB, peak XB", ); settings.add_filter( - r"Error: Failed to allocate additional .*? for .*? with .*? already allocated for this reservation - .*? remain available for the total pool", + r"Error: Failed to allocate additional .*? for .*? with .*? already allocated for this reservation - .*? remain available for the total memory pool: '.*?'", "Error: Failed to allocate ", ); settings.add_filter( - r"Resources exhausted: Failed to allocate additional .*? for .*? with .*? already allocated for this reservation - .*? remain available for the total pool", + r"Resources exhausted: Failed to allocate additional .*? for .*? with .*? already allocated for this reservation - .*? remain available for the total memory pool: '.*?'", "Resources exhausted: Failed to allocate", ); diff --git a/datafusion-cli/tests/snapshots/cli_top_memory_consumers@no_track.snap b/datafusion-cli/tests/snapshots/cli_top_memory_consumers@no_track.snap index fe454595eb4bc..c34e1202f55da 100644 --- a/datafusion-cli/tests/snapshots/cli_top_memory_consumers@no_track.snap +++ b/datafusion-cli/tests/snapshots/cli_top_memory_consumers@no_track.snap @@ -16,6 +16,6 @@ exit_code: 1 [CLI_VERSION] Error: Not enough memory to continue external sort. Consider increasing the memory limit config: 'datafusion.runtime.memory_limit', or decreasing the config: 'datafusion.execution.sort_spill_reservation_bytes'. 
caused by -Resources exhausted: Failed to allocate +Resources exhausted: Failed to allocate additional 128.0 KB for ExternalSorter[0] with 0.0 B already allocated for this reservation - 0.0 B remain available for the total memory pool: greedy(used: 10.0 MB, pool_size: 10.0 MB) ----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_top_memory_consumers@top2.snap b/datafusion-cli/tests/snapshots/cli_top_memory_consumers@top2.snap index bb30e387166bc..ebf7a540d8d44 100644 --- a/datafusion-cli/tests/snapshots/cli_top_memory_consumers@top2.snap +++ b/datafusion-cli/tests/snapshots/cli_top_memory_consumers@top2.snap @@ -19,6 +19,6 @@ caused by Resources exhausted: Additional allocation failed for ExternalSorter[0] with top memory consumers (across reservations) as: Consumer(can spill: bool) consumed XB, peak XB, Consumer(can spill: bool) consumed XB, peak XB. -Error: Failed to allocate +Error: Failed to allocate additional 128.0 KB for ExternalSorter[0] with 0.0 B already allocated for this reservation - 0.0 B remain available for the total memory pool: greedy(used: 10.0 MB, pool_size: 10.0 MB) ----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_top_memory_consumers@top3_default.snap b/datafusion-cli/tests/snapshots/cli_top_memory_consumers@top3_default.snap index 891d72e3cc639..9e279ca93ddcd 100644 --- a/datafusion-cli/tests/snapshots/cli_top_memory_consumers@top3_default.snap +++ b/datafusion-cli/tests/snapshots/cli_top_memory_consumers@top3_default.snap @@ -18,6 +18,6 @@ Resources exhausted: Additional allocation failed for ExternalSorter[0] with top Consumer(can spill: bool) consumed XB, peak XB, Consumer(can spill: bool) consumed XB, peak XB, Consumer(can spill: bool) consumed XB, peak XB. -Error: Failed to allocate +Error: Failed to allocate additional 128.0 KB for ExternalSorter[0] with 0.0 B already allocated for this reservation - 0.0 B remain available for the total memory pool: greedy(used: 10.0 MB, pool_size: 10.0 MB) ----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_top_memory_consumers_with_mem_pool_type@no_track.snap b/datafusion-cli/tests/snapshots/cli_top_memory_consumers_with_mem_pool_type@no_track.snap index 25267ea1617e5..9a228fcfb6e93 100644 --- a/datafusion-cli/tests/snapshots/cli_top_memory_consumers_with_mem_pool_type@no_track.snap +++ b/datafusion-cli/tests/snapshots/cli_top_memory_consumers_with_mem_pool_type@no_track.snap @@ -18,6 +18,6 @@ exit_code: 1 [CLI_VERSION] Error: Not enough memory to continue external sort. Consider increasing the memory limit config: 'datafusion.runtime.memory_limit', or decreasing the config: 'datafusion.execution.sort_spill_reservation_bytes'. 
caused by -Resources exhausted: Failed to allocate +Resources exhausted: Failed to allocate additional 128.0 KB for ExternalSorter[0] with 0.0 B already allocated for this reservation - 0.0 B remain available for the total memory pool: fair(pool_size: 10.0 MB) ----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_top_memory_consumers_with_mem_pool_type@top2.snap b/datafusion-cli/tests/snapshots/cli_top_memory_consumers_with_mem_pool_type@top2.snap index 6515050047107..d7f964a339313 100644 --- a/datafusion-cli/tests/snapshots/cli_top_memory_consumers_with_mem_pool_type@top2.snap +++ b/datafusion-cli/tests/snapshots/cli_top_memory_consumers_with_mem_pool_type@top2.snap @@ -21,6 +21,6 @@ caused by Resources exhausted: Additional allocation failed for ExternalSorter[0] with top memory consumers (across reservations) as: Consumer(can spill: bool) consumed XB, peak XB, Consumer(can spill: bool) consumed XB, peak XB. -Error: Failed to allocate +Error: Failed to allocate additional 128.0 KB for ExternalSorter[0] with 0.0 B already allocated for this reservation - 0.0 B remain available for the total memory pool: fair(pool_size: 10.0 MB) ----- stderr ----- diff --git a/datafusion-examples/examples/execution_monitoring/memory_pool_tracking.rs b/datafusion-examples/examples/execution_monitoring/memory_pool_tracking.rs index af3031c690fa3..d849a033bc66b 100644 --- a/datafusion-examples/examples/execution_monitoring/memory_pool_tracking.rs +++ b/datafusion-examples/examples/execution_monitoring/memory_pool_tracking.rs @@ -119,7 +119,8 @@ async fn automatic_usage_example() -> Result<()> { ExternalSorter[1]#93(can spill: true) consumed 69.0 KB, peak 69.0 KB, ExternalSorter[13]#155(can spill: true) consumed 67.6 KB, peak 67.6 KB, ExternalSorter[8]#140(can spill: true) consumed 67.2 KB, peak 67.2 KB. - Error: Failed to allocate additional 10.0 MB for ExternalSorterMerge[0] with 0.0 B already allocated for this reservation - 7.1 MB remain available for the total pool + Error: Failed to allocate additional 10.0 MB for ExternalSorterMerge[0] with 0.0 B already allocated + for this reservation - 7.1 MB remain available for the total memory pool */ } } diff --git a/datafusion/execution/src/memory_pool/mod.rs b/datafusion/execution/src/memory_pool/mod.rs index a544cdfdb02e8..829e313d2381e 100644 --- a/datafusion/execution/src/memory_pool/mod.rs +++ b/datafusion/execution/src/memory_pool/mod.rs @@ -19,6 +19,7 @@ //! help with allocation accounting. use datafusion_common::{Result, internal_datafusion_err}; +use std::fmt::Display; use std::hash::{Hash, Hasher}; use std::{cmp::Ordering, sync::Arc, sync::atomic}; @@ -181,7 +182,10 @@ pub use pool::*; /// /// * [`TrackConsumersPool`]: Wraps another [`MemoryPool`] and tracks consumers, /// providing better error messages on the largest memory users. -pub trait MemoryPool: Send + Sync + std::fmt::Debug { +pub trait MemoryPool: Send + Sync + std::fmt::Debug + Display { + /// Return pool name + fn name(&self) -> &str; + /// Registers a new [`MemoryConsumer`] /// /// Note: Subsequent calls to [`Self::grow`] must be made to reserve memory @@ -232,7 +236,7 @@ pub enum MemoryLimit { /// [`MemoryReservation`] in a [`MemoryPool`]. 
All allocations are registered to /// a particular `MemoryConsumer`; /// -/// Each `MemoryConsumer` is identifiable by a process-unique id, and is therefor not cloneable, +/// Each `MemoryConsumer` is identifiable by a process-unique id, and is therefore not cloneable, /// If you want a clone of a `MemoryConsumer`, you should look into [`MemoryConsumer::clone_with_new_id`], /// but note that this `MemoryConsumer` may be treated as a separate entity based on the used pool, /// and is only guaranteed to share the name and inner properties. diff --git a/datafusion/execution/src/memory_pool/pool.rs b/datafusion/execution/src/memory_pool/pool.rs index 19aaa0371ada3..aac95b9d6a81f 100644 --- a/datafusion/execution/src/memory_pool/pool.rs +++ b/datafusion/execution/src/memory_pool/pool.rs @@ -22,6 +22,7 @@ use datafusion_common::HashMap; use datafusion_common::{DataFusionError, Result, resources_datafusion_err}; use log::debug; use parking_lot::Mutex; +use std::fmt::{Display, Formatter}; use std::{ num::NonZeroUsize, sync::atomic::{AtomicUsize, Ordering}, @@ -34,6 +35,10 @@ pub struct UnboundedMemoryPool { } impl MemoryPool for UnboundedMemoryPool { + fn name(&self) -> &str { + "unbounded" + } + fn grow(&self, _reservation: &MemoryReservation, additional: usize) { self.used.fetch_add(additional, Ordering::Relaxed); } @@ -56,6 +61,13 @@ impl MemoryPool for UnboundedMemoryPool { } } +impl Display for UnboundedMemoryPool { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let used = self.used.load(Ordering::Relaxed); + write!(f, "{}(used: {})", &self.name(), human_readable_size(used)) + } +} + /// A [`MemoryPool`] that implements a greedy first-come first-serve limit. /// /// This pool works well for queries that do not need to spill or have @@ -79,6 +91,10 @@ impl GreedyMemoryPool { } impl MemoryPool for GreedyMemoryPool { + fn name(&self) -> &str { + "greedy" + } + fn grow(&self, _reservation: &MemoryReservation, additional: usize) { self.used.fetch_add(additional, Ordering::Relaxed); } @@ -98,6 +114,7 @@ impl MemoryPool for GreedyMemoryPool { reservation, additional, self.pool_size.saturating_sub(used), + self, ) })?; Ok(()) @@ -112,6 +129,19 @@ impl MemoryPool for GreedyMemoryPool { } } +impl Display for GreedyMemoryPool { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let used = self.used.load(Ordering::Relaxed); + write!( + f, + "{}(used: {}, pool_size: {})", + &self.name(), + human_readable_size(used), + human_readable_size(self.pool_size) + ) + } +} + /// A [`MemoryPool`] that prevents spillable reservations from using more than /// an even fraction of the available memory sans any unspillable reservations /// (i.e. 
`(pool_size - unspillable_memory) / num_spillable_reservations`) @@ -170,6 +200,10 @@ impl FairSpillPool { } impl MemoryPool for FairSpillPool { + fn name(&self) -> &str { + "fair" + } + fn register(&self, consumer: &MemoryConsumer) { if consumer.can_spill { self.state.lock().num_spill += 1; @@ -217,6 +251,7 @@ impl MemoryPool for FairSpillPool { reservation, additional, available, + self, )); } state.spillable += additional; @@ -231,6 +266,7 @@ impl MemoryPool for FairSpillPool { reservation, additional, available, + self, )); } state.unspillable += additional; @@ -249,6 +285,17 @@ impl MemoryPool for FairSpillPool { } } +impl Display for FairSpillPool { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}(pool_size: {})", + &self.name(), + human_readable_size(self.pool_size), + ) + } +} + /// Constructs a resources error based upon the individual [`MemoryReservation`]. /// /// The error references the `bytes already allocated` for the reservation, @@ -259,13 +306,15 @@ fn insufficient_capacity_err( reservation: &MemoryReservation, additional: usize, available: usize, + pool: &impl MemoryPool, ) -> DataFusionError { resources_datafusion_err!( - "Failed to allocate additional {} for {} with {} already allocated for this reservation - {} remain available for the total pool", + "Failed to allocate additional {} for {} with {} already allocated for this reservation - {} remain available for the total memory pool: {}", human_readable_size(additional), reservation.registration.consumer.name, human_readable_size(reservation.size()), - human_readable_size(available) + human_readable_size(available), + pool ) } @@ -362,6 +411,18 @@ pub struct TrackConsumersPool { tracked_consumers: Mutex>, } +impl Display for TrackConsumersPool { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}(inner_pool: {}, num_of_top_consumers: {})", + &self.name(), + &self.inner, + &self.top, + ) + } +} + impl TrackConsumersPool { /// Creates a new [`TrackConsumersPool`]. /// @@ -407,6 +468,11 @@ impl TrackConsumersPool { } } + /// Returns a reference to the wrapped inner [`MemoryPool`]. + pub fn inner(&self) -> &I { + &self.inner + } + /// Returns a snapshot of all currently tracked consumers. 
pub fn metrics(&self) -> Vec { self.tracked_consumers @@ -452,6 +518,10 @@ impl TrackConsumersPool { } impl MemoryPool for TrackConsumersPool { + fn name(&self) -> &str { + "track_consumers" + } + fn register(&self, consumer: &MemoryConsumer) { self.inner.register(consumer); @@ -545,7 +615,7 @@ fn provide_top_memory_consumers_to_error_msg( #[cfg(test)] mod tests { use super::*; - use insta::{Settings, allow_duplicates, assert_snapshot}; + use insta::{Settings, allow_duplicates, assert_snapshot, with_settings}; use std::sync::Arc; fn make_settings() -> Settings { @@ -575,10 +645,10 @@ mod tests { assert_eq!(pool.reserved(), 4000); let err = r2.try_grow(1).unwrap_err().strip_backtrace(); - assert_snapshot!(err, @"Resources exhausted: Failed to allocate additional 1.0 B for r2 with 2000.0 B already allocated for this reservation - 0.0 B remain available for the total pool"); + assert_snapshot!(err, @"Resources exhausted: Failed to allocate additional 1.0 B for r2 with 2000.0 B already allocated for this reservation - 0.0 B remain available for the total memory pool: fair(pool_size: 100.0 B)"); let err = r2.try_grow(1).unwrap_err().strip_backtrace(); - assert_snapshot!(err, @"Resources exhausted: Failed to allocate additional 1.0 B for r2 with 2000.0 B already allocated for this reservation - 0.0 B remain available for the total pool"); + assert_snapshot!(err, @"Resources exhausted: Failed to allocate additional 1.0 B for r2 with 2000.0 B already allocated for this reservation - 0.0 B remain available for the total memory pool: fair(pool_size: 100.0 B)"); r1.shrink(1990); r2.shrink(2000); @@ -603,12 +673,12 @@ mod tests { .register(&pool); let err = r3.try_grow(70).unwrap_err().strip_backtrace(); - assert_snapshot!(err, @"Resources exhausted: Failed to allocate additional 70.0 B for r3 with 0.0 B already allocated for this reservation - 40.0 B remain available for the total pool"); + assert_snapshot!(err, @"Resources exhausted: Failed to allocate additional 70.0 B for r3 with 0.0 B already allocated for this reservation - 40.0 B remain available for the total memory pool: fair(pool_size: 100.0 B)"); //Shrinking r2 to zero doesn't allow a3 to allocate more than 45 r2.free(); let err = r3.try_grow(70).unwrap_err().strip_backtrace(); - assert_snapshot!(err, @"Resources exhausted: Failed to allocate additional 70.0 B for r3 with 0.0 B already allocated for this reservation - 40.0 B remain available for the total pool"); + assert_snapshot!(err, @"Resources exhausted: Failed to allocate additional 70.0 B for r3 with 0.0 B already allocated for this reservation - 40.0 B remain available for the total memory pool: fair(pool_size: 100.0 B)"); // But dropping r2 does drop(r2); @@ -621,7 +691,7 @@ mod tests { let r4 = MemoryConsumer::new("s4").register(&pool); let err = r4.try_grow(30).unwrap_err().strip_backtrace(); - assert_snapshot!(err, @"Resources exhausted: Failed to allocate additional 30.0 B for s4 with 0.0 B already allocated for this reservation - 20.0 B remain available for the total pool"); + assert_snapshot!(err, @"Resources exhausted: Failed to allocate additional 30.0 B for s4 with 0.0 B already allocated for this reservation - 20.0 B remain available for the total memory pool: fair(pool_size: 100.0 B)"); } #[test] @@ -669,7 +739,7 @@ mod tests { r1#[ID](can spill: false) consumed 50.0 B, peak 70.0 B, r3#[ID](can spill: false) consumed 20.0 B, peak 25.0 B, r2#[ID](can spill: false) consumed 15.0 B, peak 15.0 B. 
- Error: Failed to allocate additional 150.0 B for r5 with 0.0 B already allocated for this reservation - 5.0 B remain available for the total pool + Error: Failed to allocate additional 150.0 B for r5 with 0.0 B already allocated for this reservation - 5.0 B remain available for the total memory pool: greedy(used: 95.0 B, pool_size: 100.0 B) "); } @@ -692,7 +762,7 @@ mod tests { assert_snapshot!(error, @r" Resources exhausted: Additional allocation failed for foo with top memory consumers (across reservations) as: foo#[ID](can spill: false) consumed 0.0 B, peak 0.0 B. - Error: Failed to allocate additional 150.0 B for foo with 0.0 B already allocated for this reservation - 100.0 B remain available for the total pool + Error: Failed to allocate additional 150.0 B for foo with 0.0 B already allocated for this reservation - 100.0 B remain available for the total memory pool: greedy(used: 0.0 B, pool_size: 100.0 B) "); // API: multiple registrations using the same hashed consumer, @@ -710,7 +780,7 @@ mod tests { Resources exhausted: Additional allocation failed for foo with top memory consumers (across reservations) as: foo#[ID](can spill: false) consumed 10.0 B, peak 10.0 B, foo#[ID](can spill: false) consumed 0.0 B, peak 0.0 B. - Error: Failed to allocate additional 150.0 B for foo with 0.0 B already allocated for this reservation - 90.0 B remain available for the total pool + Error: Failed to allocate additional 150.0 B for foo with 0.0 B already allocated for this reservation - 90.0 B remain available for the total memory pool: greedy(used: 10.0 B, pool_size: 100.0 B) "); // Test: will accumulate size changes per consumer, not per reservation @@ -723,7 +793,7 @@ mod tests { Resources exhausted: Additional allocation failed for foo with top memory consumers (across reservations) as: foo#[ID](can spill: false) consumed 20.0 B, peak 20.0 B, foo#[ID](can spill: false) consumed 10.0 B, peak 10.0 B. - Error: Failed to allocate additional 150.0 B for foo with 20.0 B already allocated for this reservation - 70.0 B remain available for the total pool + Error: Failed to allocate additional 150.0 B for foo with 20.0 B already allocated for this reservation - 70.0 B remain available for the total memory pool: greedy(used: 30.0 B, pool_size: 100.0 B) "); // Test: different hashed consumer, (even with the same name), @@ -739,78 +809,86 @@ mod tests { foo#[ID](can spill: false) consumed 20.0 B, peak 20.0 B, foo#[ID](can spill: false) consumed 10.0 B, peak 10.0 B, foo#[ID](can spill: true) consumed 0.0 B, peak 0.0 B. 
- Error: Failed to allocate additional 150.0 B for foo with 0.0 B already allocated for this reservation - 70.0 B remain available for the total pool + Error: Failed to allocate additional 150.0 B for foo with 0.0 B already allocated for this reservation - 70.0 B remain available for the total memory pool: greedy(used: 30.0 B, pool_size: 100.0 B) "); } #[test] fn test_tracked_consumers_pool_deregister() { - fn test_per_pool_type(pool: Arc) { - // Baseline: see the 2 memory consumers - let setting = make_settings(); - let _bound = setting.bind_to_scope(); - let r0 = MemoryConsumer::new("r0").register(&pool); - r0.grow(10); - let r1_consumer = MemoryConsumer::new("r1"); - let r1 = r1_consumer.register(&pool); - r1.grow(20); - - let res = r0.try_grow(150); - assert!(res.is_err()); - let error = res.unwrap_err().strip_backtrace(); - allow_duplicates!(assert_snapshot!(error, @r" - Resources exhausted: Additional allocation failed for r0 with top memory consumers (across reservations) as: - r1#[ID](can spill: false) consumed 20.0 B, peak 20.0 B, - r0#[ID](can spill: false) consumed 10.0 B, peak 10.0 B. - Error: Failed to allocate additional 150.0 B for r0 with 10.0 B already allocated for this reservation - 70.0 B remain available for the total pool - ")); - - // Test: unregister one - // only the remaining one should be listed - drop(r1); - let res = r0.try_grow(150); - assert!(res.is_err()); - let error = res.unwrap_err().strip_backtrace(); - allow_duplicates!(assert_snapshot!(error, @r" - Resources exhausted: Additional allocation failed for r0 with top memory consumers (across reservations) as: - r0#[ID](can spill: false) consumed 10.0 B, peak 10.0 B. - Error: Failed to allocate additional 150.0 B for r0 with 10.0 B already allocated for this reservation - 90.0 B remain available for the total pool - ")); - - // Test: actual message we see is the `available is 70`. When it should be `available is 90`. - // This is because the pool.shrink() does not automatically occur within the inner_pool.deregister(). - let res = r0.try_grow(150); - assert!(res.is_err()); - let error = res.unwrap_err().strip_backtrace(); - allow_duplicates!(assert_snapshot!(error, @r" - Resources exhausted: Additional allocation failed for r0 with top memory consumers (across reservations) as: - r0#[ID](can spill: false) consumed 10.0 B, peak 10.0 B. - Error: Failed to allocate additional 150.0 B for r0 with 10.0 B already allocated for this reservation - 90.0 B remain available for the total pool - ")); - - // Test: the registration needs to free itself (or be dropped), - // for the proper error message - let res = r0.try_grow(150); - assert!(res.is_err()); - let error = res.unwrap_err().strip_backtrace(); - allow_duplicates!(assert_snapshot!(error, @r" - Resources exhausted: Additional allocation failed for r0 with top memory consumers (across reservations) as: - r0#[ID](can spill: false) consumed 10.0 B, peak 10.0 B. - Error: Failed to allocate additional 150.0 B for r0 with 10.0 B already allocated for this reservation - 90.0 B remain available for the total pool - ")); + fn test_per_pool_type(pool: Arc>) { + // `snapshot_suffix` ties each insta snapshot to this pool's inner backend; filters + // normalize inner pool `Display` so fair vs greedy share the same `@` reference text. 
+ with_settings!({ + snapshot_suffix => pool.inner().name().to_string(), + filters => vec![ + ( + r"([^\s]+)\#\d+\(can spill: (true|false)\)", + "$1#[ID](can spill: $2)", + ), + ( + r"for the total memory pool: [^\n]+", + "for the total memory pool: [INNER_POOL]", + ), + ], + }, { + let memory_pool: Arc = Arc::>::clone(&pool); + let r0 = MemoryConsumer::new("r0").register(&memory_pool); + r0.grow(10); + let r1 = MemoryConsumer::new("r1").register(&memory_pool); + r1.grow(20); + + // Baseline: see the 2 memory consumers + let error = r0.try_grow(150).unwrap_err().strip_backtrace(); + assert_snapshot!(error, @r" + Resources exhausted: Additional allocation failed for r0 with top memory consumers (across reservations) as: + r1#[ID](can spill: false) consumed 20.0 B, peak 20.0 B, + r0#[ID](can spill: false) consumed 10.0 B, peak 10.0 B. + Error: Failed to allocate additional 150.0 B for r0 with 10.0 B already allocated for this reservation - 70.0 B remain available for the total memory pool: [INNER_POOL] + "); + + // Test: unregister one — only the remaining consumer should be listed + drop(r1); + let error = r0.try_grow(150).unwrap_err().strip_backtrace(); + assert_snapshot!(error, @r" + Resources exhausted: Additional allocation failed for r0 with top memory consumers (across reservations) as: + r0#[ID](can spill: false) consumed 10.0 B, peak 10.0 B. + Error: Failed to allocate additional 150.0 B for r0 with 10.0 B already allocated for this reservation - 90.0 B remain available for the total memory pool: [INNER_POOL] + "); + + // Test: actual message we see is the `available is 70`. When it should be `available is 90`. + // This is because the pool.shrink() does not automatically occur within the inner_pool.deregister(). + let error = r0.try_grow(150).unwrap_err().strip_backtrace(); + assert_snapshot!(error, @r" + Resources exhausted: Additional allocation failed for r0 with top memory consumers (across reservations) as: + r0#[ID](can spill: false) consumed 10.0 B, peak 10.0 B. + Error: Failed to allocate additional 150.0 B for r0 with 10.0 B already allocated for this reservation - 90.0 B remain available for the total memory pool: [INNER_POOL] + "); + + // Test: the registration needs to free itself (or be dropped), + // for the proper error message + let error = r0.try_grow(150).unwrap_err().strip_backtrace(); + assert_snapshot!(error, @r" + Resources exhausted: Additional allocation failed for r0 with top memory consumers (across reservations) as: + r0#[ID](can spill: false) consumed 10.0 B, peak 10.0 B. + Error: Failed to allocate additional 150.0 B for r0 with 10.0 B already allocated for this reservation - 90.0 B remain available for the total memory pool: [INNER_POOL] + "); + } + ); } - let tracked_spill_pool: Arc = Arc::new(TrackConsumersPool::new( - FairSpillPool::new(100), - NonZeroUsize::new(3).unwrap(), - )); - test_per_pool_type(tracked_spill_pool); + allow_duplicates! 
{ + let tracked_spill_pool = Arc::new(TrackConsumersPool::new( + FairSpillPool::new(100), + NonZeroUsize::new(3).unwrap(), + )); + test_per_pool_type(tracked_spill_pool); - let tracked_greedy_pool: Arc = Arc::new(TrackConsumersPool::new( - GreedyMemoryPool::new(100), - NonZeroUsize::new(3).unwrap(), - )); - test_per_pool_type(tracked_greedy_pool); + let tracked_greedy_pool = Arc::new(TrackConsumersPool::new( + GreedyMemoryPool::new(100), + NonZeroUsize::new(3).unwrap(), + )); + test_per_pool_type(tracked_greedy_pool); + } } #[test] @@ -894,4 +972,78 @@ mod tests { r1#[ID](can spill: false) consumed 20.0 B, peak 20.0 B. "); } + + #[test] + fn test_memory_pool_display_fmt() { + let top = NonZeroUsize::new(5).unwrap(); + + // UnboundedMemoryPool Display with default allocation: 0.0B + let unbounded = UnboundedMemoryPool::default(); + assert_eq!( + unbounded.to_string(), + "unbounded(used: 0.0 B)", + "UnboundedMemoryPool Display" + ); + + // UnboundedMemoryPool Display with reservations + let unbounded_arc: Arc = Arc::new(UnboundedMemoryPool::default()); + let r = MemoryConsumer::new("u").register(&unbounded_arc); + r.grow(2048); + assert_eq!( + unbounded_arc.as_ref().to_string(), + "unbounded(used: 2.0 KB)", + "UnboundedMemoryPool Display with reservations" + ); + + // GreedyMemoryPool Display with default allocation: 100.0B + let greedy = GreedyMemoryPool::new(100); + assert_eq!( + greedy.to_string(), + "greedy(used: 0.0 B, pool_size: 100.0 B)", + "GreedyMemoryPool Display" + ); + + // GreedyMemoryPool Display with reservations + let greedy_arc: Arc = Arc::new(GreedyMemoryPool::new(100)); + let r = MemoryConsumer::new("g").register(&greedy_arc); + r.grow(50); + assert_eq!( + greedy_arc.as_ref().to_string(), + "greedy(used: 50.0 B, pool_size: 100.0 B)", + "GreedyMemoryPool Display with reservations" + ); + + // FairSpillPool Display with default allocation: 4.0KB and without reservations + let fair = FairSpillPool::new(4096); + assert_eq!( + fair.to_string(), + "fair(pool_size: 4.0 KB)", + "FairSpillPool Display" + ); + + // TrackConsumersPool Display with default allocation: 128.0B and without reservations + let tracked_greedy = TrackConsumersPool::new(GreedyMemoryPool::new(128), top); + assert_eq!( + tracked_greedy.to_string(), + "track_consumers(inner_pool: greedy(used: 0.0 B, pool_size: 128.0 B), num_of_top_consumers: 5)", + "TrackConsumersPool Display" + ); + + // TrackConsumersPool Display with default allocation: 256.0B and without reservations + let tracked_fair = TrackConsumersPool::new(FairSpillPool::new(256), top); + assert_eq!( + tracked_fair.to_string(), + "track_consumers(inner_pool: fair(pool_size: 256.0 B), num_of_top_consumers: 5)", + "TrackConsumersPool Display" + ); + + // TrackConsumersPool Display without reservations + let tracked_unbounded = + TrackConsumersPool::new(UnboundedMemoryPool::default(), top); + assert_eq!( + tracked_unbounded.to_string(), + "track_consumers(inner_pool: unbounded(used: 0.0 B), num_of_top_consumers: 5)", + "TrackConsumersPool Display" + ); + } } From 526f0cb10ecabeb91b4e615be666a03137e75f5b Mon Sep 17 00:00:00 2001 From: Neil Conway Date: Tue, 21 Apr 2026 02:41:08 -0400 Subject: [PATCH 04/12] perf: Reduce `Box` and `Arc` allocation churn during tree rewriting (#21749) ## Which issue does this PR close? - Closes #21751. ## Rationale for this change Profiling the planner suggests that a surprising amount of time was being spent doing tree rewriting in the logical optimizer. 
One culprit is `TreeNodeContainer::map_elements()` for `Box` and `Arc`, which do the following: * Fetch the inner `C` value from the `Box`/`Arc` * Pass the inner value to the closure * Wrap the return value of the closure in a newly allocated `Box` / `Arc`, respectively This allocates a fresh `Box` or `Arc` for every node visited while walking an expression or logical plan, even if the tree rewrite we're doing didn't modify the expression/plan node. Instead, we can reuse the current `Box` or `Arc`: use `std::mem::take()` to swap the inner value with `C::default()`, pass the inner value to the closure, and put the result back in the original container. Swapping the inner value with `C::default()` means the container always has a valid value, which is important if the closure panics. For `Arc`, we need to use `Arc::make_mut()`, which only clones if the `Arc` is not unique. This reduces the bytes allocated to plan TPC-H Q13 by ~22% (988 kB -> 765 kB), and reduces allocated blocks by 8.5% (210k -> 192k). ## What changes are included in this PR? * Optimize `Box::map_elements()` and `Arc::map_elements()` as described above * Change `map_children()` for `Expr::Alias` to use `map_elements()`, rather than invoking `f(*expr)` directly; this ensures that it can take advantage of this optimization * Make `LogicalPlan::default()` use a shared `DFSchema`, rather than allocating a fresh `DFSchema` for every call. Because `default()` is now in the hot path for tree rewriting, it is important that it is cheap * Add unit tests for new `map_elements()` behavior * Add note to migration guide for breaking API change ## Are these changes tested? Yes, plus new unit tests added. ## Are there any user-facing changes? Yes: `TreeNodeContainer` impls for `Box` and `Arc` now require `C: Default`. This is a breaking API change for third-party code that implements `TreeNodeContainer` for a custom type. The fix is usually straightforward. --- datafusion/common/src/dfschema.rs | 9 ++- datafusion/common/src/tree_node.rs | 79 ++++++++++++++++--- datafusion/expr/src/logical_plan/plan.rs | 5 +- datafusion/expr/src/logical_plan/statement.rs | 7 +- datafusion/expr/src/tree_node.rs | 11 ++- .../library-user-guide/upgrading/54.0.0.md | 25 ++++++ 6 files changed, 117 insertions(+), 19 deletions(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index e7d9e809daecc..e3da99163ed69 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -21,7 +21,7 @@ use std::collections::{BTreeSet, HashMap, HashSet}; use std::fmt::{Display, Formatter}; use std::hash::Hash; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use crate::error::{_plan_err, _schema_err, DataFusionError, Result}; use crate::{ @@ -129,6 +129,13 @@ impl DFSchema { } } + /// Returns a reference to a shared empty [`DFSchema`]. 
+ pub fn empty_ref() -> &'static DFSchemaRef { + static EMPTY: LazyLock = + LazyLock::new(|| Arc::new(DFSchema::empty())); + &EMPTY + } + /// Return a reference to the inner Arrow [`Schema`] /// /// Note this does not have the qualifier information diff --git a/datafusion/common/src/tree_node.rs b/datafusion/common/src/tree_node.rs index 1e7c02e424256..39300b9564621 100644 --- a/datafusion/common/src/tree_node.rs +++ b/datafusion/common/src/tree_node.rs @@ -796,7 +796,9 @@ pub trait TreeNodeContainer<'a, T: 'a>: Sized { ) -> Result>; } -impl<'a, T: 'a, C: TreeNodeContainer<'a, T>> TreeNodeContainer<'a, T> for Box { +impl<'a, T: 'a, C: TreeNodeContainer<'a, T> + Default> TreeNodeContainer<'a, T> + for Box +{ fn apply_elements Result>( &'a self, f: F, @@ -805,14 +807,24 @@ impl<'a, T: 'a, C: TreeNodeContainer<'a, T>> TreeNodeContainer<'a, T> for Box } fn map_elements Result>>( - self, + mut self, f: F, ) -> Result> { - (*self).map_elements(f)?.map_data(|c| Ok(Self::new(c))) + // Rewrite in place so the existing heap allocation can be reused. + // `mem::take` hands the inner `C` to `f` while leaving + // `C::default()` in the slot, so an unwinding drop finds a valid + // `C` even if `f` panics or the `?` short-circuits. + let inner = std::mem::take(&mut *self); + Ok(inner.map_elements(f)?.update_data(|c| { + *self = c; + self + })) } } -impl<'a, T: 'a, C: TreeNodeContainer<'a, T> + Clone> TreeNodeContainer<'a, T> for Arc { +impl<'a, T: 'a, C: TreeNodeContainer<'a, T> + Clone + Default> TreeNodeContainer<'a, T> + for Arc +{ fn apply_elements Result>( &'a self, f: F, @@ -821,12 +833,18 @@ impl<'a, T: 'a, C: TreeNodeContainer<'a, T> + Clone> TreeNodeContainer<'a, T> fo } fn map_elements Result>>( - self, + mut self, f: F, ) -> Result> { - Arc::unwrap_or_clone(self) - .map_elements(f)? - .map_data(|c| Ok(Arc::new(c))) + // Rewrite in place using the same `mem::take` strategy as + // `Box::map_elements`. `Arc::make_mut` gives us exclusive + // access (cloning `C` first if we were sharing), after which + // `get_mut` is infallible. + let inner = std::mem::take(Arc::make_mut(&mut self)); + Ok(inner.map_elements(f)?.update_data(|c| { + *Arc::get_mut(&mut self).unwrap() = c; + self + })) } } @@ -1335,6 +1353,7 @@ impl TreeNode for T { pub(crate) mod tests { use std::collections::HashMap; use std::fmt::Display; + use std::sync::Arc; use crate::Result; use crate::tree_node::{ @@ -1342,7 +1361,7 @@ pub(crate) mod tests { TreeNodeVisitor, }; - #[derive(Debug, Eq, Hash, PartialEq, Clone)] + #[derive(Debug, Default, Eq, Hash, PartialEq, Clone)] pub struct TestTreeNode { pub(crate) children: Vec>, pub(crate) data: T, @@ -2431,4 +2450,46 @@ pub(crate) mod tests { item.visit(&mut visitor).unwrap(); } + + #[test] + fn box_map_elements_reuses_allocation() { + let boxed = Box::new(TestTreeNode::new_leaf(42i32)); + let before: *const TestTreeNode = &*boxed; + let out = boxed.map_elements(|n| Ok(Transformed::no(n))).unwrap(); + let after: *const TestTreeNode = &*out.data; + assert_eq!(after, before); + } + + #[test] + fn arc_map_elements_reuses_allocation_when_unique() { + let arc = Arc::new(TestTreeNode::new_leaf(42i32)); + let before = Arc::as_ptr(&arc); + let out = arc.map_elements(|n| Ok(Transformed::no(n))).unwrap(); + assert_eq!(Arc::as_ptr(&out.data), before); + } + + #[test] + fn arc_map_elements_clones_when_shared() { + // When the input `Arc` is shared, `make_mut` clones into a fresh + // allocation, so the reuse optimization does not apply. 
+ let arc = Arc::new(TestTreeNode::new_leaf(42i32)); + let _keepalive = Arc::clone(&arc); + let before = Arc::as_ptr(&arc); + let out = arc.map_elements(|n| Ok(Transformed::no(n))).unwrap(); + assert_ne!(Arc::as_ptr(&out.data), before); + } + + #[test] + fn box_map_elements_panic() { + use std::panic::{AssertUnwindSafe, catch_unwind}; + let boxed = Box::new(TestTreeNode::new_leaf(42i32)); + let result = catch_unwind(AssertUnwindSafe(|| { + boxed + .map_elements(|_: TestTreeNode| -> Result<_> { + panic!("simulated panic during rewrite") + }) + .ok() + })); + assert!(result.is_err()); + } } diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 4f73169ad2827..d86024295a061 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -294,9 +294,12 @@ pub enum LogicalPlan { impl Default for LogicalPlan { fn default() -> Self { + // `Default` is used as a transient placeholder on hot paths (e.g. + // `Box`/`Arc` `map_elements`), so use a shared empty schema to avoid + // allocating. LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: Arc::new(DFSchema::empty()), + schema: Arc::clone(DFSchema::empty_ref()), }) } } diff --git a/datafusion/expr/src/logical_plan/statement.rs b/datafusion/expr/src/logical_plan/statement.rs index 384d99ca0899e..daf29d7c81d3f 100644 --- a/datafusion/expr/src/logical_plan/statement.rs +++ b/datafusion/expr/src/logical_plan/statement.rs @@ -20,7 +20,7 @@ use datafusion_common::metadata::format_type_and_metadata; use datafusion_common::{DFSchema, DFSchemaRef}; use itertools::Itertools as _; use std::fmt::{self, Display}; -use std::sync::{Arc, LazyLock}; +use std::sync::Arc; use crate::{Expr, LogicalPlan, expr_vec_fmt}; @@ -55,10 +55,7 @@ impl Statement { /// Get a reference to the logical plan's schema pub fn schema(&self) -> &DFSchemaRef { // Statements have an unchanging empty schema. - static STATEMENT_EMPTY_SCHEMA: LazyLock = - LazyLock::new(|| Arc::new(DFSchema::empty())); - - &STATEMENT_EMPTY_SCHEMA + DFSchema::empty_ref() } /// Return a descriptive string describing the type of this diff --git a/datafusion/expr/src/tree_node.rs b/datafusion/expr/src/tree_node.rs index f3bec6bbf9954..f43b138a284ea 100644 --- a/datafusion/expr/src/tree_node.rs +++ b/datafusion/expr/src/tree_node.rs @@ -116,7 +116,7 @@ impl TreeNode for Expr { /// indicating whether the expression was transformed or left unchanged. fn map_children Result>>( self, - mut f: F, + f: F, ) -> Result> { Ok(match self { // TODO: remove the next line after `Expr::Wildcard` is removed @@ -150,8 +150,13 @@ impl TreeNode for Expr { relation, name, metadata, - }) => f(*expr)?.update_data(|e| { - e.alias_qualified_with_metadata(relation, name, metadata) + }) => expr.map_elements(f)?.update_data(|expr| { + Expr::Alias(Alias { + expr, + relation, + name, + metadata, + }) }), Expr::InSubquery(InSubquery { expr, diff --git a/docs/source/library-user-guide/upgrading/54.0.0.md b/docs/source/library-user-guide/upgrading/54.0.0.md index c277f69d0bee2..030ca729f265a 100644 --- a/docs/source/library-user-guide/upgrading/54.0.0.md +++ b/docs/source/library-user-guide/upgrading/54.0.0.md @@ -347,3 +347,28 @@ SELECT CAST(approx_percentile_cont(quantity, 0.5) AS BIGINT) FROM orders; ``` [#21074]: https://github.com/apache/datafusion/pull/21074 + +### `Box` and `Arc` `TreeNodeContainer` impls now require `C: Default` + +The generic `TreeNodeContainer` implementations for `Box` and `Arc` now +require `C: Default`. 
This change was necessary as part of optimizing tree +rewriting to reduce heap allocations. + +**Who is affected:** + +- Users that implement `TreeNodeContainer` on a custom type and wrap it in + `Box` or `Arc` when walking trees. + +**Migration guide:** + +Add a `Default` implementation to your type. The default value is used as a +temporary placeholder during query optimization, so when possible, pick a cheap, +allocation-free variant: + +```rust,ignore +impl Default for MyTreeNode { + fn default() -> Self { + MyTreeNode::Leaf // or whichever variant is cheapest to construct + } +} +``` From a737c27b775f02e88cf8183dc257f9bb3d6a9024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20=C5=9Een?= Date: Tue, 21 Apr 2026 09:48:18 +0300 Subject: [PATCH 05/12] feat: estimate cardinality for semi and anti-joins using distinct counts (#20904) ## Which issue does this PR close? Does not close but part of https://github.com/apache/datafusion/issues/20766 ## Rationale for this change Details are in #20766. But main idea is to use existing distinct count information to optimize joins similar to how Spark/Trino does ## What changes are included in this PR? This PR extends cardinality estimation for semi/anti joins using distinct counts ## Are these changes tested? I've added cases but not sure if I should've added benchmarks on this. ## Are there any user-facing changes? No --------- Co-authored-by: Alessandro Solimando --- datafusion/physical-plan/src/joins/utils.rs | 411 ++++++++++++++++++-- 1 file changed, 383 insertions(+), 28 deletions(-) diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index a5ac8901635b7..90cab7246d71c 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -531,35 +531,48 @@ fn estimate_join_cardinality( }) } - // For SemiJoins estimation result is either zero, in cases when inputs - // are non-overlapping according to statistics, or equal to number of rows - // for outer input - JoinType::LeftSemi | JoinType::RightSemi => { - let (outer_stats, inner_stats) = match join_type { - JoinType::LeftSemi => (left_stats, right_stats), - _ => (right_stats, left_stats), - }; - let cardinality = match estimate_disjoint_inputs(&outer_stats, &inner_stats) { - Some(estimation) => *estimation.get_value()?, - None => *outer_stats.num_rows.get_value()?, - }; + JoinType::LeftSemi + | JoinType::RightSemi + | JoinType::LeftAnti + | JoinType::RightAnti => { + let is_left = matches!(join_type, JoinType::LeftSemi | JoinType::LeftAnti); + let is_anti = matches!(join_type, JoinType::LeftAnti | JoinType::RightAnti); + + let ((outer_stats, inner_stats), (outer_col_stats, inner_col_stats)) = + if is_left { + ( + (&left_stats, &right_stats), + (&left_col_stats, &right_col_stats), + ) + } else { + ( + (&right_stats, &left_stats), + (&right_col_stats, &left_col_stats), + ) + }; - Some(PartialJoinStatistics { - num_rows: cardinality, - column_statistics: outer_stats.column_statistics, - }) - } + let outer_rows = *outer_stats.num_rows.get_value()?; - // For AntiJoins estimation always equals to outer statistics, as - // non-overlapping inputs won't affect estimation - JoinType::LeftAnti | JoinType::RightAnti => { - let outer_stats = match join_type { - JoinType::LeftAnti => left_stats, - _ => right_stats, - }; + let cardinality = + if estimate_disjoint_inputs(outer_stats, inner_stats).is_some() { + // Disjoint inputs: semi produces 0, anti keeps all rows. 
+ if is_anti { outer_rows } else { 0 } + } else { + match estimate_semi_join_cardinality( + &outer_stats.num_rows, + &inner_stats.num_rows, + outer_col_stats, + inner_col_stats, + ) { + Some(semi) if is_anti => outer_rows.saturating_sub(semi), + Some(semi) => semi, + None => outer_rows, + } + }; + let outer_stats = if is_left { left_stats } else { right_stats }; Some(PartialJoinStatistics { - num_rows: *outer_stats.num_rows.get_value()?, + num_rows: cardinality, column_statistics: outer_stats.column_statistics, }) } @@ -699,6 +712,95 @@ fn estimate_disjoint_inputs( None } +/// Estimates the number of outer rows that have at least one matching +/// key on the inner side (i.e. semi join cardinality) using NDV +/// (Number of Distinct Values) statistics. +/// +/// Assuming the smaller domain is contained in the larger, the number +/// of overlapping distinct values is `min(outer_ndv, inner_ndv)`. +/// Under the uniformity assumption (each distinct value contributes +/// equally to row counts), the surviving fraction of outer rows is: +/// +/// Null rows cannot match, so each column's selectivity is further +/// reduced by the outer null fraction: +/// +/// ```text +/// null_frac_i = outer_null_count_i / outer_rows +/// selectivity_i = min(outer_ndv_i, inner_ndv_i) / outer_ndv_i * (1 - null_frac_i) +/// ``` +/// +/// For multi-column join keys the overall selectivity is the product +/// of per-column factors: +/// +/// ```text +/// semi_cardinality = outer_rows * product_i(selectivity_i) +/// ``` +/// +/// Anti join cardinality is derived as the complement: +/// `outer_rows - semi_cardinality`. +/// +/// Boundary cases: +/// * `inner_ndv >= outer_ndv` → selectivity = `1.0 - null_frac` +/// * `null_frac = 1.0` → selectivity = 0.0 (no non-null rows can match) +/// * Missing NDV statistics → returns `None` (fallback to `outer_rows`) +/// +/// PostgreSQL uses a similar approach in `eqjoinsel_semi` +/// (`src/backend/utils/adt/selfuncs.c`). When NDV statistics are +/// available on both sides it computes selectivity as `nd2 / nd1`, +/// which is equivalent to `min(outer_ndv, inner_ndv) / outer_ndv`. +/// If either side lacks statistics it falls back to a default. 
+fn estimate_semi_join_cardinality( + outer_num_rows: &Precision, + inner_num_rows: &Precision, + outer_col_stats: &[ColumnStatistics], + inner_col_stats: &[ColumnStatistics], +) -> Option { + let outer_rows = *outer_num_rows.get_value()?; + if outer_rows == 0 { + return Some(0); + } + let inner_rows = *inner_num_rows.get_value()?; + if inner_rows == 0 { + return Some(0); + } + + let mut selectivity = 1.0_f64; + let mut has_selectivity_estimate = false; + + for (outer_stat, inner_stat) in outer_col_stats.iter().zip(inner_col_stats.iter()) { + let outer_has_stats = outer_stat.distinct_count.get_value().is_some() + || (outer_stat.min_value.get_value().is_some() + && outer_stat.max_value.get_value().is_some()); + let inner_has_stats = inner_stat.distinct_count.get_value().is_some() + || (inner_stat.min_value.get_value().is_some() + && inner_stat.max_value.get_value().is_some()); + if !outer_has_stats || !inner_has_stats { + continue; + } + + let outer_ndv = max_distinct_count(outer_num_rows, outer_stat); + let inner_ndv = max_distinct_count(inner_num_rows, inner_stat); + + if let (Some(&o), Some(&i)) = (outer_ndv.get_value(), inner_ndv.get_value()) + && o > 0 + { + let null_frac = outer_stat + .null_count + .get_value() + .map(|&nc| nc as f64 / outer_rows as f64) + .unwrap_or(0.0); + selectivity *= (o.min(i) as f64) / (o as f64) * (1.0 - null_frac); + has_selectivity_estimate = true; + } + } + + if has_selectivity_estimate { + Some((outer_rows as f64 * selectivity).ceil() as usize) + } else { + None + } +} + /// Estimate the number of maximum distinct values that can be present in the /// given column from its statistics. If distinct_count is available, uses it /// directly. Otherwise, if the column is numeric and has min/max values, it @@ -2697,7 +2799,7 @@ mod tests { JoinType::LeftSemi, (50, Inexact(10), Inexact(20), Absent, Absent), (10, Inexact(15), Inexact(25), Absent, Absent), - Some(50), + Some(46), ), ( JoinType::RightSemi, @@ -2733,13 +2835,13 @@ mod tests { JoinType::LeftAnti, (50, Inexact(10), Inexact(20), Absent, Absent), (10, Inexact(15), Inexact(25), Absent, Absent), - Some(50), + Some(4), ), ( JoinType::RightAnti, (50, Inexact(10), Inexact(20), Absent, Absent), (10, Inexact(15), Inexact(25), Absent, Absent), - Some(10), + Some(0), ), ( JoinType::LeftAnti, @@ -2765,6 +2867,108 @@ mod tests { (10, Inexact(30), Absent, Absent, Absent), Some(50), ), + // NDV-based semi join: outer_ndv=20, inner_ndv=10 + // selectivity = 10/20 = 0.5, cardinality = ceil(50 * 0.5) = 25 + ( + JoinType::LeftSemi, + (50, Inexact(1), Inexact(100), Inexact(20), Absent), + (10, Inexact(1), Inexact(100), Inexact(10), Absent), + Some(25), + ), + // inner_ndv(30) >= outer_ndv(20) -> selectivity 1.0, no reduction + ( + JoinType::LeftSemi, + (50, Inexact(1), Inexact(100), Inexact(20), Absent), + (100, Inexact(1), Inexact(100), Inexact(30), Absent), + Some(50), + ), + // NDV-based anti join: semi=25, anti = 50 - 25 = 25 + ( + JoinType::LeftAnti, + (50, Inexact(1), Inexact(100), Inexact(20), Absent), + (10, Inexact(1), Inexact(100), Inexact(10), Absent), + Some(25), + ), + // inner covers all outer: semi=50, anti = 0 + ( + JoinType::LeftAnti, + (50, Inexact(1), Inexact(100), Inexact(20), Absent), + (100, Inexact(1), Inexact(100), Inexact(30), Absent), + Some(0), + ), + // RightSemi with explicit NDV (NDV within row count, used as-is): + // For RightSemi, sides are swapped: outer = right (20 rows, ndv=10), + // inner = left (50 rows, ndv=5). selectivity = min(10,5)/10 = 0.5, + // cardinality = ceil(20 * 0.5) = 10. 
+ ( + JoinType::RightSemi, + (50, Inexact(1), Inexact(100), Inexact(5), Absent), + (20, Inexact(1), Inexact(100), Inexact(10), Absent), + Some(10), + ), + // RightAnti with explicit NDV: anti = outer_rows - semi = 20 - 10 = 10. + ( + JoinType::RightAnti, + (50, Inexact(1), Inexact(100), Inexact(5), Absent), + (20, Inexact(1), Inexact(100), Inexact(10), Absent), + Some(10), + ), + // RightSemi where right-side NDV (20) exceeds right-side row count (10): + // NDV is clamped to 10, so outer_ndv=10, inner_ndv=10, + // selectivity = min(10,10)/10 = 1.0, cardinality = ceil(10 * 1.0) = 10. + ( + JoinType::RightSemi, + (50, Inexact(1), Inexact(100), Inexact(10), Absent), + (10, Inexact(1), Inexact(100), Inexact(20), Absent), + Some(10), + ), + // RightAnti with NDV clamped by row count: anti = 10 - 10 = 0. + ( + JoinType::RightAnti, + (50, Inexact(1), Inexact(100), Inexact(10), Absent), + (10, Inexact(1), Inexact(100), Inexact(20), Absent), + Some(0), + ), + // Empty inner table: no match possible, semi → 0 + ( + JoinType::LeftSemi, + (100, Absent, Absent, Absent, Absent), + (0, Absent, Absent, Absent, Absent), + Some(0), + ), + // NDV-based semi with nulls on outer side: + // outer_ndv=20, inner_ndv=10, null_frac=10/100=0.1 + // selectivity = 10/20 * (1-0.1) = 0.5 * 0.9 = 0.45 + // semi = ceil(100 * 0.45) = 45 + ( + JoinType::LeftSemi, + (100, Absent, Absent, Inexact(20), Inexact(10)), + (200, Absent, Absent, Inexact(10), Absent), + Some(45), + ), + // Anti-join with nulls on outer side: + // semi=45, anti = 100 - 45 = 55 + ( + JoinType::LeftAnti, + (100, Absent, Absent, Inexact(20), Inexact(10)), + (200, Absent, Absent, Inexact(10), Absent), + Some(55), + ), + // All outer rows are null: null_frac=1.0 + // selectivity = 10/20 * (1-1.0) = 0.0, semi = 0 + ( + JoinType::LeftSemi, + (100, Absent, Absent, Inexact(20), Inexact(100)), + (200, Absent, Absent, Inexact(10), Absent), + Some(0), + ), + // All outer rows are null (anti): anti = 100 - 0 = 100 + ( + JoinType::LeftAnti, + (100, Absent, Absent, Inexact(20), Inexact(100)), + (200, Absent, Absent, Inexact(10), Absent), + Some(100), + ), ]; let join_on = vec![( @@ -2884,6 +3088,157 @@ mod tests { Ok(()) } + #[test] + fn test_semi_join_multi_column_and_mixed_stats() -> Result<()> { + let join_on = vec![ + ( + Arc::new(Column::new("l_col0", 0)) as _, + Arc::new(Column::new("r_col0", 0)) as _, + ), + ( + Arc::new(Column::new("l_col1", 1)) as _, + Arc::new(Column::new("r_col1", 1)) as _, + ), + ]; + + // Multi-column: both columns have NDV on both sides. 
+ // col0: outer_ndv=20, inner_ndv=10 → selectivity = 10/20 = 0.5 + // col1: outer_ndv=40, inner_ndv=10 → selectivity = 10/40 = 0.25 + // total selectivity = 0.5 * 0.25 = 0.125 + // semi = ceil(100 * 0.125) = 13 + let result = estimate_join_cardinality( + &JoinType::LeftSemi, + Statistics { + num_rows: Inexact(100), + total_byte_size: Absent, + column_statistics: vec![ + create_column_stats(Absent, Absent, Inexact(20), Absent), + create_column_stats(Absent, Absent, Inexact(40), Absent), + ], + }, + Statistics { + num_rows: Inexact(200), + total_byte_size: Absent, + column_statistics: vec![ + create_column_stats(Absent, Absent, Inexact(10), Absent), + create_column_stats(Absent, Absent, Inexact(10), Absent), + ], + }, + &join_on, + ) + .map(|c| c.num_rows); + assert_eq!(result, Some(13), "multi-column semi join"); + + // Multi-column anti: anti = 100 - 13 = 87 + let result = estimate_join_cardinality( + &JoinType::LeftAnti, + Statistics { + num_rows: Inexact(100), + total_byte_size: Absent, + column_statistics: vec![ + create_column_stats(Absent, Absent, Inexact(20), Absent), + create_column_stats(Absent, Absent, Inexact(40), Absent), + ], + }, + Statistics { + num_rows: Inexact(200), + total_byte_size: Absent, + column_statistics: vec![ + create_column_stats(Absent, Absent, Inexact(10), Absent), + create_column_stats(Absent, Absent, Inexact(10), Absent), + ], + }, + &join_on, + ) + .map(|c| c.num_rows); + assert_eq!(result, Some(87), "multi-column anti join"); + + // Mixed stats: col0 has NDV on both sides, col1 has NDV only on outer. + // col1 is skipped (either side missing), so selectivity comes from col0 only. + // col0: outer_ndv=20, inner_ndv=10 → selectivity = 0.5 + // semi = ceil(100 * 0.5) = 50 + let result = estimate_join_cardinality( + &JoinType::LeftSemi, + Statistics { + num_rows: Inexact(100), + total_byte_size: Absent, + column_statistics: vec![ + create_column_stats(Absent, Absent, Inexact(20), Absent), + create_column_stats(Absent, Absent, Inexact(40), Absent), + ], + }, + Statistics { + num_rows: Inexact(200), + total_byte_size: Absent, + column_statistics: vec![ + create_column_stats(Absent, Absent, Inexact(10), Absent), + create_column_stats(Absent, Absent, Absent, Absent), + ], + }, + &join_on, + ) + .map(|c| c.num_rows); + assert_eq!(result, Some(50), "mixed stats: col1 skipped"); + + // Mixed stats: neither column has stats on both sides → fallback to outer_rows + let result = estimate_join_cardinality( + &JoinType::LeftSemi, + Statistics { + num_rows: Inexact(100), + total_byte_size: Absent, + column_statistics: vec![ + create_column_stats(Absent, Absent, Inexact(20), Absent), + create_column_stats(Absent, Absent, Absent, Absent), + ], + }, + Statistics { + num_rows: Inexact(200), + total_byte_size: Absent, + column_statistics: vec![ + create_column_stats(Absent, Absent, Absent, Absent), + create_column_stats(Absent, Absent, Inexact(10), Absent), + ], + }, + &join_on, + ) + .map(|c| c.num_rows); + assert_eq!(result, Some(100), "no column has stats on both sides"); + + // Multi-column with nulls on one column: + // col0: outer_ndv=20, inner_ndv=10, null_frac=0.0 → 10/20 * 1.0 = 0.5 + // col1: outer_ndv=40, inner_ndv=10, null_frac=20/100=0.2 → 10/40 * 0.8 = 0.2 + // total selectivity = 0.5 * 0.2 = 0.1 + // semi = ceil(100 * 0.1) = 10 + let result = estimate_join_cardinality( + &JoinType::LeftSemi, + Statistics { + num_rows: Inexact(100), + total_byte_size: Absent, + column_statistics: vec![ + create_column_stats(Absent, Absent, Inexact(20), Absent), + 
create_column_stats(Absent, Absent, Inexact(40), Inexact(20)), + ], + }, + Statistics { + num_rows: Inexact(200), + total_byte_size: Absent, + column_statistics: vec![ + create_column_stats(Absent, Absent, Inexact(10), Absent), + create_column_stats(Absent, Absent, Inexact(10), Absent), + ], + }, + &join_on, + ) + .map(|c| c.num_rows); + assert_eq!( + result, + Some(10), + "multi-column semi join with nulls on one column" + ); + + Ok(()) + } + #[test] fn test_calculate_join_output_ordering() -> Result<()> { let left_ordering = LexOrdering::new(vec![ From 5baa6efa9c59c089ef09b849b7738ee49dc89f83 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 Apr 2026 14:51:04 +0200 Subject: [PATCH 06/12] chore(deps): bump astral-sh/setup-uv from 8.0.0 to 8.1.0 (#21759) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 8.0.0 to 8.1.0.
Release notes

Sourced from astral-sh/setup-uv's releases.

v8.1.0 🌈 New input no-project

Changes

This adds a new boolean input no-project. It only makes sense to use in combination with activate-environment: true and will append --no-project to the uv venv call. This is useful, for example, if you have a pyproject.toml file with parts unparseable by uv.

🚀 Enhancements

  • Add input no-project in combination with activate-environment @eifinger (#856)

🧰 Maintenance

📚 Documentation

⬆️ Dependency updates

  • chore(deps): bump release-drafter/release-drafter from 7.1.1 to 7.2.0 @dependabot[bot] (#855)
Commits
  • 0880764 fix: grant contents:write to validate-release job (#860)
  • 717d6ab Add a release-gate step to the release workflow (#859)
  • 5a911eb Draft commitish releases (#858)
  • 080c31e Add action-types.yml to instructions (#857)
  • b3e97d2 Add input no-project in combination with activate-environment (#856)
  • 7dd591d chore(deps): bump release-drafter/release-drafter from 7.1.1 to 7.2.0 (#855)
  • 1541b77 chore: update known checksums for 0.11.7 (#853)
  • cdfb2ee Refactor version resolving (#852)
  • cb84d12 chore: update known checksums for 0.11.6 (#850)
  • 1912cc6 chore: update known checksums for 0.11.5 (#845)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=astral-sh/setup-uv&package-manager=github_actions&previous-version=8.0.0&new-version=8.1.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
  • `@dependabot rebase` will rebase this PR
  • `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
  • `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
  • `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
  • `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
  • `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/docs.yaml | 2 +- .github/workflows/docs_pr.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 89bd77670c12d..7713d5dd31422 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -41,7 +41,7 @@ jobs: path: asf-site - name: Setup uv - uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0 + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 - name: Install dependencies run: uv sync --package datafusion-docs diff --git a/.github/workflows/docs_pr.yaml b/.github/workflows/docs_pr.yaml index 5abf9a119d2f5..dab81fd6452da 100644 --- a/.github/workflows/docs_pr.yaml +++ b/.github/workflows/docs_pr.yaml @@ -45,7 +45,7 @@ jobs: submodules: true fetch-depth: 1 - name: Setup uv - uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0 + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 - name: Install doc dependencies run: uv sync --package datafusion-docs - name: Install dependency graph tooling From 9a851d6c74a61250d1b005fa0eeb68143f8cc508 Mon Sep 17 00:00:00 2001 From: Yongting You <2010youy01@gmail.com> Date: Tue, 21 Apr 2026 20:51:26 +0800 Subject: [PATCH 07/12] fix: Fix local `datafusion-cli` test failure (#21761) ## Which issue does this PR close? - Closes #. ## Rationale for this change One test case in `datafusion-cli` crate is failing locally if you run all tests through `cargo nextest run`, but passes for `cargo test` ``` FAIL [ 0.375s] datafusion-cli::cli_integration cli_explain_environment_overrides ``` The reason is `nextest` triggers a different build graph, which enforces a feature flag in `serde_json` dependency. This PR enforces this feature in the `dev-dependencies` in `datafusion-cli` crate, so the test become deterministic under different test setup. https://github.com/apache/datafusion/pull/21502 Fixed a similar issue, and also explains why not enabling it in the global dependencies inside `Cargo.toml` ## What changes are included in this PR? ## Are these changes tested? ## Are there any user-facing changes? --- Cargo.lock | 1 + datafusion-cli/Cargo.toml | 8 ++++++++ ...overrides@explain_plan_environment_overrides.snap | 12 ++++++------ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 02da8661eedea..e1ba084184c36 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1925,6 +1925,7 @@ dependencies = [ "regex", "rstest", "rustyline", + "serde_json", "testcontainers-modules", "tokio", "url", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 40e0e50dacd7a..19dc83d518b35 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -78,3 +78,11 @@ insta = { workspace = true } insta-cmd = "0.6.0" rstest = { workspace = true } testcontainers-modules = { workspace = true, features = ["minio"] } +# Makes sure `test_display_pg_json` behaves in a consistent way regardless of +# feature unification with dependencies +serde_json = { workspace = true, features = ["preserve_order"] } + +# Required because we pull serde_json with a feature to get consistent pg display, +# but its not directly used. 
+[package.metadata.cargo-machete] +ignored = "serde_json" diff --git a/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap b/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap index 1359cefbe71c7..5f43ca88dc9d7 100644 --- a/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap +++ b/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap @@ -18,19 +18,19 @@ exit_code: 0 | logical_plan | [ | | | { | | | "Plan": { | -| | "Expressions": [ | -| | "Int64(123)" | -| | ], | | | "Node Type": "Projection", | -| | "Output": [ | +| | "Expressions": [ | | | "Int64(123)" | | | ], | | | "Plans": [ | | | { | | | "Node Type": "EmptyRelation", | -| | "Output": [], | -| | "Plans": [] | +| | "Plans": [], | +| | "Output": [] | | | } | +| | ], | +| | "Output": [ | +| | "Int64(123)" | | | ] | | | } | | | } | From c6412628aabf6eea75cbb927f343ec1c184321b5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 Apr 2026 12:52:08 +0000 Subject: [PATCH 08/12] chore(deps): bump aws-config from 1.8.15 to 1.8.16 in the all-other-cargo-deps group (#21760) Bumps the all-other-cargo-deps group with 1 update: [aws-config](https://github.com/smithy-lang/smithy-rs). Updates `aws-config` from 1.8.15 to 1.8.16
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=aws-config&package-manager=cargo&previous-version=1.8.15&new-version=1.8.16)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
  • `@dependabot rebase` will rebase this PR
  • `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
  • `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
  • `@dependabot ignore <dependency name> major version` will close this group update PR and stop Dependabot creating any more for the specific dependency's major version (unless you unignore this specific dependency's major version or upgrade to it yourself)
  • `@dependabot ignore <dependency name> minor version` will close this group update PR and stop Dependabot creating any more for the specific dependency's minor version (unless you unignore this specific dependency's minor version or upgrade to it yourself)
  • `@dependabot ignore <dependency name>` will close this group update PR and stop Dependabot creating any more for the specific dependency (unless you unignore this specific dependency or upgrade to it yourself)
  • `@dependabot unignore <dependency name>` will remove all of the ignore conditions of the specified dependency
  • `@dependabot unignore <dependency name> <ignore condition>` will remove the ignore condition of the specified dependency and ignore conditions
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 43 ++++++++++++++++----------------------- datafusion-cli/Cargo.toml | 2 +- 2 files changed, 18 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e1ba084184c36..eaac23828b72e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -596,9 +596,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.15" +version = "1.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11493b0bad143270fb8ad284a096dd529ba91924c5409adeac856cc1bf047dbc" +checksum = "50f156acdd2cf55f5aa53ee416c4ac851cf1222694506c0b1f78c85695e9ca9d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -660,9 +660,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.7.2" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fc0651c57e384202e47153c1260b84a9936e19803d747615edf199dc3b98d17" +checksum = "5dcd93c82209ac7413532388067dce79be5a8780c1786e5fae3df22e4dee2864" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -685,9 +685,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.97.0" +version = "1.98.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aadc669e184501caaa6beafb28c6267fc1baef0810fb58f9b205485ca3f2567" +checksum = "d69c77aafa20460c68b6b3213c84f6423b6e76dbf89accd3e1789a686ffd9489" dependencies = [ "aws-credential-types", "aws-runtime", @@ -709,9 +709,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.99.0" +version = "1.100.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1342a7db8f358d3de0aed2007a0b54e875458e39848d54cc1d46700b2bfcb0a8" +checksum = "1c7e7b09346d5ca22a2a08267555843a6a0127fb20d8964cb6ecfb8fdb190225" dependencies = [ "aws-credential-types", "aws-runtime", @@ -733,9 +733,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.102.0" +version = "1.103.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fc35b7a14cabdad13795fbbbd26d5ddec0882c01492ceedf2af575aad5f37dd" +checksum = "c2249b81a2e73a8027c41c378463a81ec39b8510f184f2caab87de912af0f49b" dependencies = [ "aws-credential-types", "aws-runtime", @@ -758,9 +758,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.4.2" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0b660013a6683ab23797778e21f1f854744fdf05f68204b4cca4c8c04b5d1f4" +checksum = "68dc0b907359b120170613b5c09ccc61304eac3998ff6274b97d93ee6490115a" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -769,11 +769,11 @@ dependencies = [ "bytes", "form_urlencoded", "hex", - "hmac 0.12.1", + "hmac", "http 0.2.12", "http 1.4.0", "percent-encoding", - "sha2 0.10.9", + "sha2 0.11.0", "time", "tracing", ] @@ -950,9 +950,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.14" +version = "1.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47c8323699dd9b3c8d5b3c13051ae9cdef58fd179957c882f8374dd8725962d9" +checksum = "2f4bbcaa9304ea40902d3d5f42a0428d1bd895a2b0f6999436fb279ffddc58ac" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -3385,15 +3385,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" 
-[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest 0.10.7", -] - [[package]] name = "hmac" version = "0.13.0" @@ -4760,7 +4751,7 @@ dependencies = [ "byteorder", "bytes", "fallible-iterator", - "hmac 0.13.0", + "hmac", "md-5 0.11.0", "memchr", "rand 0.10.1", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 19dc83d518b35..414b8c6444869 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -37,7 +37,7 @@ backtrace = ["datafusion/backtrace"] [dependencies] arrow = { workspace = true } async-trait = { workspace = true } -aws-config = "1.8.14" +aws-config = "1.8.16" aws-credential-types = "1.2.13" chrono = { workspace = true } clap = { version = "4.5.60", features = ["cargo", "derive"] } From af67cdd409a79737d1179e429bd1fde1ea5a2bee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 Apr 2026 14:52:21 +0200 Subject: [PATCH 09/12] chore(deps): bump github/codeql-action from 4.35.1 to 4.35.2 (#21758) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.35.1 to 4.35.2.
Release notes

Sourced from github/codeql-action's releases.

v4.35.2

  • The undocumented TRAP cache cleanup feature that could be enabled using the CODEQL_ACTION_CLEANUP_TRAP_CACHES environment variable is deprecated and will be removed in May 2026. If you are affected by this, we recommend disabling TRAP caching by passing the trap-caching: false input to the init Action. #3795
  • The Git version 2.36.0 requirement for improved incremental analysis now only applies to repositories that contain submodules. #3789
  • Python analysis on GHES no longer extracts the standard library, relying instead on models of the standard library. This should result in significantly faster extraction and analysis times, while the effect on alerts should be minimal. #3794
  • Fixed a bug in the validation of OIDC configurations for private registries that was added in CodeQL Action 4.33.0 / 3.33.0. #3807
  • Update default CodeQL bundle version to 2.25.2. #3823
Changelog

Sourced from github/codeql-action's changelog.

CodeQL Action Changelog

See the releases page for the relevant changes to the CodeQL CLI and language packs.

[UNRELEASED]

No user facing changes.

4.35.2 - 15 Apr 2026

  • The undocumented TRAP cache cleanup feature that could be enabled using the CODEQL_ACTION_CLEANUP_TRAP_CACHES environment variable is deprecated and will be removed in May 2026. If you are affected by this, we recommend disabling TRAP caching by passing the trap-caching: false input to the init Action. #3795
  • The Git version 2.36.0 requirement for improved incremental analysis now only applies to repositories that contain submodules. #3789
  • Python analysis on GHES no longer extracts the standard library, relying instead on models of the standard library. This should result in significantly faster extraction and analysis times, while the effect on alerts should be minimal. #3794
  • Fixed a bug in the validation of OIDC configurations for private registries that was added in CodeQL Action 4.33.0 / 3.33.0. #3807
  • Update default CodeQL bundle version to 2.25.2. #3823

4.35.1 - 27 Mar 2026

4.35.0 - 27 Mar 2026

4.34.1 - 20 Mar 2026

  • Downgrade default CodeQL bundle version to 2.24.3 due to issues with a small percentage of Actions and JavaScript analyses. #3762

4.34.0 - 20 Mar 2026

  • Added an experimental change which disables TRAP caching when improved incremental analysis is enabled, since improved incremental analysis supersedes TRAP caching. This will improve performance and reduce Actions cache usage. We expect to roll this change out to everyone in March. #3569
  • We are rolling out improved incremental analysis to C/C++ analyses that use build mode none. We expect this rollout to be complete by the end of April 2026. #3584
  • Update default CodeQL bundle version to 2.25.0. #3585

4.33.0 - 16 Mar 2026

  • Upcoming change: Starting April 2026, the CodeQL Action will skip collecting file coverage information on pull requests to improve analysis performance. File coverage information will still be computed on non-PR analyses. Pull request analyses will log a warning about this upcoming change. #3562

    To opt out of this change:

    • Repositories owned by an organization: Create a custom repository property with the name github-codeql-file-coverage-on-prs and the type "True/false", then set this property to true in the repository's settings. For more information, see Managing custom properties for repositories in your organization. Alternatively, if you are using an advanced setup workflow, you can set the CODEQL_ACTION_FILE_COVERAGE_ON_PRS environment variable to true in your workflow.
    • User-owned repositories using default setup: Switch to an advanced setup workflow and set the CODEQL_ACTION_FILE_COVERAGE_ON_PRS environment variable to true in your workflow.
    • User-owned repositories using advanced setup: Set the CODEQL_ACTION_FILE_COVERAGE_ON_PRS environment variable to true in your workflow.
  • Fixed a bug which caused the CodeQL Action to fail loading repository properties if a "Multi select" repository property was configured for the repository. #3557

  • The CodeQL Action now loads custom repository properties on GitHub Enterprise Server, enabling the customization of features such as github-codeql-disable-overlay that was previously only available on GitHub.com. #3559

  • Once private package registries can be configured with OIDC-based authentication for organizations, the CodeQL Action will now be able to accept such configurations. #3563

  • Fixed the retry mechanism for database uploads. Previously this would fail with the error "Response body object should not be disturbed or locked". #3564

  • A warning is now emitted if the CodeQL Action detects a repository property whose name suggests that it relates to the CodeQL Action, but which is not one of the properties recognised by the current version of the CodeQL Action. #3570

4.32.6 - 05 Mar 2026

... (truncated)

Commits
  • 95e58e9 Merge pull request #3824 from github/update-v4.35.2-d2e135a73
  • 6f31bfe Update changelog for v4.35.2
  • d2e135a Merge pull request #3823 from github/update-bundle/codeql-bundle-v2.25.2
  • 60abb65 Add changelog note
  • 5a0a562 Update default bundle to codeql-bundle-v2.25.2
  • 6521697 Merge pull request #3820 from github/dependabot/github_actions/dot-github/wor...
  • 3c45af2 Merge pull request #3821 from github/dependabot/npm_and_yarn/npm-minor-345b93...
  • f1c3393 Rebuild
  • 1024fc4 Rebuild
  • 9dd4cfe Bump the npm-minor group across 1 directory with 6 updates
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github/codeql-action&package-manager=github_actions&previous-version=4.35.1&new-version=4.35.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
  • `@dependabot rebase` will rebase this PR
  • `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
  • `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
  • `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
  • `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
  • `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/codeql.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 920e1e79c8540..70d38b28112de 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -45,11 +45,11 @@ jobs: persist-credentials: false - name: Initialize CodeQL - uses: github/codeql-action/init@c10b8064de6f491fea524254123dbe5e09572f13 # v4 + uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4 with: languages: actions - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@c10b8064de6f491fea524254123dbe5e09572f13 # v4 + uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4 with: category: "/language:actions" From 9a1ed57859398e36bb09b0af07ac20422f86b151 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 Apr 2026 15:59:42 +0300 Subject: [PATCH 10/12] chore(deps): bump taiki-e/install-action from 2.75.10 to 2.75.18 (#21757) Bumps [taiki-e/install-action](https://github.com/taiki-e/install-action) from 2.75.10 to 2.75.18.
Release notes

Sourced from taiki-e/install-action's releases.

2.75.18

  • Update vacuum@latest to 0.26.1.

  • Update wasm-tools@latest to 1.247.0.

  • Update mise@latest to 2026.4.16.

  • Update espup@latest to 0.17.1.

  • Update trivy@latest to 0.70.0.

2.75.17

  • Update tombi@latest to 0.9.18.

  • Update mise@latest to 2026.4.15.

2.75.16

  • Update uv@latest to 0.11.7.

  • Update mise@latest to 2026.4.14.

  • Update vacuum@latest to 0.25.9.

  • Update cargo-machete@latest to 0.9.2.

  • Update cargo-deny@latest to 0.19.4.

2.75.15

  • Update cargo-nextest@latest to 0.9.133.

  • Update biome@latest to 2.4.12.

2.75.14

2.75.13

  • Update zizmor@latest to 1.24.1.

2.75.12

  • Update typos@latest to 1.45.1.

  • Update cargo-xwin@latest to 0.21.5.

  • Update cargo-binstall@latest to 1.18.1.

2.75.11

... (truncated)

Changelog

Sourced from taiki-e/install-action's changelog.

Changelog

All notable changes to this project will be documented in this file.

This project adheres to Semantic Versioning.

[Unreleased]

  • Update tombi@latest to 0.9.20.

  • Update martin@latest to 1.6.0.

  • Update just@latest to 1.50.0.

  • Update tombi@latest to 0.9.19.

  • Update mise@latest to 2026.4.18.

  • Update rclone@latest to 1.73.5.

  • Update mise@latest to 2026.4.17.

[2.75.18] - 2026-04-19

  • Update vacuum@latest to 0.26.1.

  • Update wasm-tools@latest to 1.247.0.

  • Update mise@latest to 2026.4.16.

  • Update espup@latest to 0.17.1.

  • Update trivy@latest to 0.70.0.

[2.75.17] - 2026-04-17

  • Update tombi@latest to 0.9.18.

  • Update mise@latest to 2026.4.15.

[2.75.16] - 2026-04-17

  • Update uv@latest to 0.11.7.

  • Update mise@latest to 2026.4.14.

... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=taiki-e/install-action&package-manager=github_actions&previous-version=2.75.10&new-version=2.75.18)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
  • `@dependabot rebase` will rebase this PR
  • `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
  • `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
  • `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
  • `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
  • `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/audit.yml | 2 +- .github/workflows/rust.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index d64fba6c22113..b3c1e10584a0e 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -42,7 +42,7 @@ jobs: steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install cargo-audit - uses: taiki-e/install-action@85b24a67ef0c632dfefad70b9d5ce8fddb040754 # v2.75.10 + uses: taiki-e/install-action@055f5df8c3f65ea01cd41e9dc855becd88953486 # v2.75.18 with: tool: cargo-audit - name: Run audit check diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index b176535616546..5ef886c66f0ef 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -430,7 +430,7 @@ jobs: sudo apt-get update -qq sudo apt-get install -y -qq clang - name: Setup wasm-pack - uses: taiki-e/install-action@85b24a67ef0c632dfefad70b9d5ce8fddb040754 # v2.75.10 + uses: taiki-e/install-action@055f5df8c3f65ea01cd41e9dc855becd88953486 # v2.75.18 with: tool: wasm-pack - name: Run tests with headless mode @@ -770,7 +770,7 @@ jobs: - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Install cargo-msrv - uses: taiki-e/install-action@85b24a67ef0c632dfefad70b9d5ce8fddb040754 # v2.75.10 + uses: taiki-e/install-action@055f5df8c3f65ea01cd41e9dc855becd88953486 # v2.75.18 with: tool: cargo-msrv From ff805cf67ecb5378dbabb3b561100c5ed3ef2b42 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Mon, 20 Apr 2026 18:42:53 -0500 Subject: [PATCH 11/12] feat: split Parquet files into row-group-sized morsels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each Parquet file previously produced a single morsel containing one `ParquetPushDecoder` over the full pruned `ParquetAccessPlan`. Morselize at row-group granularity instead: after all pruning work is done, pack surviving row groups into chunks bounded by a per-morsel row budget and compressed-byte budget (defaults: 100k rows, 64 MiB). Each chunk becomes its own stream so the executor can interleave row-group decode work with other operators and — in a follow-up — let sibling `FileStream`s steal row-group-sized units of work across partitions. A single oversized row group still becomes its own morsel; no sub-row-group splitting is introduced. `EarlyStoppingStream` (which is driven by the non-Clone `FilePruner`) is attached only to the first morsel's stream so the whole file can still short-circuit on dynamic-filter narrowing. Row-group reversal is applied per-chunk on the `PreparedAccessPlan` and the chunk list is reversed so reverse output order is preserved. 
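To make the packing rule concrete, here is a minimal stand-alone sketch of the greedy budget check using plain `(rows, compressed_bytes)` pairs. It is illustrative only; the real logic is `ParquetAccessPlan::split_into_chunks` in this patch, which additionally carries `Skip` entries inside whichever chunk is open and preserves each row group's `RowGroupAccess`:

```rust
/// Greedily pack (rows, compressed_bytes) pairs into chunks bounded by both
/// budgets. An oversized entry still becomes a single-element chunk of its own.
fn pack(row_groups: &[(u64, u64)], max_rows: u64, max_bytes: u64) -> Vec<Vec<usize>> {
    let mut chunks: Vec<Vec<usize>> = Vec::new();
    let (mut rows, mut bytes) = (0u64, 0u64);
    for (idx, &(rg_rows, rg_bytes)) in row_groups.iter().enumerate() {
        match chunks.last_mut() {
            // The next row group fits under both budgets: grow the open chunk.
            Some(chunk)
                if rows + rg_rows <= max_rows && bytes + rg_bytes <= max_bytes =>
            {
                chunk.push(idx);
                rows += rg_rows;
                bytes += rg_bytes;
            }
            // Otherwise start a new chunk (no sub-row-group split).
            _ => {
                chunks.push(vec![idx]);
                rows = rg_rows;
                bytes = rg_bytes;
            }
        }
    }
    chunks
}

fn main() {
    // With the defaults (100k rows, 64 MiB), three 40k-row groups of 10 MiB
    // each pack as [rg0, rg1] followed by [rg2].
    let rgs = [(40_000, 10 << 20), (40_000, 10 << 20), (40_000, 10 << 20)];
    assert_eq!(pack(&rgs, 100_000, 64 << 20), vec![vec![0, 1], vec![2]]);
}
```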
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../datasource-parquet/src/access_plan.rs | 245 +++++++ datafusion/datasource-parquet/src/opener.rs | 607 ++++++++++++++---- datafusion/datasource-parquet/src/source.rs | 3 + 3 files changed, 718 insertions(+), 137 deletions(-) diff --git a/datafusion/datasource-parquet/src/access_plan.rs b/datafusion/datasource-parquet/src/access_plan.rs index ca4d097c37a44..32d9795d605de 100644 --- a/datafusion/datasource-parquet/src/access_plan.rs +++ b/datafusion/datasource-parquet/src/access_plan.rs @@ -349,6 +349,77 @@ impl ParquetAccessPlan { PreparedAccessPlan::new(row_group_indexes, row_selection) } + + /// Split this plan into an ordered list of sub-plans ("chunks"), each of + /// which represents a contiguous prefix of work packed together. + /// + /// Each returned plan has the same `len()` as `self`. Row groups outside + /// the chunk are set to [`RowGroupAccess::Skip`]; row groups inside the + /// chunk keep their original [`RowGroupAccess`]. + /// + /// Chunks are formed by walking `self.row_groups` in order and grouping + /// consecutive entries with `should_scan() == true`. A new chunk is started + /// whenever adding the next scannable row group would push the accumulated + /// row count past `max_rows` or compressed byte size past `max_bytes`. A + /// single row group that already exceeds either limit becomes its own + /// chunk (no sub-row-group split is performed). + /// + /// [`RowGroupAccess::Skip`] entries are carried silently in whichever chunk + /// is active at that point; they contribute no rows or bytes. + /// + /// If there are no scannable row groups, the result is empty. + pub(crate) fn split_into_chunks( + self, + row_group_meta_data: &[RowGroupMetaData], + max_rows: u64, + max_bytes: u64, + ) -> Vec { + assert_eq!(self.row_groups.len(), row_group_meta_data.len()); + + let len = self.row_groups.len(); + let mut chunks: Vec = Vec::new(); + let mut current: Option<(ParquetAccessPlan, u64, u64)> = None; + + for (idx, access) in self.row_groups.into_iter().enumerate() { + if !access.should_scan() { + // Skip entries are attached to the currently open chunk (if + // any) so they do not force a chunk boundary. They contribute + // zero rows/bytes. + if let Some((plan, _, _)) = current.as_mut() { + plan.row_groups[idx] = access; + } + continue; + } + + let rg_meta = &row_group_meta_data[idx]; + let rg_rows = rg_meta.num_rows().max(0) as u64; + let rg_bytes = rg_meta.compressed_size().max(0) as u64; + + if let Some((plan, acc_rows, acc_bytes)) = current.as_mut() { + let exceeds = acc_rows.saturating_add(rg_rows) > max_rows + || acc_bytes.saturating_add(rg_bytes) > max_bytes; + if exceeds { + chunks.push(current.take().unwrap().0); + } else { + plan.row_groups[idx] = access; + *acc_rows += rg_rows; + *acc_bytes += rg_bytes; + continue; + } + } + + // Start a new chunk with this row group. + let mut plan = ParquetAccessPlan::new_none(len); + plan.row_groups[idx] = access; + current = Some((plan, rg_rows, rg_bytes)); + } + + if let Some((plan, _, _)) = current { + chunks.push(plan); + } + + chunks + } } /// Represents a prepared, fully resolved [`ParquetAccessPlan`] @@ -600,6 +671,180 @@ mod test { .collect() }); + /// Build metadata for row groups with the given `(num_rows, compressed_bytes)` + /// pairs. Returned metadata has one `BYTE_ARRAY` column per row group. 
+ fn row_groups_with_bytes(specs: &[(i64, i64)]) -> Vec { + let schema_descr = get_test_schema_descr(); + specs + .iter() + .map(|(num_rows, compressed)| { + let column = ColumnChunkMetaData::builder(schema_descr.column(0)) + .set_num_values(*num_rows) + .set_total_compressed_size(*compressed) + .build() + .unwrap(); + + RowGroupMetaData::builder(schema_descr.clone()) + .set_num_rows(*num_rows) + .set_column_metadata(vec![column]) + .build() + .unwrap() + }) + .collect() + } + + fn access_kinds(plan: &ParquetAccessPlan) -> Vec<&'static str> { + plan.inner() + .iter() + .map(|rg| match rg { + RowGroupAccess::Skip => "skip", + RowGroupAccess::Scan => "scan", + RowGroupAccess::Selection(_) => "sel", + }) + .collect() + } + + #[test] + fn test_split_into_chunks_empty() { + let plan = ParquetAccessPlan::new(vec![]); + let chunks = plan.split_into_chunks(&[], 1000, 1000); + assert!(chunks.is_empty()); + } + + #[test] + fn test_split_into_chunks_all_skip() { + let meta = row_groups_with_bytes(&[(100, 1_000), (100, 1_000)]); + let plan = ParquetAccessPlan::new_none(2); + let chunks = plan.split_into_chunks(&meta, 1000, 10_000); + assert!(chunks.is_empty()); + } + + #[test] + fn test_split_into_chunks_one_per_row_group() { + // Each row group is already at the per-morsel limit, so each becomes + // its own chunk. + let meta = row_groups_with_bytes(&[(100, 1_000), (100, 1_000), (100, 1_000)]); + let plan = ParquetAccessPlan::new_all(3); + let chunks = plan.split_into_chunks(&meta, 100, 1_000); + assert_eq!(chunks.len(), 3); + assert_eq!(access_kinds(&chunks[0]), vec!["scan", "skip", "skip"]); + assert_eq!(access_kinds(&chunks[1]), vec!["skip", "scan", "skip"]); + assert_eq!(access_kinds(&chunks[2]), vec!["skip", "skip", "scan"]); + } + + #[test] + fn test_split_into_chunks_packs_small() { + // Three small row groups fit within one chunk by rows AND bytes. + let meta = row_groups_with_bytes(&[(30, 100), (30, 100), (30, 100)]); + let plan = ParquetAccessPlan::new_all(3); + let chunks = plan.split_into_chunks(&meta, 100, 1_000); + assert_eq!(chunks.len(), 1); + assert_eq!(access_kinds(&chunks[0]), vec!["scan", "scan", "scan"]); + } + + #[test] + fn test_split_into_chunks_oversized_single() { + // First row group alone exceeds max_rows; still becomes its own chunk + // (no sub-row-group split). + let meta = row_groups_with_bytes(&[(1_000, 100), (10, 100), (10, 100)]); + let plan = ParquetAccessPlan::new_all(3); + let chunks = plan.split_into_chunks(&meta, 100, 10_000); + assert_eq!(chunks.len(), 2); + assert_eq!(access_kinds(&chunks[0]), vec!["scan", "skip", "skip"]); + assert_eq!(access_kinds(&chunks[1]), vec!["skip", "scan", "scan"]); + } + + #[test] + fn test_split_into_chunks_respects_bytes() { + // All row groups are small in rows but the second one is big enough + // that it must start a new chunk on byte budget alone. + let meta = row_groups_with_bytes(&[(10, 500), (10, 600), (10, 100), (10, 100)]); + let plan = ParquetAccessPlan::new_all(4); + let chunks = plan.split_into_chunks(&meta, 1_000_000, 1_000); + assert_eq!(chunks.len(), 2); + assert_eq!( + access_kinds(&chunks[0]), + vec!["scan", "skip", "skip", "skip"] + ); + assert_eq!( + access_kinds(&chunks[1]), + vec!["skip", "scan", "scan", "scan"] + ); + } + + #[test] + fn test_split_into_chunks_with_skip_preserved() { + // Skip entries are carried by whichever chunk is currently being + // grown and never contribute to the row/byte budget, so here all + // three scan row groups fit together despite the wide skip in the + // middle. 
+ let meta = + row_groups_with_bytes(&[(30, 100), (1_000, 500), (30, 100), (30, 100)]); + let plan = ParquetAccessPlan::new(vec![ + RowGroupAccess::Scan, + RowGroupAccess::Skip, + RowGroupAccess::Scan, + RowGroupAccess::Scan, + ]); + let chunks = plan.split_into_chunks(&meta, 100, 1_000); + assert_eq!(chunks.len(), 1); + assert_eq!( + access_kinds(&chunks[0]), + vec!["scan", "skip", "scan", "scan"] + ); + } + + #[test] + fn test_split_into_chunks_skip_between_chunks() { + // When a chunk closes on budget, a following Skip is picked up by the + // next chunk rather than creating an empty one. + let meta = row_groups_with_bytes(&[(50, 100), (50, 100), (50, 100), (50, 100)]); + let plan = ParquetAccessPlan::new(vec![ + RowGroupAccess::Scan, + RowGroupAccess::Scan, + RowGroupAccess::Skip, + RowGroupAccess::Scan, + ]); + let chunks = plan.split_into_chunks(&meta, 100, 10_000); + assert_eq!(chunks.len(), 2); + assert_eq!( + access_kinds(&chunks[0]), + vec!["scan", "scan", "skip", "skip"] + ); + // rg2's Skip still lives in chunk 0 because chunk 0 was still open + // when we hit rg2; chunk 1 only covers rg3. + assert_eq!( + access_kinds(&chunks[1]), + vec!["skip", "skip", "skip", "scan"] + ); + } + + #[test] + fn test_split_into_chunks_preserves_selection() { + let meta = row_groups_with_bytes(&[(10, 100), (20, 100), (30, 100)]); + let selection: RowSelection = + vec![RowSelector::select(5), RowSelector::skip(15)].into(); + let plan = ParquetAccessPlan::new(vec![ + RowGroupAccess::Scan, + RowGroupAccess::Selection(selection), + RowGroupAccess::Scan, + ]); + // Budget forces each row group into its own chunk. + let chunks = plan.split_into_chunks(&meta, 15, 10_000); + assert_eq!(chunks.len(), 3); + assert_eq!(access_kinds(&chunks[0]), vec!["scan", "skip", "skip"]); + assert_eq!(access_kinds(&chunks[1]), vec!["skip", "sel", "skip"]); + assert_eq!(access_kinds(&chunks[2]), vec!["skip", "skip", "scan"]); + // The Selection must be preserved verbatim in its chunk. + let RowGroupAccess::Selection(sel) = &chunks[1].inner()[1] else { + panic!("expected Selection preserved in chunk"); + }; + let selectors: Vec<_> = sel.clone().into(); + assert_eq!(selectors.len(), 2); + assert_eq!((selectors[0].skip, selectors[0].row_count), (false, 5)); + assert_eq!((selectors[1].skip, selectors[1].row_count), (true, 15)); + } + /// Single column schema with a single column named "a" of type `BYTE_ARRAY`/`String` fn get_test_schema_descr() -> SchemaDescPtr { use parquet::basic::Type as PhysicalType; diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs index bad1c684b47f5..7dac3e4a1cbd7 100644 --- a/datafusion/datasource-parquet/src/opener.rs +++ b/datafusion/datasource-parquet/src/opener.rs @@ -77,6 +77,15 @@ use parquet::basic::Type; use parquet::bloom_filter::Sbbf; use parquet::file::metadata::{PageIndexPolicy, ParquetMetaDataReader}; +/// Default soft upper bound on the number of rows packed into a single +/// row-group morsel. Adjacent row groups are coalesced until this limit would +/// be exceeded. A single oversized row group still becomes its own morsel. +pub(crate) const DEFAULT_MORSEL_MAX_ROWS: u64 = 100_000; + +/// Default soft upper bound on the compressed byte size of a single row-group +/// morsel. See [`DEFAULT_MORSEL_MAX_ROWS`]. +pub(crate) const DEFAULT_MORSEL_MAX_COMPRESSED_BYTES: u64 = 64 * 1024 * 1024; + /// Stateless Parquet morselizer implementation. 
/// /// Reading a Parquet file is a multi-stage process, with multiple CPU-intensive @@ -136,6 +145,15 @@ pub(super) struct ParquetMorselizer { pub max_predicate_cache_size: Option, /// Whether to read row groups in reverse order pub reverse_row_groups: bool, + /// Upper bound on the number of rows coalesced into a single morsel. + /// + /// Row groups are packed greedily until the next row group would push + /// the total past this limit; a single oversized row group still becomes + /// its own morsel. + pub morsel_max_rows: u64, + /// Upper bound on the compressed byte size coalesced into a single + /// morsel. See [`Self::morsel_max_rows`]. + pub morsel_max_compressed_bytes: u64, } impl fmt::Debug for ParquetMorselizer { @@ -228,8 +246,12 @@ enum ParquetOpenState { /// /// TODO: split state as this currently does both I/O and CPU work. BuildStream(Box), - /// Terminal state: the final opened stream is ready to return. - Ready(BoxStream<'static, Result>), + /// Terminal state: one or more per-morsel streams are ready to return. + /// + /// Each stream corresponds to one row-group-sized chunk of the file and + /// will be wrapped in a [`ParquetStreamMorsel`] so sibling + /// `FileStream`s can consume them independently. + Ready(Vec>>), /// Terminal state: reading complete Done, } @@ -287,6 +309,8 @@ struct PreparedParquetOpen { max_predicate_cache_size: Option, reverse_row_groups: bool, preserve_order: bool, + morsel_max_rows: u64, + morsel_max_compressed_bytes: u64, #[cfg(feature = "parquet_encryption")] file_decryption_properties: Option>, } @@ -399,7 +423,7 @@ impl ParquetOpenState { ParquetOpenState::BuildStream(prepared) => { Ok(ParquetOpenState::Ready(prepared.build_stream()?)) } - ParquetOpenState::Ready(stream) => Ok(ParquetOpenState::Ready(stream)), + ParquetOpenState::Ready(streams) => Ok(ParquetOpenState::Ready(streams)), ParquetOpenState::Done => { panic!("ParquetOpenFuture polled after completion"); } @@ -515,9 +539,18 @@ impl MorselPlanner for ParquetMorselPlanner { ))) }))) } - ParquetOpenState::Ready(stream) => { - let morsels: Vec> = - vec![Box::new(ParquetStreamMorsel::new(stream))]; + ParquetOpenState::Ready(streams) => { + if streams.is_empty() { + // No row groups survived pruning, so there's nothing to + // feed the executor — terminate this file's planner. + return Ok(None); + } + let morsels: Vec> = streams + .into_iter() + .map(|stream| { + Box::new(ParquetStreamMorsel::new(stream)) as Box + }) + .collect(); Ok(Some(MorselPlan::new().with_morsels(morsels))) } ParquetOpenState::Done => Ok(None), @@ -656,6 +689,8 @@ impl ParquetMorselizer { max_predicate_cache_size: self.max_predicate_cache_size, reverse_row_groups: self.reverse_row_groups, preserve_order: self.preserve_order, + morsel_max_rows: self.morsel_max_rows, + morsel_max_compressed_bytes: self.morsel_max_compressed_bytes, #[cfg(feature = "parquet_encryption")] file_decryption_properties: None, }) @@ -1055,8 +1090,15 @@ impl BloomFiltersLoadedParquetOpen { } impl RowGroupsPrunedParquetOpen { - /// Build the final parquet stream once all pruning work is complete. - fn build_stream(self) -> Result>> { + /// Build one or more per-morsel streams once all pruning work is complete. + /// + /// Row groups are packed into chunks of up to [`MORSEL_MAX_ROWS`] rows and + /// [`MORSEL_MAX_COMPRESSED_BYTES`] compressed bytes. 
Each chunk becomes an + /// independent stream that can be wrapped in a `ParquetStreamMorsel`, + /// letting the driver interleave row-group work with other operators and + /// unblocking the follow-on work of sharing row-group-level work across + /// sibling `FileStream`s. + fn build_stream(self) -> Result>>> { let RowGroupsPrunedParquetOpen { prepared, mut row_groups, @@ -1075,32 +1117,6 @@ impl RowGroupsPrunedParquetOpen { let file_metadata = Arc::clone(reader_metadata.metadata()); let rg_metadata = file_metadata.row_groups(); - // Filter pushdown: evaluate predicates during scan - let row_filter = if let Some(predicate) = prepared - .pushdown_filters - .then_some(prepared.predicate.clone()) - .flatten() - { - let row_filter = row_filter::build_row_filter( - &predicate, - &prepared.physical_file_schema, - file_metadata.as_ref(), - prepared.reorder_predicates, - &prepared.file_metrics, - ); - - match row_filter { - Ok(Some(filter)) => Some(filter), - Ok(None) => None, - Err(e) => { - debug!("Ignoring error building row filter for '{predicate:?}': {e}"); - None - } - } - } else { - None - }; - // Prune by limit if limit is set and limit order is not sensitive if let (Some(limit), false) = (prepared.limit, prepared.preserve_order) { row_groups.prune_by_limit(limit, rg_metadata, &prepared.file_metrics); @@ -1123,98 +1139,170 @@ impl RowGroupsPrunedParquetOpen { ); } - // Prepare the access plan (extract row groups and row selection) - let mut prepared_plan = access_plan.prepare(rg_metadata)?; + if access_plan.row_group_index_iter().next().is_none() { + return Ok(Vec::new()); + } + + let mut chunk_plans = access_plan.split_into_chunks( + rg_metadata, + prepared.morsel_max_rows, + prepared.morsel_max_compressed_bytes, + ); - // Potentially reverse the access plan for performance. - // See `ParquetSource::try_pushdown_sort` for the rationale. + // Reverse chunk order so that, when `reverse_row_groups` is set, the + // first emitted morsel corresponds to the file's last row groups. + // Each chunk's `PreparedAccessPlan` is also reversed below so that + // within a chunk the row-group read order mirrors the file-wide + // reversal. See `ParquetSource::try_pushdown_sort` for the rationale. if prepared.reverse_row_groups { - prepared_plan = prepared_plan.reverse(file_metadata.as_ref())?; + chunk_plans.reverse(); } - let arrow_reader_metrics = ArrowReaderMetrics::enabled(); - let read_plan = build_projection_read_plan( - prepared.projection.expr_iter(), - &prepared.physical_file_schema, - reader_metadata.parquet_schema(), - ); + // The reader that was used for metadata / page index / bloom filter + // loads may have warmed object-store caches. Hand it to the first + // chunk so no work is wasted; mint fresh readers for the rest from + // the same factory. + let mut initial_reader: Option> = + Some(prepared.async_file_reader); + let mut file_pruner = prepared.file_pruner; + let mut streams: Vec>> = + Vec::with_capacity(chunk_plans.len()); + + for (chunk_idx, chunk_plan) in chunk_plans.into_iter().enumerate() { + let mut prepared_plan = chunk_plan.prepare(rg_metadata)?; + if prepared.reverse_row_groups { + prepared_plan = prepared_plan.reverse(file_metadata.as_ref())?; + } + + // `RowFilter` is not `Clone` because it owns `Box`s, + // so a fresh filter has to be built per chunk. 
+ let row_filter = if let Some(predicate) = prepared + .pushdown_filters + .then_some(prepared.predicate.clone()) + .flatten() + { + match row_filter::build_row_filter( + &predicate, + &prepared.physical_file_schema, + file_metadata.as_ref(), + prepared.reorder_predicates, + &prepared.file_metrics, + ) { + Ok(Some(filter)) => Some(filter), + Ok(None) => None, + Err(e) => { + debug!( + "Ignoring error building row filter for '{predicate:?}': {e}" + ); + None + } + } + } else { + None + }; - let mut decoder_builder = - ParquetPushDecoderBuilder::new_with_metadata(reader_metadata) - .with_projection(read_plan.projection_mask) - .with_batch_size(prepared.batch_size) - .with_metrics(arrow_reader_metrics.clone()); + let arrow_reader_metrics = ArrowReaderMetrics::enabled(); + let read_plan = build_projection_read_plan( + prepared.projection.expr_iter(), + &prepared.physical_file_schema, + reader_metadata.parquet_schema(), + ); - if let Some(row_filter) = row_filter { - decoder_builder = decoder_builder.with_row_filter(row_filter); - } - if prepared.force_filter_selections { - decoder_builder = - decoder_builder.with_row_selection_policy(RowSelectionPolicy::Selectors); - } - if let Some(row_selection) = prepared_plan.row_selection { - decoder_builder = decoder_builder.with_row_selection(row_selection); - } - decoder_builder = - decoder_builder.with_row_groups(prepared_plan.row_group_indexes); - if let Some(limit) = prepared.limit { - decoder_builder = decoder_builder.with_limit(limit); - } - if let Some(max_predicate_cache_size) = prepared.max_predicate_cache_size { + let mut decoder_builder = + ParquetPushDecoderBuilder::new_with_metadata(reader_metadata.clone()) + .with_projection(read_plan.projection_mask) + .with_batch_size(prepared.batch_size) + .with_metrics(arrow_reader_metrics.clone()); + + if let Some(row_filter) = row_filter { + decoder_builder = decoder_builder.with_row_filter(row_filter); + } + if prepared.force_filter_selections { + decoder_builder = decoder_builder + .with_row_selection_policy(RowSelectionPolicy::Selectors); + } + if let Some(row_selection) = prepared_plan.row_selection { + decoder_builder = decoder_builder.with_row_selection(row_selection); + } decoder_builder = - decoder_builder.with_max_predicate_cache_size(max_predicate_cache_size); - } + decoder_builder.with_row_groups(prepared_plan.row_group_indexes); + // `ScanState.remain` enforces the true outer limit across all + // morsels; passing the per-chunk limit here is a conservative + // per-chunk cap that bounds wasted decode once the outer cap is + // hit. + if let Some(limit) = prepared.limit { + decoder_builder = decoder_builder.with_limit(limit); + } + if let Some(max_predicate_cache_size) = prepared.max_predicate_cache_size { + decoder_builder = decoder_builder + .with_max_predicate_cache_size(max_predicate_cache_size); + } - let decoder = decoder_builder.build()?; - - let predicate_cache_inner_records = - prepared.file_metrics.predicate_cache_inner_records.clone(); - let predicate_cache_records = - prepared.file_metrics.predicate_cache_records.clone(); - - // Check if we need to replace the schema to handle things like differing nullability or metadata. - // See note below about file vs. output schema. - let stream_schema = read_plan.projected_schema; - let replace_schema = stream_schema != prepared.output_schema; - - // Rebase column indices to match the narrowed stream schema. 
- // The projection expressions have indices based on physical_file_schema, - // but the stream only contains the columns selected by the ProjectionMask. - let projection = prepared - .projection - .try_map_exprs(|expr| reassign_expr_columns(expr, &stream_schema))?; - let projector = projection.make_projector(&stream_schema)?; - let output_schema = Arc::clone(&prepared.output_schema); - let files_ranges_pruned_statistics = - prepared.file_metrics.files_ranges_pruned_statistics.clone(); - let stream = futures::stream::unfold( - PushDecoderStreamState { - decoder, - reader: prepared.async_file_reader, - projector, - output_schema, - replace_schema, - arrow_reader_metrics, - predicate_cache_inner_records, - predicate_cache_records, - baseline_metrics: prepared.baseline_metrics, - }, - |state| async move { state.transition().await }, - ) - .fuse(); - - // Wrap the stream so a dynamic filter can stop the file scan early. - if let Some(file_pruner) = prepared.file_pruner { - let stream = stream.boxed(); - Ok(EarlyStoppingStream::new( - stream, - file_pruner, - files_ranges_pruned_statistics, + let decoder = decoder_builder.build()?; + + let reader = match initial_reader.take() { + Some(r) => r, + None => prepared.parquet_file_reader_factory.create_reader( + prepared.partition_index, + prepared.partitioned_file.clone(), + prepared.metadata_size_hint, + &prepared.metrics, + )?, + }; + + // Rebase column indices to match the narrowed stream schema. + // The projection expressions have indices based on physical_file_schema, + // but the stream only contains the columns selected by the ProjectionMask. + let stream_schema = read_plan.projected_schema; + let replace_schema = stream_schema != prepared.output_schema; + let projection = prepared + .projection + .clone() + .try_map_exprs(|expr| reassign_expr_columns(expr, &stream_schema))?; + let projector = projection.make_projector(&stream_schema)?; + + let predicate_cache_inner_records = + prepared.file_metrics.predicate_cache_inner_records.clone(); + let predicate_cache_records = + prepared.file_metrics.predicate_cache_records.clone(); + + let stream = futures::stream::unfold( + PushDecoderStreamState { + decoder, + reader, + projector, + output_schema: Arc::clone(&prepared.output_schema), + replace_schema, + arrow_reader_metrics, + predicate_cache_inner_records, + predicate_cache_records, + baseline_metrics: prepared.baseline_metrics.clone(), + }, + |state| async move { state.transition().await }, ) - .boxed()) - } else { - Ok(stream.boxed()) + .fuse(); + + // `FilePruner` is not `Clone` and holds stateful predicate-generation + // counters, so it can only wrap a single stream. Attach it to the + // first chunk so the whole file scan can still early-stop when a + // dynamic filter narrows. 
+ let boxed: BoxStream<'static, Result> = if chunk_idx == 0 + && let Some(pruner) = file_pruner.take() + { + EarlyStoppingStream::new( + stream.boxed(), + pruner, + prepared.file_metrics.files_ranges_pruned_statistics.clone(), + ) + .boxed() + } else { + stream.boxed() + }; + + streams.push(boxed); } + + Ok(streams) } } @@ -1629,8 +1717,7 @@ mod test { use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use bytes::{BufMut, BytesMut}; use datafusion_common::{ - ColumnStatistics, ScalarValue, Statistics, internal_err, record_batch, - stats::Precision, + ColumnStatistics, ScalarValue, Statistics, record_batch, stats::Precision, }; use datafusion_datasource::morsel::{Morsel, Morselizer}; use datafusion_datasource::{PartitionedFile, TableSchema}; @@ -1676,6 +1763,8 @@ mod test { max_predicate_cache_size: Option, reverse_row_groups: bool, preserve_order: bool, + morsel_max_rows: u64, + morsel_max_compressed_bytes: u64, } impl ParquetMorselizerBuilder { @@ -1702,6 +1791,8 @@ mod test { max_predicate_cache_size: None, reverse_row_groups: false, preserve_order: false, + morsel_max_rows: DEFAULT_MORSEL_MAX_ROWS, + morsel_max_compressed_bytes: DEFAULT_MORSEL_MAX_COMPRESSED_BYTES, } } @@ -1765,6 +1856,19 @@ mod test { self } + /// Override the per-morsel row budget. + fn with_morsel_max_rows(mut self, limit: u64) -> Self { + self.morsel_max_rows = limit; + self + } + + /// Override the per-morsel compressed byte budget. + #[expect(dead_code)] + fn with_morsel_max_compressed_bytes(mut self, limit: u64) -> Self { + self.morsel_max_compressed_bytes = limit; + self + } + /// Build the ParquetMorselizer instance. /// /// # Panics @@ -1816,6 +1920,8 @@ mod test { encryption_factory: None, max_predicate_cache_size: self.max_predicate_cache_size, reverse_row_groups: self.reverse_row_groups, + morsel_max_rows: self.morsel_max_rows, + morsel_max_compressed_bytes: self.morsel_max_compressed_bytes, } } } @@ -1830,32 +1936,49 @@ mod test { morselizer: &ParquetMorselizer, file: PartitionedFile, ) -> Result>> { - let mut planners = VecDeque::from([morselizer.plan_file(file)?]); - let mut morsels: VecDeque> = VecDeque::new(); + let morsels = collect_all_morsels(morselizer, file).await?; + if let Some(first) = morsels.into_iter().next() { + Ok(Box::pin(first.into_stream())) + } else { + Ok(Box::pin(futures::stream::empty())) + } + } - loop { - if let Some(morsel) = morsels.pop_front() { - return Ok(Box::pin(morsel.into_stream())); - } + /// Drives the morselizer to completion and returns every morsel it + /// produced, in order. Useful for asserting how a file is split into + /// row-group morsels. + async fn collect_all_morsels( + morselizer: &ParquetMorselizer, + file: PartitionedFile, + ) -> Result>> { + let mut planners = VecDeque::from([morselizer.plan_file(file)?]); + let mut morsels: Vec> = Vec::new(); - let Some(planner) = planners.pop_front() else { - return Ok(Box::pin(futures::stream::empty())); + while let Some(planner) = planners.pop_front() { + let Some(mut plan) = planner.plan()? else { + continue; }; + morsels.extend(plan.take_morsels()); + planners.extend(plan.take_ready_planners()); - if let Some(mut plan) = planner.plan()? 
{ - morsels.extend(plan.take_morsels()); - planners.extend(plan.take_ready_planners()); + if let Some(pending_planner) = plan.take_pending_planner() { + planners.push_front(pending_planner.await?); + } + } - if let Some(pending_planner) = plan.take_pending_planner() { - planners.push_front(pending_planner.await?); - continue; - } + Ok(morsels) + } - if morsels.is_empty() && planners.is_empty() { - return internal_err!("planner returned an empty morsel plan"); - } - } + /// Concatenate all batches produced by `streams`, returning the int32 + /// values from the first column of each batch. + async fn collect_int32_values_across( + streams: Vec>>, + ) -> Vec { + let mut values = vec![]; + for stream in streams { + values.extend(collect_int32_values(stream).await); } + values } fn constant_int_stats() -> (Statistics, SchemaRef) { @@ -2651,6 +2774,216 @@ mod test { ); } + /// A multi-row-group file whose pruned access plan exceeds the per-morsel + /// row budget produces multiple morsels, and their concatenated output + /// matches the single-morsel reference. + #[tokio::test] + async fn test_row_group_split_produces_multiple_morsels() { + use parquet::file::properties::WriterProperties; + + let store = Arc::new(InMemory::new()) as Arc; + + // Three row groups of 3 rows each. Packing stops at 3 rows/morsel, so + // we expect three morsels. + let batch1 = + record_batch!(("a", Int32, vec![Some(1), Some(2), Some(3)])).unwrap(); + let batch2 = + record_batch!(("a", Int32, vec![Some(4), Some(5), Some(6)])).unwrap(); + let batch3 = + record_batch!(("a", Int32, vec![Some(7), Some(8), Some(9)])).unwrap(); + + let props = WriterProperties::builder() + .set_max_row_group_row_count(Some(3)) + .build(); + let data_len = write_parquet_batches( + Arc::clone(&store), + "test.parquet", + vec![batch1.clone(), batch2, batch3], + Some(props), + ) + .await; + let schema = batch1.schema(); + let file = PartitionedFile::new( + "test.parquet".to_string(), + u64::try_from(data_len).unwrap(), + ); + + let morselizer = ParquetMorselizerBuilder::new() + .with_store(Arc::clone(&store)) + .with_schema(Arc::clone(&schema)) + .with_projection_indices(&[0]) + .with_morsel_max_rows(3) + .build(); + let morsels = collect_all_morsels(&morselizer, file.clone()) + .await + .unwrap(); + assert_eq!(morsels.len(), 3, "one morsel per row group"); + + let streams = morsels + .into_iter() + .map(|m| Box::pin(m.into_stream()) as BoxStream<_>) + .collect(); + let values = collect_int32_values_across(streams).await; + assert_eq!(values, vec![1, 2, 3, 4, 5, 6, 7, 8, 9]); + + // Reference: default budget keeps everything in one morsel. + let reference_morselizer = ParquetMorselizerBuilder::new() + .with_store(Arc::clone(&store)) + .with_schema(schema) + .with_projection_indices(&[0]) + .build(); + let reference_stream = open_file(&reference_morselizer, file).await.unwrap(); + assert_eq!( + collect_int32_values(reference_stream).await, + vec![1, 2, 3, 4, 5, 6, 7, 8, 9] + ); + } + + /// When adjacent row groups fit inside the morsel budget they should be + /// packed together rather than emitted one-per-morsel. 
+ #[tokio::test] + async fn test_row_group_split_packs_within_budget() { + use parquet::file::properties::WriterProperties; + + let store = Arc::new(InMemory::new()) as Arc; + + let batch1 = + record_batch!(("a", Int32, vec![Some(1), Some(2), Some(3)])).unwrap(); + let batch2 = + record_batch!(("a", Int32, vec![Some(4), Some(5), Some(6)])).unwrap(); + let batch3 = + record_batch!(("a", Int32, vec![Some(7), Some(8), Some(9)])).unwrap(); + + let props = WriterProperties::builder() + .set_max_row_group_row_count(Some(3)) + .build(); + let data_len = write_parquet_batches( + Arc::clone(&store), + "test.parquet", + vec![batch1.clone(), batch2, batch3], + Some(props), + ) + .await; + let schema = batch1.schema(); + let file = PartitionedFile::new( + "test.parquet".to_string(), + u64::try_from(data_len).unwrap(), + ); + + // Budget fits exactly 2 row groups; expect two morsels: [0+1], [2]. + let morselizer = ParquetMorselizerBuilder::new() + .with_store(store) + .with_schema(schema) + .with_projection_indices(&[0]) + .with_morsel_max_rows(6) + .build(); + let morsels = collect_all_morsels(&morselizer, file).await.unwrap(); + assert_eq!(morsels.len(), 2); + } + + /// A user-supplied access plan with a `Skip` entry between scanned row + /// groups should preserve the skip across chunking. + #[tokio::test] + async fn test_row_group_split_honors_user_skip() { + use crate::ParquetAccessPlan; + use parquet::file::properties::WriterProperties; + + let store = Arc::new(InMemory::new()) as Arc; + + let batch1 = + record_batch!(("a", Int32, vec![Some(1), Some(2), Some(3)])).unwrap(); + let batch2 = + record_batch!(("a", Int32, vec![Some(4), Some(5), Some(6)])).unwrap(); + let batch3 = + record_batch!(("a", Int32, vec![Some(7), Some(8), Some(9)])).unwrap(); + + let props = WriterProperties::builder() + .set_max_row_group_row_count(Some(3)) + .build(); + let data_len = write_parquet_batches( + Arc::clone(&store), + "test.parquet", + vec![batch1.clone(), batch2, batch3], + Some(props), + ) + .await; + let schema = batch1.schema(); + + let mut access_plan = ParquetAccessPlan::new_all(3); + access_plan.skip(1); + + let file = PartitionedFile::new( + "test.parquet".to_string(), + u64::try_from(data_len).unwrap(), + ) + .with_extensions(Arc::new(access_plan)); + + let morselizer = ParquetMorselizerBuilder::new() + .with_store(store) + .with_schema(schema) + .with_projection_indices(&[0]) + .with_morsel_max_rows(3) + .build(); + let morsels = collect_all_morsels(&morselizer, file).await.unwrap(); + let streams = morsels + .into_iter() + .map(|m| Box::pin(m.into_stream()) as BoxStream<_>) + .collect(); + let values = collect_int32_values_across(streams).await; + assert_eq!(values, vec![1, 2, 3, 7, 8, 9], "row group 1 is skipped"); + } + + /// When `reverse_row_groups` is set the per-morsel split should preserve + /// the reverse output order: the first morsel emits the file's last row + /// group. 
+ #[tokio::test] + async fn test_row_group_split_with_reverse() { + use parquet::file::properties::WriterProperties; + + let store = Arc::new(InMemory::new()) as Arc; + + let batch1 = + record_batch!(("a", Int32, vec![Some(1), Some(2), Some(3)])).unwrap(); + let batch2 = + record_batch!(("a", Int32, vec![Some(4), Some(5), Some(6)])).unwrap(); + let batch3 = + record_batch!(("a", Int32, vec![Some(7), Some(8), Some(9)])).unwrap(); + + let props = WriterProperties::builder() + .set_max_row_group_row_count(Some(3)) + .build(); + let data_len = write_parquet_batches( + Arc::clone(&store), + "test.parquet", + vec![batch1.clone(), batch2, batch3], + Some(props), + ) + .await; + let schema = batch1.schema(); + let file = PartitionedFile::new( + "test.parquet".to_string(), + u64::try_from(data_len).unwrap(), + ); + + let morselizer = ParquetMorselizerBuilder::new() + .with_store(store) + .with_schema(schema) + .with_projection_indices(&[0]) + .with_morsel_max_rows(3) + .with_reverse_row_groups(true) + .build(); + let morsels = collect_all_morsels(&morselizer, file).await.unwrap(); + assert_eq!(morsels.len(), 3); + + // First morsel should emit the originally-last row group. + let streams: Vec<_> = morsels + .into_iter() + .map(|m| Box::pin(m.into_stream()) as BoxStream<_>) + .collect(); + let values = collect_int32_values_across(streams).await; + assert_eq!(values, vec![7, 8, 9, 4, 5, 6, 1, 2, 3]); + } + /// Test that page pruning predicates are only built and applied when `enable_page_index` is true. /// /// The file has a single row group with 10 pages (10 rows each, values 1..100). diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs index a014c8b2726e7..0d0840655bf26 100644 --- a/datafusion/datasource-parquet/src/source.rs +++ b/datafusion/datasource-parquet/src/source.rs @@ -580,6 +580,9 @@ impl FileSource for ParquetSource { encryption_factory: self.get_encryption_factory_with_config(), max_predicate_cache_size: self.max_predicate_cache_size(), reverse_row_groups: self.reverse_row_groups, + morsel_max_rows: crate::opener::DEFAULT_MORSEL_MAX_ROWS, + morsel_max_compressed_bytes: + crate::opener::DEFAULT_MORSEL_MAX_COMPRESSED_BYTES, })) } From 8ef53b7ff0facea593f72deeb2f63108d7ff2858 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Tue, 21 Apr 2026 12:13:47 -0500 Subject: [PATCH 12/12] refactor: defer morsel decoder build to Morsel::into_stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous `build_stream` built every morsel's `RowFilter`, `ParquetPushDecoder`, `AsyncFileReader`, and `Projector` eagerly in a single loop inside the file planner — before any morsel was scheduled. That loop ran on the scheduler thread and was visible as a 10–15% regression vs. main on ClickBench-partitioned queries that have many row-group morsels per file (e.g. Q15, Q16 at pushdown=off). Replace `ParquetStreamMorsel` (which held a pre-built `BoxStream`) with `ParquetLazyMorsel`, which holds only the per-chunk `ParquetAccessPlan` plus an `Arc` of the file-level state. The decoder and reader are constructed inside `Morsel::into_stream`, so each morsel pays its setup cost only when the scheduler actually picks it up, and the work is distributed across worker threads instead of serialised on the planner. `FilePruner` is `!Clone` and drives whole-file early-stop via `EarlyStoppingStream`, so it still lives on chunk 0's morsel only. 
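To make the shape concrete, here is a minimal, self-contained sketch of the deferral pattern. The names are illustrative stand-ins, not the crate's real types: `SharedFileState` stands in for `LazyMorselShared`, the toy `Morsel` trait returns a `Vec<String>` instead of a `BoxStream<'static, Result<RecordBatch>>`, and the "setup" is a `format!` call rather than the row-filter/decoder/reader build. The point is only the structure: cheap per-chunk structs behind one shared `Arc`, expensive work deferred to `into_stream`, and the non-`Clone` pruner handed to chunk 0 alone via `Option::take`.

```rust
use std::sync::Arc;

/// Stand-in for the expensive, file-level state (metadata, schemas,
/// metrics, predicates) that every chunk shares behind one `Arc`.
struct SharedFileState {
    file_name: String,
}

/// Stand-in for the real `FilePruner`, which is `!Clone`.
struct FilePruner;

/// Toy stand-in for the Morsel API; the real trait returns a stream.
trait Morsel {
    fn into_stream(self: Box<Self>) -> Vec<String>;
}

struct LazyMorsel {
    shared: Arc<SharedFileState>,
    chunk_idx: usize,
    /// `Some` only for chunk 0, so the whole-file early-stop wrapper is
    /// attached exactly once.
    pruner: Option<FilePruner>,
}

impl Morsel for LazyMorsel {
    fn into_stream(self: Box<Self>) -> Vec<String> {
        // The expensive per-chunk setup (row filter, decoder, reader)
        // happens here, only when the scheduler picks this morsel up.
        let mut out = vec![format!(
            "decode chunk {} of {}",
            self.chunk_idx, self.shared.file_name
        )];
        if self.pruner.is_some() {
            out.push("chunk 0 wrapped for early-stop".to_string());
        }
        out
    }
}

/// Planner side: build one cheap lazy morsel per chunk; nothing heavy
/// runs on this thread.
fn plan_file(chunks: usize) -> Vec<Box<dyn Morsel>> {
    let shared = Arc::new(SharedFileState {
        file_name: "test.parquet".to_string(),
    });
    let mut pruner = Some(FilePruner);
    (0..chunks)
        .map(|chunk_idx| {
            Box::new(LazyMorsel {
                shared: Arc::clone(&shared),
                chunk_idx,
                pruner: pruner.take(), // only chunk 0 gets the pruner
            }) as Box<dyn Morsel>
        })
        .collect()
}

fn main() {
    for morsel in plan_file(3) {
        println!("{:?}", morsel.into_stream());
    }
}
```

In the real opener the deferred setup is the row-filter build, push-decoder construction, and reader acquisition shown in the diff below.
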
The warm `async_file_reader` from metadata / page-index / bloom-filter load is dropped at the end of `build_stream` — every morsel mints a fresh reader via the factory at `into_stream` time. For both built-in factories (`DefaultParquetFileReaderFactory`, `CachedParquetFileReaderFactory`) the "warm cache" benefit of reusing a reader is negligible because the underlying `Arc` / `Arc` is already shared across readers, so the simplification is free. Local ClickBench-partitioned, 10 iterations, pushdown=off (M-series): | Query | main | eager (before) | lazy (this commit) | |-------|------:|---------------:|-------------------:| | Q14 | 325 | 335 | 313 ms | | Q15 | 309 | 358 | 302 ms | | Q16 | 911 | 1049 | 786 ms | | Q24 | 48 | 55 | 56 ms | | Q26 | 41 | 45 | 45 ms | Co-Authored-By: Claude Opus 4.7 (1M context) --- datafusion/datasource-parquet/src/opener.rs | 406 ++++++++++++-------- 1 file changed, 239 insertions(+), 167 deletions(-) diff --git a/datafusion/datasource-parquet/src/opener.rs b/datafusion/datasource-parquet/src/opener.rs index 7dac3e4a1cbd7..b548f1f2bde36 100644 --- a/datafusion/datasource-parquet/src/opener.rs +++ b/datafusion/datasource-parquet/src/opener.rs @@ -75,7 +75,7 @@ use parquet::arrow::parquet_column; use parquet::arrow::push_decoder::{ParquetPushDecoder, ParquetPushDecoderBuilder}; use parquet::basic::Type; use parquet::bloom_filter::Sbbf; -use parquet::file::metadata::{PageIndexPolicy, ParquetMetaDataReader}; +use parquet::file::metadata::{PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader}; /// Default soft upper bound on the number of rows packed into a single /// row-group morsel. Adjacent row groups are coalesced until this limit would @@ -246,12 +246,15 @@ enum ParquetOpenState { /// /// TODO: split state as this currently does both I/O and CPU work. BuildStream(Box), - /// Terminal state: one or more per-morsel streams are ready to return. + /// Terminal state: one or more per-morsel lazy builders are ready to + /// return. /// - /// Each stream corresponds to one row-group-sized chunk of the file and - /// will be wrapped in a [`ParquetStreamMorsel`] so sibling - /// `FileStream`s can consume them independently. - Ready(Vec>>), + /// Each morsel corresponds to one row-group-sized chunk of the file. + /// Morsels defer row-filter compilation, decoder construction, and + /// reader acquisition until [`Morsel::into_stream`] is actually + /// invoked — so construction work for a morsel only happens when the + /// scheduler picks it up. + Ready(Vec>), /// Terminal state: reading complete Done, } @@ -431,27 +434,200 @@ impl ParquetOpenState { } } -/// Implements the Morsel API -struct ParquetStreamMorsel { - stream: BoxStream<'static, Result>, +/// File-level state shared across every lazy morsel from a single file open. +/// +/// Each [`ParquetLazyMorsel`] holds an `Arc` to one of these so the +/// expensive-to-clone pieces (metadata, schemas, metrics, Arc predicates) +/// are not duplicated. The only non-shareable resource is the +/// [`FilePruner`], which is held on chunk 0's morsel because it's +/// `!Clone`. 
+struct LazyMorselShared { + partition_index: usize, + partitioned_file: PartitionedFile, + metadata_size_hint: Option, + metrics: ExecutionPlanMetricsSet, + file_metrics: ParquetFileMetrics, + baseline_metrics: BaselineMetrics, + parquet_file_reader_factory: Arc, + batch_size: usize, + physical_file_schema: SchemaRef, + output_schema: SchemaRef, + projection: ProjectionExprs, + predicate: Option>, + pushdown_filters: bool, + force_filter_selections: bool, + reorder_predicates: bool, + limit: Option, + max_predicate_cache_size: Option, + reverse_row_groups: bool, + reader_metadata: ArrowReaderMetadata, + file_metadata: Arc, } -impl ParquetStreamMorsel { - fn new(stream: BoxStream<'static, Result>) -> Self { - Self { stream } - } +/// Lazy per-morsel builder. +/// +/// Holds everything needed to construct the parquet decoder stream for a +/// single chunk of row groups, but defers the actual construction — +/// `build_row_filter`, decoder build, reader acquisition — to +/// [`Morsel::into_stream`]. This means a file's morsel construction cost +/// is paid only as each morsel is scheduled, not all-at-once at +/// `build_stream` time. +struct ParquetLazyMorsel { + shared: Arc, + chunk_plan: ParquetAccessPlan, + chunk_idx: usize, + /// The file-level [`FilePruner`] used for dynamic-filter early-stop. + /// `FilePruner` is not `Clone` and holds stateful predicate-generation + /// counters, so it's attached only to chunk 0's stream. + file_pruner: Option, } -impl fmt::Debug for ParquetStreamMorsel { +impl fmt::Debug for ParquetLazyMorsel { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("ParquetStreamMorsel") + f.debug_struct("ParquetLazyMorsel") + .field("chunk_idx", &self.chunk_idx) .finish_non_exhaustive() } } -impl Morsel for ParquetStreamMorsel { +impl Morsel for ParquetLazyMorsel { fn into_stream(self: Box) -> BoxStream<'static, Result> { - self.stream + match (*self).build_stream_now() { + Ok(stream) => stream, + Err(e) => futures::stream::once(async move { Err(e) }).boxed(), + } + } +} + +impl ParquetLazyMorsel { + fn build_stream_now(self) -> Result>> { + let ParquetLazyMorsel { + shared, + chunk_plan, + chunk_idx, + file_pruner, + } = self; + + let rg_metadata = shared.file_metadata.row_groups(); + let mut prepared_plan = chunk_plan.prepare(rg_metadata)?; + if shared.reverse_row_groups { + prepared_plan = prepared_plan.reverse(shared.file_metadata.as_ref())?; + } + + // `RowFilter` is not `Clone` because it owns `Box`s, + // so a fresh filter has to be built per chunk. 
+ let row_filter = if let Some(predicate) = shared + .pushdown_filters + .then_some(shared.predicate.clone()) + .flatten() + { + match row_filter::build_row_filter( + &predicate, + &shared.physical_file_schema, + shared.file_metadata.as_ref(), + shared.reorder_predicates, + &shared.file_metrics, + ) { + Ok(Some(filter)) => Some(filter), + Ok(None) => None, + Err(e) => { + debug!("Ignoring error building row filter for '{predicate:?}': {e}"); + None + } + } + } else { + None + }; + + let arrow_reader_metrics = ArrowReaderMetrics::enabled(); + let read_plan = build_projection_read_plan( + shared.projection.expr_iter(), + &shared.physical_file_schema, + shared.reader_metadata.parquet_schema(), + ); + + let mut decoder_builder = + ParquetPushDecoderBuilder::new_with_metadata(shared.reader_metadata.clone()) + .with_projection(read_plan.projection_mask) + .with_batch_size(shared.batch_size) + .with_metrics(arrow_reader_metrics.clone()); + + if let Some(row_filter) = row_filter { + decoder_builder = decoder_builder.with_row_filter(row_filter); + } + if shared.force_filter_selections { + decoder_builder = + decoder_builder.with_row_selection_policy(RowSelectionPolicy::Selectors); + } + if let Some(row_selection) = prepared_plan.row_selection { + decoder_builder = decoder_builder.with_row_selection(row_selection); + } + decoder_builder = + decoder_builder.with_row_groups(prepared_plan.row_group_indexes); + // `ScanState.remain` enforces the true outer limit across all + // morsels; passing the per-chunk limit here is a conservative + // per-chunk cap that bounds wasted decode once the outer cap is hit. + if let Some(limit) = shared.limit { + decoder_builder = decoder_builder.with_limit(limit); + } + if let Some(max_predicate_cache_size) = shared.max_predicate_cache_size { + decoder_builder = + decoder_builder.with_max_predicate_cache_size(max_predicate_cache_size); + } + + let decoder = decoder_builder.build()?; + + let reader = shared.parquet_file_reader_factory.create_reader( + shared.partition_index, + shared.partitioned_file.clone(), + shared.metadata_size_hint, + &shared.metrics, + )?; + + let stream_schema = read_plan.projected_schema; + let replace_schema = stream_schema != shared.output_schema; + let projection = shared + .projection + .clone() + .try_map_exprs(|expr| reassign_expr_columns(expr, &stream_schema))?; + let projector = projection.make_projector(&stream_schema)?; + + let predicate_cache_inner_records = + shared.file_metrics.predicate_cache_inner_records.clone(); + let predicate_cache_records = shared.file_metrics.predicate_cache_records.clone(); + + let stream = futures::stream::unfold( + PushDecoderStreamState { + decoder, + reader, + projector, + output_schema: Arc::clone(&shared.output_schema), + replace_schema, + arrow_reader_metrics, + predicate_cache_inner_records, + predicate_cache_records, + baseline_metrics: shared.baseline_metrics.clone(), + }, + |state| async move { state.transition().await }, + ) + .fuse(); + + // Attach `FilePruner` only to chunk 0 so the whole file scan can + // still early-stop when a dynamic filter narrows. 
+ let boxed: BoxStream<'static, Result> = if chunk_idx == 0 + && let Some(pruner) = file_pruner + { + EarlyStoppingStream::new( + stream.boxed(), + pruner, + shared.file_metrics.files_ranges_pruned_statistics.clone(), + ) + .boxed() + } else { + stream.boxed() + }; + + Ok(boxed) } } @@ -539,18 +715,12 @@ impl MorselPlanner for ParquetMorselPlanner { ))) }))) } - ParquetOpenState::Ready(streams) => { - if streams.is_empty() { + ParquetOpenState::Ready(morsels) => { + if morsels.is_empty() { // No row groups survived pruning, so there's nothing to // feed the executor — terminate this file's planner. return Ok(None); } - let morsels: Vec> = streams - .into_iter() - .map(|stream| { - Box::new(ParquetStreamMorsel::new(stream)) as Box - }) - .collect(); Ok(Some(MorselPlan::new().with_morsels(morsels))) } ParquetOpenState::Done => Ok(None), @@ -1098,7 +1268,7 @@ impl RowGroupsPrunedParquetOpen { /// letting the driver interleave row-group work with other operators and /// unblocking the follow-on work of sharing row-group-level work across /// sibling `FileStream`s. - fn build_stream(self) -> Result>>> { + fn build_stream(self) -> Result>> { let RowGroupsPrunedParquetOpen { prepared, mut row_groups, @@ -1158,151 +1328,53 @@ impl RowGroupsPrunedParquetOpen { chunk_plans.reverse(); } - // The reader that was used for metadata / page index / bloom filter - // loads may have warmed object-store caches. Hand it to the first - // chunk so no work is wasted; mint fresh readers for the rest from - // the same factory. - let mut initial_reader: Option> = - Some(prepared.async_file_reader); + // `prepared.async_file_reader` served metadata / page-index / + // bloom-filter loads and is dropped here: each morsel mints its + // own reader via the factory at `into_stream` time. Built-in + // factories wrap only `Arc` (HTTP/connection + // pool already shared) or an `Arc`, so the + // "warm cache" benefit of reusing a reader is negligible. let mut file_pruner = prepared.file_pruner; - let mut streams: Vec>> = - Vec::with_capacity(chunk_plans.len()); - - for (chunk_idx, chunk_plan) in chunk_plans.into_iter().enumerate() { - let mut prepared_plan = chunk_plan.prepare(rg_metadata)?; - if prepared.reverse_row_groups { - prepared_plan = prepared_plan.reverse(file_metadata.as_ref())?; - } - // `RowFilter` is not `Clone` because it owns `Box`s, - // so a fresh filter has to be built per chunk. 
- let row_filter = if let Some(predicate) = prepared - .pushdown_filters - .then_some(prepared.predicate.clone()) - .flatten() - { - match row_filter::build_row_filter( - &predicate, - &prepared.physical_file_schema, - file_metadata.as_ref(), - prepared.reorder_predicates, - &prepared.file_metrics, - ) { - Ok(Some(filter)) => Some(filter), - Ok(None) => None, - Err(e) => { - debug!( - "Ignoring error building row filter for '{predicate:?}': {e}" - ); - None - } - } - } else { - None - }; - - let arrow_reader_metrics = ArrowReaderMetrics::enabled(); - let read_plan = build_projection_read_plan( - prepared.projection.expr_iter(), - &prepared.physical_file_schema, - reader_metadata.parquet_schema(), - ); - - let mut decoder_builder = - ParquetPushDecoderBuilder::new_with_metadata(reader_metadata.clone()) - .with_projection(read_plan.projection_mask) - .with_batch_size(prepared.batch_size) - .with_metrics(arrow_reader_metrics.clone()); - - if let Some(row_filter) = row_filter { - decoder_builder = decoder_builder.with_row_filter(row_filter); - } - if prepared.force_filter_selections { - decoder_builder = decoder_builder - .with_row_selection_policy(RowSelectionPolicy::Selectors); - } - if let Some(row_selection) = prepared_plan.row_selection { - decoder_builder = decoder_builder.with_row_selection(row_selection); - } - decoder_builder = - decoder_builder.with_row_groups(prepared_plan.row_group_indexes); - // `ScanState.remain` enforces the true outer limit across all - // morsels; passing the per-chunk limit here is a conservative - // per-chunk cap that bounds wasted decode once the outer cap is - // hit. - if let Some(limit) = prepared.limit { - decoder_builder = decoder_builder.with_limit(limit); - } - if let Some(max_predicate_cache_size) = prepared.max_predicate_cache_size { - decoder_builder = decoder_builder - .with_max_predicate_cache_size(max_predicate_cache_size); - } - - let decoder = decoder_builder.build()?; - - let reader = match initial_reader.take() { - Some(r) => r, - None => prepared.parquet_file_reader_factory.create_reader( - prepared.partition_index, - prepared.partitioned_file.clone(), - prepared.metadata_size_hint, - &prepared.metrics, - )?, - }; - - // Rebase column indices to match the narrowed stream schema. - // The projection expressions have indices based on physical_file_schema, - // but the stream only contains the columns selected by the ProjectionMask. - let stream_schema = read_plan.projected_schema; - let replace_schema = stream_schema != prepared.output_schema; - let projection = prepared - .projection - .clone() - .try_map_exprs(|expr| reassign_expr_columns(expr, &stream_schema))?; - let projector = projection.make_projector(&stream_schema)?; - - let predicate_cache_inner_records = - prepared.file_metrics.predicate_cache_inner_records.clone(); - let predicate_cache_records = - prepared.file_metrics.predicate_cache_records.clone(); - - let stream = futures::stream::unfold( - PushDecoderStreamState { - decoder, - reader, - projector, - output_schema: Arc::clone(&prepared.output_schema), - replace_schema, - arrow_reader_metrics, - predicate_cache_inner_records, - predicate_cache_records, - baseline_metrics: prepared.baseline_metrics.clone(), - }, - |state| async move { state.transition().await }, - ) - .fuse(); - - // `FilePruner` is not `Clone` and holds stateful predicate-generation - // counters, so it can only wrap a single stream. Attach it to the - // first chunk so the whole file scan can still early-stop when a - // dynamic filter narrows. 
- let boxed: BoxStream<'static, Result> = if chunk_idx == 0 - && let Some(pruner) = file_pruner.take() - { - EarlyStoppingStream::new( - stream.boxed(), - pruner, - prepared.file_metrics.files_ranges_pruned_statistics.clone(), - ) - .boxed() - } else { - stream.boxed() - }; + let shared = Arc::new(LazyMorselShared { + partition_index: prepared.partition_index, + partitioned_file: prepared.partitioned_file, + metadata_size_hint: prepared.metadata_size_hint, + metrics: prepared.metrics, + file_metrics: prepared.file_metrics, + baseline_metrics: prepared.baseline_metrics, + parquet_file_reader_factory: prepared.parquet_file_reader_factory, + batch_size: prepared.batch_size, + physical_file_schema: prepared.physical_file_schema, + output_schema: prepared.output_schema, + projection: prepared.projection, + predicate: prepared.predicate, + pushdown_filters: prepared.pushdown_filters, + force_filter_selections: prepared.force_filter_selections, + reorder_predicates: prepared.reorder_predicates, + limit: prepared.limit, + max_predicate_cache_size: prepared.max_predicate_cache_size, + reverse_row_groups: prepared.reverse_row_groups, + reader_metadata, + file_metadata, + }); - streams.push(boxed); - } + // `FilePruner` is `!Clone`, so `take` hands it to the first morsel + // and leaves `None` for the rest. + let morsels: Vec> = chunk_plans + .into_iter() + .enumerate() + .map(|(chunk_idx, chunk_plan)| { + Box::new(ParquetLazyMorsel { + shared: Arc::clone(&shared), + chunk_plan, + chunk_idx, + file_pruner: file_pruner.take(), + }) as Box + }) + .collect(); - Ok(streams) + Ok(morsels) } }
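
The `chunk_plans` mapped into lazy morsels above come from a budget-driven split of the file's row groups: adjacent groups are coalesced into one chunk until adding the next group would exceed the per-morsel row budget, which is what the row-group-split tests earlier in this series assert (one morsel per 3-row group at a 3-row budget; `[0+1], [2]` at a 6-row budget). Below is a minimal, stand-alone sketch of that greedy coalescing; the function name and the plain `Vec<Vec<usize>>` output are illustrative only, not the crate's `ParquetAccessPlan` chunking code, and it ignores the compressed-byte budget the real implementation also enforces.

```rust
/// Greedily pack adjacent row groups (given by their row counts) into
/// chunks whose total row count stays within `max_rows_per_morsel`.
fn pack_row_groups(row_counts: &[u64], max_rows_per_morsel: u64) -> Vec<Vec<usize>> {
    let mut chunks: Vec<Vec<usize>> = Vec::new();
    let mut current: Vec<usize> = Vec::new();
    let mut current_rows = 0u64;

    for (idx, &rows) in row_counts.iter().enumerate() {
        // Start a new chunk if adding this group would blow the budget;
        // an oversized group still gets a chunk of its own.
        if !current.is_empty() && current_rows + rows > max_rows_per_morsel {
            chunks.push(std::mem::take(&mut current));
            current_rows = 0;
        }
        current.push(idx);
        current_rows += rows;
    }
    if !current.is_empty() {
        chunks.push(current);
    }
    chunks
}

fn main() {
    // Mirrors the tests: three 3-row groups.
    assert_eq!(pack_row_groups(&[3, 3, 3], 3), vec![vec![0], vec![1], vec![2]]);
    assert_eq!(pack_row_groups(&[3, 3, 3], 6), vec![vec![0, 1], vec![2]]);
    println!("packing matches the expected morsel counts");
}
```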