From db066de5d8f55167d2319075fb1fa321bc67c35b Mon Sep 17 00:00:00 2001 From: Mathieu Bressolle-Chataigner Date: Mon, 22 Dec 2025 16:31:25 +0100 Subject: [PATCH 1/2] Add custom MultiFileReader to avoid HEAD requests when scanning delete files **Context**: We experience slow read performance when a table has many delete files. **TL;DR**: We can leverage the metadata already available in DuckLake to improve load time of delete files. **Problem & Motivation:** DuckLake stores `file_size` metadata for both data and delete files. For data files, there is already a mechanism to forward this metadata to the MultiFileReader and the underlying filesystem. The Parquet reader requires this `file_size` to access the footer metadata. When using an `HTTPFileSystem` instance (e.g., for S3, Azure), it performs a HEAD request on the file if metadata fields (`file_size`, `etag`, `last_modified`) are not present. Since all files in DuckLake are immutable, we can apply the same optimization logic for delete files to avoid these unnecessary HEAD requests. **Solution:** Implements a custom multi-file reading solution that pre-populates file metadata to eliminate redundant storage HEAD requests when scanning delete files: **Key Changes:** 1. **New `DeleteFileFunctionInfo` struct**: Extends `TableFunctionInfo` to carry `DuckLakeFileData` metadata through the table function binding process. 2. **Custom `DeleteFileMultiFileReader` class**: - Extends DuckDB's `MultiFileReader` to intercept file list creation - Pre-populates `ExtendedOpenFileInfo` with metadata already available from DuckLake: - File size (`file_size_bytes`) - ETag (empty string as placeholder) - Last modified timestamp (set to epoch) - Encryption key (if present) - Creates a `SimpleMultiFileList` with this extended info upfront - Overrides `CreateFileList()` to return the pre-built list, bypassing DuckDB's default file discovery 3. 
**Modified `ScanDeleteFile()` method**: - Changed `parquet_scan` from const reference to mutable copy to allow modification - Attaches `DeleteFileFunctionInfo` and custom reader factory to the table function - Passes the actual `parquet_scan` function to `TableFunctionBindInput` instead of a dummy function, ensuring proper function context **Performance Impact**: Eliminates HEAD requests to object storage when opening Parquet delete files. This is particularly beneficial when working with remote storage (S3, Azure, etc.) and tables with many delete files, where HEAD requests were causing significant performance bottlenecks. --- src/storage/ducklake_delete_filter.cpp | 57 +++++++++++++++++++++++--- test/sql/delete/delete_metadata.test | 50 ++++++++++++++++++++++ 2 files changed, 102 insertions(+), 5 deletions(-) create mode 100644 test/sql/delete/delete_metadata.test diff --git a/src/storage/ducklake_delete_filter.cpp b/src/storage/ducklake_delete_filter.cpp index 41f43b8ae7d..c60545bcbe6 100644 --- a/src/storage/ducklake_delete_filter.cpp +++ b/src/storage/ducklake_delete_filter.cpp @@ -1,11 +1,53 @@ #include "storage/ducklake_delete_filter.hpp" #include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp" +#include "duckdb/common/multi_file/multi_file_list.hpp" +#include "duckdb/common/multi_file/multi_file_reader.hpp" +#include "duckdb/common/multi_file/multi_file_states.hpp" #include "duckdb/parser/tableref/table_function_ref.hpp" #include "duckdb/parallel/thread_context.hpp" #include "duckdb/main/database.hpp" namespace duckdb { +//! FunctionInfo to pass delete file metadata to the MultiFileReader +struct DeleteFileFunctionInfo : public TableFunctionInfo { + DuckLakeFileData file_data; +}; + +//! 
Custom MultiFileReader that creates a SimpleMultiFileList with extended info +struct DeleteFileMultiFileReader : public MultiFileReader { + static unique_ptr<MultiFileReader> CreateInstance(const TableFunction &table_function) { + return make_uniq<DeleteFileMultiFileReader>(table_function); + } + + explicit DeleteFileMultiFileReader(const TableFunction &table_function) { + auto &info = table_function.function_info->Cast<DeleteFileFunctionInfo>(); + auto &delete_file = info.file_data; + + OpenFileInfo file_info(delete_file.path); + auto extended_info = make_shared_ptr<ExtendedOpenFileInfo>(); + extended_info->options["file_size"] = Value::UBIGINT(delete_file.file_size_bytes); + extended_info->options["etag"] = Value(""); + extended_info->options["last_modified"] = Value::TIMESTAMP(timestamp_t(0)); + if (!delete_file.encryption_key.empty()) { + extended_info->options["encryption_key"] = Value::BLOB_RAW(delete_file.encryption_key); + } + file_info.extended_info = std::move(extended_info); + + vector<OpenFileInfo> files; + files.push_back(std::move(file_info)); + file_list = make_shared_ptr<SimpleMultiFileList>(std::move(files)); + } + + shared_ptr<MultiFileList> CreateFileList(ClientContext &context, const vector<OpenFileInfo> &paths, + const FileGlobInput &options) override { + return file_list; + } + +private: + shared_ptr<MultiFileList> file_list; +}; + DuckLakeDeleteFilter::DuckLakeDeleteFilter() : delete_data(make_shared_ptr<DuckLakeDeleteData>()) { } @@ -52,7 +94,14 @@ vector<row_t> DuckLakeDeleteFilter::ScanDeleteFile(ClientContext &context, const auto &instance = DatabaseInstance::GetDatabase(context); ExtensionLoader loader(instance, "ducklake"); auto &parquet_scan_entry = loader.GetTableFunction("parquet_scan"); - auto &parquet_scan = parquet_scan_entry.functions.functions[0]; + auto parquet_scan = parquet_scan_entry.functions.functions[0]; + + // Set up function_info with delete file metadata and custom MultiFileReader + // This allows the bind to use our file list with extended info (file_size, etag, last_modified) + auto function_info = make_shared_ptr<DeleteFileFunctionInfo>(); + function_info->file_data = delete_file; + parquet_scan.function_info =
std::move(function_info); + parquet_scan.get_multi_file_reader = DeleteFileMultiFileReader::CreateInstance; // Prepare the inputs for the bind vector<Value> children; @@ -68,10 +117,8 @@ } TableFunctionRef empty; - TableFunction dummy_table_function; - dummy_table_function.name = "DuckLakeDeleteScan"; - TableFunctionBindInput bind_input(children, named_params, input_types, input_names, nullptr, nullptr, - dummy_table_function, empty); + TableFunctionBindInput bind_input(children, named_params, input_types, input_names, nullptr, nullptr, parquet_scan, + empty); vector<LogicalType> return_types; vector<string> return_names; diff --git a/test/sql/delete/delete_metadata.test b/test/sql/delete/delete_metadata.test new file mode 100644 index 00000000000..75cd78d0c7a --- /dev/null +++ b/test/sql/delete/delete_metadata.test @@ -0,0 +1,50 @@ +# name: test/sql/delete/delete_metadata.test +# description: Test ducklake deletes +# group: [delete] + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +require ducklake + +require parquet + +statement ok +SET autoinstall_known_extensions=1; + +statement ok +SET autoload_known_extensions=1; + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH 's3://mybucket') + +# Clean up any existing table from previous runs +statement ok +DROP TABLE IF EXISTS ducklake.test; + +statement ok +CREATE TABLE ducklake.test AS SELECT i id FROM range(1000) t(i); + +statement ok +INSERT INTO ducklake.test SELECT i id FROM range(15000, 16000) t(i) + +statement ok +BEGIN + +query I +DELETE FROM ducklake.test WHERE id%2=0 +---- +1000 + +statement ok +COMMIT + +query II +EXPLAIN ANALYZE SELECT COUNT(*) FILTER(WHERE id%2=0) FROM ducklake.test +---- +analyzed_plan :.*#HEAD: 0.* + +# we can time travel to see the state of the table before deletes +query II +EXPLAIN ANALYZE SELECT COUNT(*) FILTER(WHERE id%2=0) FROM ducklake.test AT (VERSION => 2) +---- +analyzed_plan :.*#HEAD: 0.* From
6902d7474d1b1c3e792cced39db1e37e64b4b9e2 Mon Sep 17 00:00:00 2001 From: Sylvain Utard Date: Tue, 6 Jan 2026 17:52:32 +0100 Subject: [PATCH 2/2] Ensure this test only runs on MinIO --- .github/workflows/MinIO.yml | 1 + test/sql/delete/delete_metadata.test | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/.github/workflows/MinIO.yml b/.github/workflows/MinIO.yml index 83405573419..97b739748b6 100644 --- a/.github/workflows/MinIO.yml +++ b/.github/workflows/MinIO.yml @@ -21,6 +21,7 @@ jobs: VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake PIP_BREAK_SYSTEM_PACKAGES: 1 BUILD_EXTENSION_TEST_DEPS: full + S3_TEST_SERVER_AVAILABLE: 1 steps: - name: Install required ubuntu packages diff --git a/test/sql/delete/delete_metadata.test b/test/sql/delete/delete_metadata.test index 75cd78d0c7a..d52f83ad99f 100644 --- a/test/sql/delete/delete_metadata.test +++ b/test/sql/delete/delete_metadata.test @@ -2,6 +2,10 @@ # description: Test ducklake deletes # group: [delete] +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db require ducklake