From a030dbf198cba590288a2e6589aeefa7d0a2af09 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Mon, 26 May 2025 14:53:12 +0200 Subject: [PATCH 1/2] add the openfileExtended info to the httpfileHandle and the cached entries. Then it should be easier to check any extended info in the future. --- extension/httpfs/httpfs.cpp | 9 +++++++-- extension/httpfs/include/http_metadata_cache.hpp | 1 + extension/httpfs/include/httpfs.hpp | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/extension/httpfs/httpfs.cpp b/extension/httpfs/httpfs.cpp index 34bcc5d..1001e30 100644 --- a/extension/httpfs/httpfs.cpp +++ b/extension/httpfs/httpfs.cpp @@ -279,13 +279,14 @@ void TimestampToTimeT(timestamp_t timestamp, time_t &result) { HTTPFileHandle::HTTPFileHandle(FileSystem &fs, const OpenFileInfo &file, FileOpenFlags flags, unique_ptr params_p) : FileHandle(fs, file.path, flags), params(std::move(params_p)), http_params(params->Cast()), - flags(flags), length(0), buffer_available(0), buffer_idx(0), file_offset(0), buffer_start(0), buffer_end(0) { + flags(flags), buffer_available(0), buffer_idx(0), file_offset(0), buffer_start(0), buffer_end(0) { // check if the handle has extended properties that can be set directly in the handle // if we have these properties we don't need to do a head request to obtain them later if (file.extended_info) { auto &info = file.extended_info->options; auto lm_entry = info.find("last_modified"); if (lm_entry != info.end()) { + last_modified = 0; // keep clang-tidy happy TimestampToTimeT(lm_entry->second.GetValue(), last_modified); } auto etag_entry = info.find("etag"); @@ -301,6 +302,8 @@ HTTPFileHandle::HTTPFileHandle(FileSystem &fs, const OpenFileInfo &file, FileOpe // skip head request initialized = true; } + // copy the rest of the extended info + extended_info = file.extended_info; } } unique_ptr HTTPFileSystem::CreateHandle(const OpenFileInfo &file, FileOpenFlags flags, @@ -672,6 +675,8 @@ void HTTPFileHandle::Initialize(optional_ptr 
opener) { last_modified = value.last_modified; length = value.length; etag = value.etag; + extended_info = value.extended_info; + if (flags.OpenForReading()) { read_buffer = duckdb::unique_ptr(new data_t[READ_BUFFER_LEN]); @@ -689,7 +694,7 @@ void HTTPFileHandle::Initialize(optional_ptr opener) { FullDownload(hfs, should_write_cache); } if (should_write_cache) { - current_cache->Insert(path, {length, last_modified, etag}); + current_cache->Insert(path, {length, last_modified, etag, extended_info}); } // Initialize the read buffer now that we know the file exists diff --git a/extension/httpfs/include/http_metadata_cache.hpp b/extension/httpfs/include/http_metadata_cache.hpp index 8fc7909..18cd01f 100644 --- a/extension/httpfs/include/http_metadata_cache.hpp +++ b/extension/httpfs/include/http_metadata_cache.hpp @@ -19,6 +19,7 @@ struct HTTPMetadataCacheEntry { idx_t length; time_t last_modified; string etag; + shared_ptr extended_info; }; // Simple cache with a max age for an entry to be valid diff --git a/extension/httpfs/include/httpfs.hpp b/extension/httpfs/include/httpfs.hpp index a6b8570..bca8961 100644 --- a/extension/httpfs/include/httpfs.hpp +++ b/extension/httpfs/include/httpfs.hpp @@ -48,6 +48,7 @@ class HTTPFileHandle : public FileHandle { idx_t length; time_t last_modified; string etag; + shared_ptr extended_info; bool initialized = false; // When using full file download, the full file will be written to a cached file handle From 779f84bd39a270be28f81268044d0b9e6b85e72e Mon Sep 17 00:00:00 2001 From: Tmonster Date: Mon, 26 May 2025 14:56:35 +0200 Subject: [PATCH 2/2] length should be 0 --- extension/httpfs/httpfs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension/httpfs/httpfs.cpp b/extension/httpfs/httpfs.cpp index 1001e30..dc7f8d6 100644 --- a/extension/httpfs/httpfs.cpp +++ b/extension/httpfs/httpfs.cpp @@ -279,7 +279,7 @@ void TimestampToTimeT(timestamp_t timestamp, time_t &result) { 
HTTPFileHandle::HTTPFileHandle(FileSystem &fs, const OpenFileInfo &file, FileOpenFlags flags, unique_ptr params_p) : FileHandle(fs, file.path, flags), params(std::move(params_p)), http_params(params->Cast()), - flags(flags), buffer_available(0), buffer_idx(0), file_offset(0), buffer_start(0), buffer_end(0) { + flags(flags), length(0), buffer_available(0), buffer_idx(0), file_offset(0), buffer_start(0), buffer_end(0) { // check if the handle has extended properties that can be set directly in the handle // if we have these properties we don't need to do a head request to obtain them later if (file.extended_info) {