diff --git a/src/ducklake_extension.cpp b/src/ducklake_extension.cpp
index 106425062e5..da5c335eeb1 100644
--- a/src/ducklake_extension.cpp
+++ b/src/ducklake_extension.cpp
@@ -46,6 +46,9 @@ static void LoadInternal(ExtensionLoader &loader) {
 	DuckLakeCleanupOrphanedFilesFunction cleanup_orphaned_files;
 	loader.RegisterFunction(cleanup_orphaned_files);
 
+	DuckLakeStaticBackupFunction backup;
+	loader.RegisterFunction(backup);
+
 	DuckLakeExpireSnapshotsFunction expire_snapshots;
 	loader.RegisterFunction(expire_snapshots);
 
diff --git a/src/functions/CMakeLists.txt b/src/functions/CMakeLists.txt
index 91af23d19f2..b9f7b845b04 100644
--- a/src/functions/CMakeLists.txt
+++ b/src/functions/CMakeLists.txt
@@ -15,6 +15,7 @@ add_library(
   ducklake_options.cpp
   ducklake_table_changes.cpp
   ducklake_table_info.cpp
+  ducklake_static_backup.cpp
   ducklake_table_insertions.cpp)
 set(ALL_OBJECT_FILES
     ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:ducklake_functions>
diff --git a/src/functions/ducklake_static_backup.cpp b/src/functions/ducklake_static_backup.cpp
new file mode 100644
index 00000000000..e719622f82e
--- /dev/null
+++ b/src/functions/ducklake_static_backup.cpp
@@ -0,0 +1,125 @@
+#include "functions/ducklake_table_functions.hpp"
+#include "duckdb/common/string_util.hpp"
+#include "duckdb/common/types/uuid.hpp"
+#include "duckdb/main/attached_database.hpp"
+#include "duckdb/main/database_manager.hpp"
+#include "storage/ducklake_catalog.hpp"
+#include "storage/ducklake_transaction.hpp"
+
+namespace duckdb {
+
+//! Bind data of ducklake_static_backup: the catalog to back up and the destination of the backup
+struct BackupBindData : public TableFunctionData {
+	explicit BackupBindData(Catalog &catalog) : catalog(catalog) {
+	}
+
+	//! Catalog resolved from the first function argument
+	Catalog &catalog;
+	//! Value of the "static_backup" attach option
+	string backup_location;
+};
+
+//! Renders text as a single-quoted SQL string literal, doubling every embedded single quote
+static string BackupSQLLiteral(const string &text) {
+	return "'" + StringUtil::Replace(text, "'", "''") + "'";
+}
+
+//! Resolves the catalog named by the first argument and reads its configured backup location.
+//! Throws InvalidInputException when the catalog was attached without a static_backup option.
+static unique_ptr<FunctionData> DuckLakeStaticBackupBind(ClientContext &context, TableFunctionBindInput &input,
+                                                         vector<LogicalType> &return_types, vector<string> &names) {
+	auto &catalog = BaseMetadataFunction::GetCatalog(context, input.inputs[0]);
+	auto result = make_uniq<BackupBindData>(catalog);
+
+	// go through the Cast helper instead of a bare reinterpret_cast
+	result->backup_location = catalog.Cast<DuckLakeCatalog>().GetStaticBackup();
+	if (result->backup_location.empty()) {
+		throw InvalidInputException("static_backup not specified as attach option");
+	}
+
+	return_types.emplace_back(LogicalType::VARCHAR);
+	names.emplace_back("errors");
+
+	return std::move(result);
+}
+
+//! Global state: records whether the backup already ran
+struct DuckLakeBackupData : public GlobalTableFunctionState {
+	bool executed = false;
+};
+
+//! Creates the global state with the backup marked as not yet executed
+static unique_ptr<GlobalTableFunctionState> DuckLakeStaticBackupInit(ClientContext &context,
+                                                                     TableFunctionInitInput &input) {
+	auto result = make_uniq<DuckLakeBackupData>();
+	return std::move(result);
+}
+
+//! Removes the temporary database file and its WAL file
+static void RemoveBackupTempFiles(FileSystem &fs, const string &tmp_path) {
+	fs.TryRemoveFile(tmp_path);
+	fs.TryRemoveFile(tmp_path + ".wal");
+}
+
+//! Copies the metadata catalog into a temporary database file, then writes that file and its ".wal"
+//! companion to the configured backup location with COPY ... (FORMAT BLOB). Errors are thrown.
+static void DuckLakeStaticBackupExecute(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
+	auto &data = data_p.bind_data->Cast<BackupBindData>();
+	auto &state = data_p.global_state->Cast<DuckLakeBackupData>();
+
+	// this function never emits rows
+	output.SetCardinality(0);
+	if (state.executed) {
+		return;
+	}
+
+	auto &transaction = DuckLakeTransaction::Get(context, data.catalog);
+	auto &fs = FileSystem::GetFileSystem(context);
+
+	// the copy is staged in a scratch file with a random UUID suffix
+	const auto tmp_path = "ducklake_backup_file." + UUID::ToString(UUID::GenerateRandomUUID());
+	if (fs.FileExists(tmp_path) || fs.FileExists(tmp_path + ".wal")) {
+		throw BinderException(
+		    "Temporary file \"%s\" is already in use, please cleanup files in the form \"ducklake_backup_file.*\"",
+		    tmp_path);
+	}
+
+	// paths are embedded as escaped literals so a quote in the attach option cannot break the script
+	const auto tmp_literal = BackupSQLLiteral(tmp_path);
+	const auto tmp_wal_literal = BackupSQLLiteral(tmp_path + ".wal");
+	const auto target_literal = BackupSQLLiteral(data.backup_location);
+	const auto target_wal_literal = BackupSQLLiteral(data.backup_location + ".wal");
+
+	string script;
+	script += "ATTACH IF NOT EXISTS " + tmp_literal +
+	          " AS {METADATA_CATALOG_NAME_IDENTIFIER_BACKUP} (STORAGE_VERSION 'v1.4.0');";
+	script += "COPY FROM DATABASE {METADATA_CATALOG_NAME_IDENTIFIER} TO {METADATA_CATALOG_NAME_IDENTIFIER_BACKUP};";
+	script += "DETACH {METADATA_CATALOG_NAME_IDENTIFIER_BACKUP};";
+	script += "COPY (SELECT content FROM read_blob(" + tmp_literal + ")) TO " + target_literal + " (FORMAT BLOB);";
+	script +=
+	    "COPY (SELECT content FROM read_blob(" + tmp_wal_literal + ")) TO " + target_wal_literal + " (FORMAT BLOB);";
+
+	unique_ptr<QueryResult> result;
+	try {
+		result = transaction.Query(std::move(script));
+	} catch (...) {
+		// do not leave scratch files behind when the query call itself throws
+		RemoveBackupTempFiles(fs, tmp_path);
+		throw;
+	}
+	RemoveBackupTempFiles(fs, tmp_path);
+
+	if (result->HasError()) {
+		// NOTE(review): if the script fails after ATTACH, the backup database presumably stays attached
+		// although its file was removed above - confirm whether an explicit DETACH is needed here
+		result->GetErrorObject().Throw("Failed to create static backup: ");
+	}
+	state.executed = true;
+}
+
+DuckLakeStaticBackupFunction::DuckLakeStaticBackupFunction()
+    : TableFunction("ducklake_static_backup", {LogicalType::VARCHAR}, DuckLakeStaticBackupExecute,
+                    DuckLakeStaticBackupBind, DuckLakeStaticBackupInit) {
+}
+
+} // namespace duckdb
diff --git a/src/include/common/ducklake_options.hpp b/src/include/common/ducklake_options.hpp
index 4b95adfbc88..c403baf13d8 100644
--- a/src/include/common/ducklake_options.hpp
+++ b/src/include/common/ducklake_options.hpp
@@ -25,6 +25,8 @@ struct DuckLakeOptions {
 	string metadata_path;
 	string metadata_schema;
 	string data_path;
+	//! Destination used by ducklake_static_backup, set through the "static_backup" attach option
+	string static_backup;
 	bool override_data_path = false;
 	AccessMode access_mode = AccessMode::AUTOMATIC;
 	DuckLakeEncryption encryption = DuckLakeEncryption::AUTOMATIC;
diff --git a/src/include/functions/ducklake_table_functions.hpp b/src/include/functions/ducklake_table_functions.hpp
index 1be7ca23c44..3fe603dff80 100644
--- a/src/include/functions/ducklake_table_functions.hpp
+++ b/src/include/functions/ducklake_table_functions.hpp
@@ -119,4 +119,10 @@ class DuckLakeAddDataFilesFunction : public TableFunction {
 	DuckLakeAddDataFilesFunction();
 };
 
+//! Table function ducklake_static_backup(catalog): writes the catalog's metadata to its static_backup location
+class DuckLakeStaticBackupFunction : public TableFunction {
+public:
+	DuckLakeStaticBackupFunction();
+};
+
 } // namespace duckdb
diff --git a/src/include/storage/ducklake_catalog.hpp b/src/include/storage/ducklake_catalog.hpp
index b4c19bba4b2..d4a4fc2daff 100644
--- a/src/include/storage/ducklake_catalog.hpp
+++ b/src/include/storage/ducklake_catalog.hpp
@@ -112,6 +112,11 @@ class DuckLakeCatalog : public Catalog {
 
 	string GetDataPath();
 
+	//! The "static_backup" attach option; empty when the option was not given
+	const string &GetStaticBackup() const {
+		return options.static_backup;
+	}
+
 	bool SupportsTimeTravel() const override {
 		return true;
 	}
diff --git a/src/storage/ducklake_storage.cpp b/src/storage/ducklake_storage.cpp
index 68678f5d705..024edcb9566 100644
--- a/src/storage/ducklake_storage.cpp
+++ b/src/storage/ducklake_storage.cpp
@@ -15,6 +15,8 @@ static void HandleDuckLakeOption(DuckLakeOptions &options, const string &option,
 		options.override_data_path = value.GetValue<bool>();
 	} else if (lcase == "metadata_schema") {
 		options.metadata_schema = value.ToString();
+	} else if (lcase == "static_backup") {
+		options.static_backup = value.ToString();
 	} else if (lcase == "metadata_catalog") {
 		options.metadata_database = value.ToString();
 	} else if (lcase == "metadata_path") {
diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp
index 485604590ef..f6e4a59efcd 100644
--- a/src/storage/ducklake_transaction.cpp
+++ b/src/storage/ducklake_transaction.cpp
@@ -1455,6 +1455,8 @@ void DuckLakeTransaction::DeleteInlinedData(const DuckLakeInlinedTableInfo &inli
 unique_ptr<QueryResult> DuckLakeTransaction::Query(string query) {
 	auto &connection = GetConnection();
 	auto catalog_identifier = DuckLakeUtil::SQLIdentifierToString(ducklake_catalog.MetadataDatabaseName());
+	auto catalog_identifier_backup =
+	    DuckLakeUtil::SQLIdentifierToString(ducklake_catalog.MetadataDatabaseName() + "_backup");
 	auto catalog_literal = DuckLakeUtil::SQLLiteralToString(ducklake_catalog.MetadataDatabaseName());
 	auto schema_identifier = DuckLakeUtil::SQLIdentifierToString(ducklake_catalog.MetadataSchemaName());
 	auto schema_identifier_escaped = StringUtil::Replace(schema_identifier, "'", "''");
@@ -1464,6 +1466,7 @@ unique_ptr<QueryResult> DuckLakeTransaction::Query(string query) {
 
 	query = StringUtil::Replace(query, "{METADATA_CATALOG_NAME_LITERAL}", catalog_literal);
 	query = StringUtil::Replace(query, "{METADATA_CATALOG_NAME_IDENTIFIER}", catalog_identifier);
+	query = StringUtil::Replace(query, "{METADATA_CATALOG_NAME_IDENTIFIER_BACKUP}", catalog_identifier_backup);
 	query = StringUtil::Replace(query, "{METADATA_SCHEMA_NAME_LITERAL}", schema_literal);
 	query = StringUtil::Replace(query, "{METADATA_CATALOG}", catalog_identifier + "." + schema_identifier);
 	query = StringUtil::Replace(query, "{METADATA_SCHEMA_ESCAPED}", schema_identifier_escaped);