diff --git a/README.md b/README.md index eb2009b2..a2d8fb35 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,7 @@ For more detailed information on how to use ParquetSharp, see the following docu * [Row-oriented API](docs/RowOriented.md) — a higher level API that abstracts away the column-oriented nature of Parquet files * [Custom types](docs/TypeFactories.md) — how to customize the mapping between .NET and Parquet types, including using the `DateOnly` and `TimeOnly` types added in .NET 6. +* [Encryption](docs/Encryption.md) — using Parquet Modular Encryption to read and write encrypted data * [Writing TimeSpan data](docs/TimeSpan.md) — interoperability with other libraries when writing TimeSpan data * [Use from PowerShell](docs/PowerShell.md) diff --git a/build_windows.ps1 b/build_windows.ps1 index a853907d..ed342094 100644 --- a/build_windows.ps1 +++ b/build_windows.ps1 @@ -34,6 +34,18 @@ if ($Env:GITHUB_ACTIONS -eq "true") { $customTripletFile = "$customTripletsDir/$triplet.cmake" Copy-Item -Path $sourceTripletFile -Destination $customTripletFile Add-Content -Path $customTripletFile -Value "set(VCPKG_BUILD_TYPE release)" + + # Ensure vcpkg uses the same MSVC version to build dependencies as we use to build the ParquetSharp library. + # By default, vcpkg uses the most recent version it can find, which might not be the same as what msbuild uses. 
+ $vsInstPath = & "${env:ProgramFiles(x86)}/Microsoft Visual Studio/Installer/vswhere.exe" -latest -property installationPath + Import-Module "$vsInstPath/Common7/Tools/Microsoft.VisualStudio.DevShell.dll" + Enter-VsDevShell -VsInstallPath $vsInstPath -SkipAutomaticLocation + $clPath = Get-Command cl.exe | Select -ExpandProperty "Source" + $toolsetVersion = $clPath.Split("\")[8] + if (-not $toolsetVersion.StartsWith("14.")) { throw "Couldn't get toolset version from path '$clPath'" } + Write-Output "Using platform toolset version = $toolsetVersion" + Add-Content -Path $customTripletFile -Value "set(VCPKG_PLATFORM_TOOLSET_VERSION $toolsetVersion)" + $options += "-D" $options += "VCPKG_OVERLAY_TRIPLETS=$customTripletsDir" } diff --git a/cpp/Buffer.cpp b/cpp/Buffer.cpp index 1ab5cad9..0ef5f2c4 100644 --- a/cpp/Buffer.cpp +++ b/cpp/Buffer.cpp @@ -26,6 +26,11 @@ extern "C" TRYCATCH(*data = (*buffer)->data();) } + PARQUETSHARP_EXPORT ExceptionInfo* Buffer_MutableData(const std::shared_ptr* buffer, uint8_t** data) + { + TRYCATCH(*data = (*buffer)->mutable_data();) + } + PARQUETSHARP_EXPORT ExceptionInfo* Buffer_Size(const std::shared_ptr* buffer, int64_t* size) { TRYCATCH(*size = (*buffer)->size();) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b2155a77..fa92aa81 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -67,6 +67,12 @@ add_library(ParquetSharpNative SHARED arrow/ArrowWriterPropertiesBuilder.cpp arrow/FileReader.cpp arrow/FileWriter.cpp + encryption/CryptoFactory.cpp + encryption/DecryptionConfiguration.cpp + encryption/EncryptionConfiguration.cpp + encryption/KmsConnectionConfig.cpp + encryption/ManagedKmsClient.h + encryption/ManagedKmsClientFactory.h ) generate_export_header(ParquetSharpNative diff --git a/cpp/FileDecryptionProperties.cpp b/cpp/FileDecryptionProperties.cpp index 88c46531..fff336a7 100644 --- a/cpp/FileDecryptionProperties.cpp +++ b/cpp/FileDecryptionProperties.cpp @@ -15,7 +15,7 @@ extern "C" { TRYCATCH(*clone = new 
std::shared_ptr((*properties)->DeepClone());) } - + PARQUETSHARP_EXPORT void FileDecryptionProperties_Free(const std::shared_ptr* properties) { delete properties; @@ -41,12 +41,14 @@ extern "C" FreeCString(aad_prefix); } - PARQUETSHARP_EXPORT ExceptionInfo* FileDecryptionProperties_Key_Retriever(const std::shared_ptr* properties, void** key_retriever) + PARQUETSHARP_EXPORT ExceptionInfo* FileDecryptionProperties_Key_Retriever(const std::shared_ptr* properties, void** key_retriever) { TRYCATCH ( - const auto r = (*properties)->key_retriever(); - *key_retriever = r ? dynamic_cast(*r).Handle : nullptr; + // This only returns a KeyRetriever handle when a ManagedDecryptionKeyRetriever is used. + // If the key retriever is set using the Key Management Tools API (CryptoFactory) then this will return null. + const auto retriever = std::dynamic_pointer_cast((*properties)->key_retriever()); + *key_retriever = retriever ? retriever->Handle : nullptr; ) } diff --git a/cpp/FileEncryptionProperties.cpp b/cpp/FileEncryptionProperties.cpp index 98989d1c..eec102ef 100644 --- a/cpp/FileEncryptionProperties.cpp +++ b/cpp/FileEncryptionProperties.cpp @@ -13,7 +13,7 @@ extern "C" { TRYCATCH(*clone = new std::shared_ptr((*properties)->DeepClone());) } - + PARQUETSHARP_EXPORT void FileEncryptionProperties_Free(const std::shared_ptr* properties) { delete properties; @@ -54,14 +54,15 @@ extern "C" FreeCString(file_aad); } - PARQUETSHARP_EXPORT ExceptionInfo* FileEncryptionProperties_Column_Encryption_Properties(const std::shared_ptr* properties, const char* column_path, std::shared_ptr* column_encryption_properties) + PARQUETSHARP_EXPORT ExceptionInfo* FileEncryptionProperties_Column_Encryption_Properties(const std::shared_ptr* properties, const char* column_path, std::shared_ptr** column_encryption_properties) { - TRYCATCH(*column_encryption_properties = (*properties)->column_encryption_properties(column_path);) + TRYCATCH( + std::shared_ptr column_properties = 
(*properties)->column_encryption_properties(column_path); + if (column_properties != nullptr) { + *column_encryption_properties = new std::shared_ptr(column_properties); + } else { + *column_encryption_properties = nullptr; + } + ) } - - // TODO: do we really need this? - //PARQUETSHARP_EXPORT ExceptionInfo* FileEncryptionProperties_Encrypted_Columns(const std::shared_ptr* properties, bool* is_encrypted_with_footer_key) - //{ - // TRYCATCH(*encrypted_columns = (*properties)->encrypted_columns();) - //} } \ No newline at end of file diff --git a/cpp/ResizableBuffer.cpp b/cpp/ResizableBuffer.cpp index d5a2a65b..0b542a97 100644 --- a/cpp/ResizableBuffer.cpp +++ b/cpp/ResizableBuffer.cpp @@ -4,6 +4,7 @@ #include #include +#include extern "C" { @@ -14,4 +15,11 @@ extern "C" *buffer = new std::shared_ptr(pBuffer.ValueOrDie().release()); ) } + + PARQUETSHARP_EXPORT ExceptionInfo* ResizableBuffer_Resize(std::shared_ptr* buffer, int64_t new_size) + { + TRYCATCH( + PARQUET_THROW_NOT_OK((*buffer)->Resize(new_size)); + ) + } } diff --git a/cpp/encryption/CryptoFactory.cpp b/cpp/encryption/CryptoFactory.cpp new file mode 100644 index 00000000..3f77dee7 --- /dev/null +++ b/cpp/encryption/CryptoFactory.cpp @@ -0,0 +1,85 @@ +#include +#include +#include + +#include "cpp/ParquetSharpExport.h" +#include "../ExceptionInfo.h" +#include "ManagedKmsClientFactory.h" + +using namespace parquet::encryption; + +extern "C" +{ + PARQUETSHARP_EXPORT ExceptionInfo* CryptoFactory_Create(CryptoFactory** crypto_factory) + { + TRYCATCH(*crypto_factory = new CryptoFactory();) + } + + PARQUETSHARP_EXPORT void CryptoFactory_Free(CryptoFactory* crypto_factory) + { + delete crypto_factory; + } + + PARQUETSHARP_EXPORT ExceptionInfo* CryptoFactory_RegisterKmsClientFactory( + CryptoFactory* crypto_factory, + void* const client_factory_handle, + const ManagedKmsClient::FreeGcHandleFunc free_gc_handle, + const ManagedKmsClientFactory::CreateClientFunc create_client, + const ManagedKmsClient::WrapFunc wrap, + 
const ManagedKmsClient::UnwrapFunc unwrap) + { + TRYCATCH( + crypto_factory->RegisterKmsClientFactory( + std::make_shared(client_factory_handle, free_gc_handle, create_client, wrap, unwrap)); + ) + } + + PARQUETSHARP_EXPORT ExceptionInfo* CryptoFactory_GetFileEncryptionProperties( + CryptoFactory* crypto_factory, + const KmsConnectionConfig* kms_connection_config, + const EncryptionConfiguration* encryption_configuration, + const char* file_path, + std::shared_ptr** file_encryption_properties) + { + TRYCATCH( + std::string file_path_str = file_path == nullptr ? "" : file_path; + std::shared_ptr<::arrow::fs::FileSystem> file_system = file_path_str.empty() ? + nullptr : std::make_shared<::arrow::fs::LocalFileSystem>(); + (*file_encryption_properties) = new std::shared_ptr( + crypto_factory->GetFileEncryptionProperties( + *kms_connection_config, *encryption_configuration, file_path_str, file_system)); + ) + } + + PARQUETSHARP_EXPORT ExceptionInfo* CryptoFactory_GetFileDecryptionProperties( + CryptoFactory* crypto_factory, + const KmsConnectionConfig* kms_connection_config, + const DecryptionConfiguration* decryption_configuration, + const char* file_path, + std::shared_ptr** file_decryption_properties) + { + TRYCATCH( + std::string file_path_str = file_path == nullptr ? "" : file_path; + std::shared_ptr<::arrow::fs::FileSystem> file_system = file_path_str.empty() ? + nullptr : std::make_shared<::arrow::fs::LocalFileSystem>(); + (*file_decryption_properties) = new std::shared_ptr( + crypto_factory->GetFileDecryptionProperties( + *kms_connection_config, *decryption_configuration, file_path_str, file_system)); + ) + } + + PARQUETSHARP_EXPORT ExceptionInfo* CryptoFactory_RotateMasterKeys( + CryptoFactory* crypto_factory, + const KmsConnectionConfig* kms_connection_config, + const char* file_path, + bool double_wrapping, + double cache_lifetime_seconds) + { + TRYCATCH( + std::string file_path_str = file_path == nullptr ? 
"" : file_path; + std::shared_ptr<::arrow::fs::FileSystem> file_system = std::make_shared<::arrow::fs::LocalFileSystem>(); + crypto_factory->RotateMasterKeys( + *kms_connection_config, file_path_str, file_system, double_wrapping, cache_lifetime_seconds); + ) + } +} diff --git a/cpp/encryption/DecryptionConfiguration.cpp b/cpp/encryption/DecryptionConfiguration.cpp new file mode 100644 index 00000000..3e97709d --- /dev/null +++ b/cpp/encryption/DecryptionConfiguration.cpp @@ -0,0 +1,29 @@ +#include + +#include "cpp/ParquetSharpExport.h" +#include "../ExceptionInfo.h" + +using namespace parquet::encryption; + +extern "C" +{ + PARQUETSHARP_EXPORT ExceptionInfo* DecryptionConfiguration_Create(DecryptionConfiguration** configuration) + { + TRYCATCH(*configuration = new DecryptionConfiguration();) + } + + PARQUETSHARP_EXPORT void DecryptionConfiguration_Free(DecryptionConfiguration* configuration) + { + delete configuration; + } + + PARQUETSHARP_EXPORT ExceptionInfo* DecryptionConfiguration_GetCacheLifetimeSeconds(const DecryptionConfiguration* configuration, double* cache_lifetime_seconds) + { + TRYCATCH(*cache_lifetime_seconds = configuration->cache_lifetime_seconds;) + } + + PARQUETSHARP_EXPORT ExceptionInfo* DecryptionConfiguration_SetCacheLifetimeSeconds(DecryptionConfiguration* configuration, double cache_lifetime_seconds) + { + TRYCATCH(configuration->cache_lifetime_seconds = cache_lifetime_seconds;) + } +} diff --git a/cpp/encryption/EncryptionConfiguration.cpp b/cpp/encryption/EncryptionConfiguration.cpp new file mode 100644 index 00000000..26b85212 --- /dev/null +++ b/cpp/encryption/EncryptionConfiguration.cpp @@ -0,0 +1,109 @@ +#include + +#include "cpp/ParquetSharpExport.h" +#include "../ExceptionInfo.h" + +using namespace parquet::encryption; + +extern "C" +{ + PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_Create(const char* footer_key, EncryptionConfiguration** configuration) + { + TRYCATCH(*configuration = new 
EncryptionConfiguration(footer_key == nullptr ? "" : footer_key);)
+    }
+
+    PARQUETSHARP_EXPORT void EncryptionConfiguration_Free(EncryptionConfiguration* configuration)
+    {
+        delete configuration;
+    }
+
+    PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_GetFooterKey(const EncryptionConfiguration* configuration, const char** footer_key)
+    { // The returned pointer refers to the character storage of configuration->footer_key; it is not a copy.
+        TRYCATCH(*footer_key = configuration->footer_key.c_str();)
+    }
+
+    PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_SetFooterKey(EncryptionConfiguration* configuration, const char* footer_key)
+    { // A null footer_key is stored as an empty string (same convention as EncryptionConfiguration_Create); assigning nullptr to std::string is undefined behaviour.
+        TRYCATCH(configuration->footer_key = footer_key == nullptr ? "" : footer_key;)
+    }
+
+    PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_GetColumnKeys(const EncryptionConfiguration* configuration, const char** column_keys)
+    { // The returned pointer refers to the character storage of configuration->column_keys; it is not a copy.
+        TRYCATCH(*column_keys = configuration->column_keys.c_str();)
+    }
+
+    PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_SetColumnKeys(EncryptionConfiguration* configuration, const char* column_keys)
+    { // A null column_keys is stored as an empty string; assigning nullptr to std::string is undefined behaviour.
+        TRYCATCH(configuration->column_keys = column_keys == nullptr ? "" : column_keys;)
+    }
+
+    PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_GetUniformEncryption(const EncryptionConfiguration* configuration, bool* uniform_encryption)
+    {
+        TRYCATCH(*uniform_encryption = configuration->uniform_encryption;)
+    }
+
+    PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_SetUniformEncryption(EncryptionConfiguration* configuration, bool uniform_encryption)
+    {
+        TRYCATCH(configuration->uniform_encryption = uniform_encryption;)
+    }
+
+    PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_GetEncryptionAlgorithm(const EncryptionConfiguration* configuration, parquet::ParquetCipher::type* encryption_algorithm)
+    {
+        TRYCATCH(*encryption_algorithm = configuration->encryption_algorithm;)
+    }
+
+    PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_SetEncryptionAlgorithm(EncryptionConfiguration* configuration, parquet::ParquetCipher::type encryption_algorithm)
+    {
+        TRYCATCH(configuration->encryption_algorithm =
encryption_algorithm;) + } + + PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_GetPlaintextFooter(const EncryptionConfiguration* configuration, bool* plaintext_footer) + { + TRYCATCH(*plaintext_footer = configuration->plaintext_footer;) + } + + PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_SetPlaintextFooter(EncryptionConfiguration* configuration, bool plaintext_footer) + { + TRYCATCH(configuration->plaintext_footer = plaintext_footer;) + } + + PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_GetDoubleWrapping(const EncryptionConfiguration* configuration, bool* double_wrapping) + { + TRYCATCH(*double_wrapping = configuration->double_wrapping;) + } + + PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_SetDoubleWrapping(EncryptionConfiguration* configuration, bool double_wrapping) + { + TRYCATCH(configuration->double_wrapping = double_wrapping;) + } + + PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_GetCacheLifetimeSeconds(const EncryptionConfiguration* configuration, double* cache_lifetime_seconds) + { + TRYCATCH(*cache_lifetime_seconds = configuration->cache_lifetime_seconds;) + } + + PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_SetCacheLifetimeSeconds(EncryptionConfiguration* configuration, double cache_lifetime_seconds) + { + TRYCATCH(configuration->cache_lifetime_seconds = cache_lifetime_seconds;) + } + + PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_GetInternalKeyMaterial(const EncryptionConfiguration* configuration, bool* internal_key_material) + { + TRYCATCH(*internal_key_material = configuration->internal_key_material;) + } + + PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_SetInternalKeyMaterial(EncryptionConfiguration* configuration, bool internal_key_material) + { + TRYCATCH(configuration->internal_key_material = internal_key_material;) + } + + PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_GetDataKeyLengthBits(const EncryptionConfiguration* 
configuration, int32_t* data_key_length_bits) + { + TRYCATCH(*data_key_length_bits = configuration->data_key_length_bits;) + } + + PARQUETSHARP_EXPORT ExceptionInfo* EncryptionConfiguration_SetDataKeyLengthBits(EncryptionConfiguration* configuration, int32_t data_key_length_bits) + { + TRYCATCH(configuration->data_key_length_bits = data_key_length_bits;) + } +} diff --git a/cpp/encryption/KmsConnectionConfig.cpp b/cpp/encryption/KmsConnectionConfig.cpp new file mode 100644 index 00000000..ab831072 --- /dev/null +++ b/cpp/encryption/KmsConnectionConfig.cpp @@ -0,0 +1,76 @@ +#include +#include + +#include "cpp/ParquetSharpExport.h" +#include "../ExceptionInfo.h" + +using namespace parquet::encryption; + +extern "C" +{ + PARQUETSHARP_EXPORT ExceptionInfo* KmsConnectionConfig_Create(KmsConnectionConfig** configuration) + { + TRYCATCH(*configuration = new KmsConnectionConfig();) + } + + PARQUETSHARP_EXPORT void KmsConnectionConfig_Free(KmsConnectionConfig* configuration) + { + delete configuration; + } + + PARQUETSHARP_EXPORT ExceptionInfo* KmsConnectionConfig_GetKmsInstanceId(const KmsConnectionConfig* config, const char** instance_id) + { + TRYCATCH(*instance_id = config->kms_instance_id.c_str();) + } + + PARQUETSHARP_EXPORT ExceptionInfo* KmsConnectionConfig_SetKmsInstanceId(KmsConnectionConfig* config, const char* instance_id) + { + TRYCATCH(config->kms_instance_id = instance_id == nullptr ? "" : instance_id;) + } + + PARQUETSHARP_EXPORT ExceptionInfo* KmsConnectionConfig_GetKmsInstanceUrl(const KmsConnectionConfig* config, const char** instance_url) + { + TRYCATCH(*instance_url = config->kms_instance_url.c_str();) + } + + PARQUETSHARP_EXPORT ExceptionInfo* KmsConnectionConfig_SetKmsInstanceUrl(KmsConnectionConfig* config, const char* instance_url) + { + TRYCATCH(config->kms_instance_url = instance_url == nullptr ? 
"" : instance_url;) + } + + PARQUETSHARP_EXPORT ExceptionInfo* KmsConnectionConfig_GetKeyAccessToken(const KmsConnectionConfig* config, const char** token) + { + TRYCATCH( + if (config->refreshable_key_access_token == nullptr) { + *token = nullptr; + } else { + *token = config->refreshable_key_access_token->value().c_str(); + } + ) + } + + PARQUETSHARP_EXPORT ExceptionInfo* KmsConnectionConfig_SetKeyAccessToken(KmsConnectionConfig* config, const char* token) + { + TRYCATCH( + std::string token_str = token == nullptr ? "" : token; + if (config->refreshable_key_access_token == nullptr) { + config->refreshable_key_access_token = std::make_shared(token_str); + } else { + config->refreshable_key_access_token->Refresh(token_str); + } + ) + } + + PARQUETSHARP_EXPORT ExceptionInfo* KmsConnectionConfig_GetCustomKmsConf(const KmsConnectionConfig* config, std::shared_ptr<::arrow::KeyValueMetadata>** custom_conf) + { + TRYCATCH(*custom_conf = new std::shared_ptr<::arrow::KeyValueMetadata>(new ::arrow::KeyValueMetadata(config->custom_kms_conf));) + } + + PARQUETSHARP_EXPORT ExceptionInfo* KmsConnectionConfig_SetCustomKmsConf(KmsConnectionConfig* config, std::shared_ptr<::arrow::KeyValueMetadata>* custom_conf) + { + TRYCATCH( + config->custom_kms_conf.clear(); + (*custom_conf)->ToUnorderedMap(&config->custom_kms_conf); + ) + } +} diff --git a/cpp/encryption/ManagedKmsClient.h b/cpp/encryption/ManagedKmsClient.h new file mode 100644 index 00000000..7c1c5fb3 --- /dev/null +++ b/cpp/encryption/ManagedKmsClient.h @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include + +// Derived KmsClient that can callback into managed code. +// This class maintains a GC reference, such that the managed instance cannot get collected if this class is still alive. 
+class ManagedKmsClient final : public parquet::encryption::KmsClient +{ +public: + + typedef void (*FreeGcHandleFunc) (void* handle); + + typedef void (*WrapFunc) ( + void* handle, const char* key_bytes, int32_t key_length, const char* master_key_identifier, + const char** wrapped_key, const char** exception); + + typedef void (*UnwrapFunc) ( + void* handle, const char* wrapped_key, const char* master_key_identifier, + std::shared_ptr<::arrow::ResizableBuffer>* unwrapped_key_buffer, const char** exception); + + ManagedKmsClient(const ManagedKmsClient&) = delete; + ManagedKmsClient(ManagedKmsClient&&) = delete; + ManagedKmsClient& operator = (const ManagedKmsClient&) = delete; + ManagedKmsClient& operator = (ManagedKmsClient&&) = delete; + + ManagedKmsClient( + void* const handle, + const FreeGcHandleFunc free_gc_handle, + const WrapFunc wrap, + const UnwrapFunc unwrap) : + handle_(handle), + free_gc_handle_(free_gc_handle), + wrap_(wrap), + unwrap_(unwrap) + { + } + + ~ManagedKmsClient() override + { + free_gc_handle_(handle_); + } + + std::string WrapKey(const std::string& key_bytes, const std::string& master_key_identifier) override + { + const char* exception = nullptr; + const char* wrapped_key = nullptr; + + wrap_( + handle_, key_bytes.data(), static_cast(key_bytes.length()), master_key_identifier.c_str(), + &wrapped_key, &exception); + + if (exception != nullptr) + { + throw std::runtime_error(exception); + } + if (wrapped_key == nullptr) + { + throw std::runtime_error("WrapKey callback did not set exception or wrapped_key"); + } + + return std::string(wrapped_key); + } + + std::string UnwrapKey(const std::string& wrapped_key, const std::string& master_key_identifier) override + { + const char* exception = nullptr; + + std::shared_ptr unwrapped_key_buffer; + PARQUET_ASSIGN_OR_THROW(unwrapped_key_buffer, arrow::AllocateResizableBuffer(0)); + unwrap_( + handle_, wrapped_key.c_str(), master_key_identifier.c_str(), + &unwrapped_key_buffer, &exception); + + if 
(exception != nullptr) + { + throw std::runtime_error(exception); + } + + return std::string(unwrapped_key_buffer->data_as(), unwrapped_key_buffer->size()); + } + +private: + void* const handle_; + const FreeGcHandleFunc free_gc_handle_; + const WrapFunc wrap_; + const UnwrapFunc unwrap_; +}; diff --git a/cpp/encryption/ManagedKmsClientFactory.h b/cpp/encryption/ManagedKmsClientFactory.h new file mode 100644 index 00000000..044b76b9 --- /dev/null +++ b/cpp/encryption/ManagedKmsClientFactory.h @@ -0,0 +1,67 @@ +#pragma once + +#include +#include +#include +#include "ManagedKmsClient.h" + +class ManagedKmsClientFactory final : public parquet::encryption::KmsClientFactory +{ +public: + + typedef void (*CreateClientFunc) ( + void* handle, const parquet::encryption::KmsConnectionConfig* kms_connection_config, + void** client, const char** exception); + + ManagedKmsClientFactory(const ManagedKmsClientFactory&) = delete; + ManagedKmsClientFactory(ManagedKmsClientFactory&&) = delete; + ManagedKmsClientFactory& operator = (const ManagedKmsClientFactory&) = delete; + ManagedKmsClientFactory& operator = (ManagedKmsClientFactory&&) = delete; + + ManagedKmsClientFactory( + void* const handle, + const ManagedKmsClient::FreeGcHandleFunc free_gc_handle, + const CreateClientFunc create_client, + const ManagedKmsClient::WrapFunc wrap, + const ManagedKmsClient::UnwrapFunc unwrap) : + handle_(handle), + free_gc_handle_(free_gc_handle), + create_client_(create_client), + wrap_(wrap), + unwrap_(unwrap) + { + } + + ~ManagedKmsClientFactory() override + { + free_gc_handle_(handle_); + } + + std::shared_ptr CreateKmsClient( + const parquet::encryption::KmsConnectionConfig& kms_connection_config) override { + const char* exception = nullptr; + void* client = nullptr; + + create_client_( + handle_, &kms_connection_config, &client, &exception); + + if (exception != nullptr) + { + throw std::runtime_error(exception); + } + if (client == nullptr) + { + throw std::runtime_error("KmsClientFactory 
callback did not set client or exception"); + } + + // Reuse same FreeGcHandle for client as we use for the factory, as this isn't type specific + return std::make_shared(client, free_gc_handle_, wrap_, unwrap_); + } + +private: + void* const handle_; + const ManagedKmsClient::FreeGcHandleFunc free_gc_handle_; + const CreateClientFunc create_client_; + const ManagedKmsClient::WrapFunc wrap_; + const ManagedKmsClient::UnwrapFunc unwrap_; +}; diff --git a/csharp.test/Arrow/TestEncryptionRoundTrip.cs b/csharp.test/Arrow/TestEncryptionRoundTrip.cs new file mode 100644 index 00000000..44ea1b5e --- /dev/null +++ b/csharp.test/Arrow/TestEncryptionRoundTrip.cs @@ -0,0 +1,153 @@ +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using NUnit.Framework; +using Apache.Arrow; +using Apache.Arrow.Types; +using ParquetSharp.Arrow; +using ParquetSharp.Encryption; +using ParquetSharp.IO; +using ParquetSharp.Test.Encryption; + +namespace ParquetSharp.Test.Arrow +{ + /// + /// Test writing and reading using the Arrow API with column encryption + /// + [TestFixture] + internal sealed class TestEncryptionRoundTrip + { + [Test] + public static async Task TestArrowColumnEncryption() + { + var recordBatch = CreateTestData(); + + using var cryptoFactory = new CryptoFactory(_ => new TestKmsClient()); + using var connectionConfig = new KmsConnectionConfig(); + using var decryptionConfig = new DecryptionConfiguration(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"x"}}, + {"Key2", new[] {"y"}}, + }; + + using var buffer = new ResizableBuffer(); + using (var outStream = new BufferOutputStream(buffer)) + { + using var encryptionProperties = + cryptoFactory.GetFileEncryptionProperties(connectionConfig, encryptionConfig); + using var writerProperties = GetWriterProperties(encryptionProperties); + using var writer = new FileWriter(outStream, recordBatch.Schema, 
writerProperties); + writer.WriteRecordBatch(recordBatch); + writer.Close(); + } + + using var inStream = new BufferReader(buffer); + using var decryptionProperties = + cryptoFactory.GetFileDecryptionProperties(connectionConfig, decryptionConfig); + using var readerProperties = GetReaderProperties(decryptionProperties); + using var fileReader = new FileReader(inStream, readerProperties); + using var batchReader = fileReader.GetRecordBatchReader(); + var batchCount = 0; + RecordBatch batch; + while ((batch = await batchReader.ReadNextRecordBatchAsync()) != null) + { + using (batch) + { + VerifyReadData(batch); + } + + ++batchCount; + } + Assert.That(batchCount, Is.EqualTo(1)); + } + + [Test] + public static void TestReadWithoutDecryptionProperties() + { + var recordBatch = CreateTestData(); + + using var cryptoFactory = new CryptoFactory(_ => new TestKmsClient()); + using var connectionConfig = new KmsConnectionConfig(); + using var decryptionConfig = new DecryptionConfiguration(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"x"}}, + {"Key2", new[] {"y"}}, + }; + + using var buffer = new ResizableBuffer(); + using (var outStream = new BufferOutputStream(buffer)) + { + using var encryptionProperties = + cryptoFactory.GetFileEncryptionProperties(connectionConfig, encryptionConfig); + using var writerProperties = GetWriterProperties(encryptionProperties); + using var writer = new FileWriter(outStream, recordBatch.Schema, writerProperties); + writer.WriteRecordBatch(recordBatch); + writer.Close(); + } + + using var inStream = new BufferReader(buffer); + var exception = Assert.Throws(() => new FileReader(inStream)); + Assert.That(exception!.Message, Does.Contain("no decryption found")); + } + + private static WriterProperties GetWriterProperties(FileEncryptionProperties encryptionProperties) + { + using var builder = new WriterPropertiesBuilder(); + 
builder.Compression(Compression.Snappy); + builder.Encryption(encryptionProperties); + return builder.Build(); + } + + private static ReaderProperties GetReaderProperties(FileDecryptionProperties decryptionProperties) + { + var properties = ReaderProperties.GetDefaultReaderProperties(); + properties.FileDecryptionProperties = decryptionProperties; + return properties; + } + + private static RecordBatch CreateTestData() + { + var fields = new[] + { + new Field("x", new Int32Type(), false), + new Field("y", new FloatType(), false), + }; + const int numRows = 1000; + var schema = new Apache.Arrow.Schema(fields, null); + + var arrays = new IArrowArray[] + { + new Int32Array.Builder() + .AppendRange(Enumerable.Range(0, numRows)) + .Build(), + new FloatArray.Builder() + .AppendRange(Enumerable.Range(0, numRows).Select(i => i / 100.0f)) + .Build(), + }; + return new RecordBatch(schema, arrays, numRows); + } + + private static void VerifyReadData(RecordBatch batch) + { + Assert.That(batch.Schema.FieldsList.Count, Is.EqualTo(2)); + Assert.That(batch.Schema.FieldsList[0].Name, Is.EqualTo("x")); + Assert.That(batch.Schema.FieldsList[1].Name, Is.EqualTo("y")); + + var xArray = batch.Column(0) as Int32Array; + var yArray = batch.Column(1) as FloatArray; + Assert.That(xArray, Is.Not.Null); + Assert.That(yArray, Is.Not.Null); + + Assert.That(batch.Length, Is.EqualTo(1000)); + for (var row = 0; row < batch.Length; ++row) + { + Assert.That(xArray!.GetValue(row), Is.EqualTo(row)); + Assert.That(yArray!.GetValue(row), Is.EqualTo(row / 100.0f)); + } + } + } +} diff --git a/csharp.test/Encryption/TestCryptoFactory.cs b/csharp.test/Encryption/TestCryptoFactory.cs new file mode 100644 index 00000000..0bf36105 --- /dev/null +++ b/csharp.test/Encryption/TestCryptoFactory.cs @@ -0,0 +1,228 @@ +using System; +using System.Collections.Generic; +using System.Threading; +using NUnit.Framework; +using ParquetSharp.Encryption; + +namespace ParquetSharp.Test.Encryption +{ + [TestFixture] + 
internal static class TestCryptoFactory + { + [Test] + public static void TestCreateEncryptionProperties() + { + using var cryptoFactory = new CryptoFactory(_ => new TestKmsClient()); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"col0", "col1"}} + }; + using var fileEncryptionProperties = + cryptoFactory.GetFileEncryptionProperties(connectionConfig, encryptionConfig); + + Assert.That(fileEncryptionProperties.FooterKey, Is.Not.Empty); + Assert.That(fileEncryptionProperties.FooterKeyMetadata, Is.Not.Empty); + Assert.That(fileEncryptionProperties.FooterKeyMetadata, Does.Contain("\"masterKeyID\":\"Key0\"")); + + using var col0Properties = fileEncryptionProperties.ColumnEncryptionProperties("col0")!; + Assert.That(col0Properties, Is.Not.Null); + Assert.That(col0Properties.Key, Is.Not.Empty); + Assert.That(col0Properties.KeyMetadata, Is.Not.Empty); + Assert.That(col0Properties.KeyMetadata, Does.Contain("\"masterKeyID\":\"Key1\"")); + + using var col1Properties = fileEncryptionProperties.ColumnEncryptionProperties("col1")!; + Assert.That(col1Properties, Is.Not.Null); + Assert.That(col1Properties.Key, Is.Not.Empty); + Assert.That(col1Properties.KeyMetadata, Is.Not.Empty); + Assert.That(col1Properties.KeyMetadata, Does.Contain("\"masterKeyID\":\"Key1\"")); + + using var col2Properties = fileEncryptionProperties.ColumnEncryptionProperties("col2"); + Assert.That(col2Properties, Is.Null); + } + + [Test] + public static void TestCreateDecryptionProperties() + { + using var cryptoFactory = new CryptoFactory(_ => new TestKmsClient()); + using var connectionConfig = new KmsConnectionConfig(); + using var decryptionConfig = new DecryptionConfiguration(); + using var fileDecryptionProperties = + cryptoFactory.GetFileDecryptionProperties(connectionConfig, decryptionConfig); + // There is a key retriever set internally, but we can't 
access it from C# + var retriever = fileDecryptionProperties.KeyRetriever; + Assert.That(retriever, Is.Null); + // Unlike the encryption side, the decryption keys aren't accessed until + // columns and metadata are decrypted, after reading the key metadata + } + + [Test] + public static void TestThrowingFactory() + { + using var cryptoFactory = new CryptoFactory(_ => throw new Exception("Test message")); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"col0", "col1"}} + }; + + var exception = Assert.Throws(() => + cryptoFactory.GetFileEncryptionProperties(connectionConfig, encryptionConfig)); + Assert.That(exception!.Message, Does.Contain("Test message")); + } + + [Test] + public static void TestConnectionConfigPassThrough() + { + const string kmsInstanceId = "123"; + const string kmsInstanceUrl = "https://example.com"; + const string keyAccessToken = "SECRET"; + const string updatedKeyAccessToken = "NEW_SECRET"; + var customKmsConf = new Dictionary + { + {"key", "value"} + }; + + using var connectionConfig = new KmsConnectionConfig(); + connectionConfig.KmsInstanceId = kmsInstanceId; + connectionConfig.KmsInstanceUrl = kmsInstanceUrl; + connectionConfig.KeyAccessToken = keyAccessToken; + connectionConfig.CustomKmsConf = customKmsConf; + + var configValid = false; + + using var cryptoFactory = new CryptoFactory(config => + { + Assert.That(config.KmsInstanceId, Is.EqualTo(kmsInstanceId)); + Assert.That(config.KmsInstanceUrl, Is.EqualTo(kmsInstanceUrl)); + Assert.That(config.KeyAccessToken, Is.EqualTo(updatedKeyAccessToken)); + Assert.That(config.CustomKmsConf, Is.EqualTo(customKmsConf)); + + configValid = true; + + return new TestKmsClient(); + }); + + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"col0", "col1"}} + }; + + 
connectionConfig.RefreshKeyAccessToken(updatedKeyAccessToken); + + using var fileEncryptionProperties = + cryptoFactory.GetFileEncryptionProperties(connectionConfig, encryptionConfig); + + Assert.That(configValid, Is.True); + } + + [Test] + public static void TestOwnership() + { + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.UniformEncryption = true; + + var clientRefs = new List(); + var (cryptoFactory, clientFactoryRef) = CreateCryptoFactory(clientRefs); + + using (cryptoFactory) + { + GcCollect(); + Assert.That(clientFactoryRef.IsAlive); + Assert.That(clientRefs, Is.Empty); + + using var properties0 = cryptoFactory.GetFileEncryptionProperties(connectionConfig, encryptionConfig); + + GcCollect(); + Assert.That(clientRefs.Count, Is.EqualTo(1)); + Assert.That(clientRefs[0].IsAlive); + + using var properties1 = cryptoFactory.GetFileEncryptionProperties(connectionConfig, encryptionConfig); + + GcCollect(); + Assert.That(clientRefs.Count, Is.EqualTo(1)); + Assert.That(clientRefs[0].IsAlive); + } + + GcCollect(); + Assert.That(clientRefs.Count, Is.EqualTo(1)); + Assert.That(clientRefs[0].IsAlive, Is.False); + Assert.That(clientFactoryRef.IsAlive, Is.False); + + var exception = Assert.Throws(() => + cryptoFactory.GetFileEncryptionProperties(connectionConfig, encryptionConfig)); + Assert.That(exception!.Message, Does.Contain("null native handle")); + } + + [Test] + [Explicit("long running manual test")] + public static void TestClientCaching() + { + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.CacheLifetimeSeconds = 1; + encryptionConfig.UniformEncryption = true; + + var clientRefs = new List(); + var (cryptoFactory, clientFactoryRef) = CreateCryptoFactory(clientRefs); + + const int numClients = 3; + using (cryptoFactory) + { + GcCollect(); + Assert.That(clientRefs, Is.Empty); + 
+ for (var i = 0; i < numClients; ++i) + { + using var properties = cryptoFactory.GetFileEncryptionProperties(connectionConfig, encryptionConfig); + Thread.Sleep(TimeSpan.FromSeconds(2)); + } + + GcCollect(); + Assert.That(clientRefs.Count, Is.EqualTo(numClients)); + for (var i = 0; i < numClients; ++i) + { + if (i == numClients - 1) + { + Assert.That(clientRefs[i].IsAlive, Is.True); + } + else + { + Assert.That(clientRefs[i].IsAlive, Is.False); + } + } + } + + GcCollect(); + Assert.That(clientFactoryRef.IsAlive, Is.False); + Assert.That(clientRefs.Count, Is.EqualTo(numClients)); + foreach (var clientRef in clientRefs) + { + Assert.That(clientRef.IsAlive, Is.False); + } + } + + private static (CryptoFactory, WeakReference) CreateCryptoFactory(List clientRefs) + { + CryptoFactory.KmsClientFactory kmsClientFactory = _ => + { + var client = new TestKmsClient(); + clientRefs.Add(new WeakReference(client)); + return client; + }; + var factoryRef = new WeakReference(kmsClientFactory); + return (new CryptoFactory(kmsClientFactory), factoryRef); + } + + private static void GcCollect() + { + GC.Collect(); + GC.WaitForPendingFinalizers(); + GC.Collect(); + } + } +} diff --git a/csharp.test/Encryption/TestDecryptionConfiguration.cs b/csharp.test/Encryption/TestDecryptionConfiguration.cs new file mode 100644 index 00000000..3af988f4 --- /dev/null +++ b/csharp.test/Encryption/TestDecryptionConfiguration.cs @@ -0,0 +1,27 @@ +using NUnit.Framework; +using ParquetSharp.Encryption; + +namespace ParquetSharp.Test.Encryption +{ + [TestFixture] + internal static class TestDecryptionConfiguration + { + [Test] + public static void TestDefaultConfiguration() + { + using var config = new DecryptionConfiguration(); + + Assert.That(config.CacheLifetimeSeconds, Is.EqualTo(600)); + } + + [Test] + public static void TestModifyConfiguration() + { + using var config = new DecryptionConfiguration(); + + config.CacheLifetimeSeconds = 300; + + Assert.That(config.CacheLifetimeSeconds, 
Is.EqualTo(300)); + } + } +} diff --git a/csharp.test/Encryption/TestEncryptionConfiguration.cs b/csharp.test/Encryption/TestEncryptionConfiguration.cs new file mode 100644 index 00000000..908efcd4 --- /dev/null +++ b/csharp.test/Encryption/TestEncryptionConfiguration.cs @@ -0,0 +1,60 @@ +using System.Collections.Generic; +using NUnit.Framework; +using ParquetSharp.Encryption; + +namespace ParquetSharp.Test.Encryption +{ + [TestFixture] + internal static class TestEncryptionConfiguration + { + [Test] + public static void TestDefaultConfiguration() + { + using var config = new EncryptionConfiguration("footer_key_id"); + + Assert.That(config.FooterKey, Is.EqualTo("footer_key_id")); + Assert.That(config.UniformEncryption, Is.False); + Assert.That(config.EncryptionAlgorithm, Is.EqualTo(ParquetCipher.AesGcmV1)); + Assert.That(config.PlaintextFooter, Is.False); + Assert.That(config.DoubleWrapping, Is.True); + Assert.That(config.CacheLifetimeSeconds, Is.EqualTo(600)); + Assert.That(config.InternalKeyMaterial, Is.True); + Assert.That(config.DataKeyLengthBits, Is.EqualTo(128)); + } + + [Test] + public static void TestConfigureEncryption() + { + using var config = new EncryptionConfiguration("footer_key_id"); + + config.FooterKey = "new_footer_key"; + config.ColumnKeys = new Dictionary> + { + {"key1", new[] {"col_a", "col_b"}}, + {"key2", new[] {"col_c"}}, + }; + config.UniformEncryption = true; + config.EncryptionAlgorithm = ParquetCipher.AesGcmCtrV1; + config.PlaintextFooter = true; + config.DoubleWrapping = false; + config.CacheLifetimeSeconds = 300; + config.InternalKeyMaterial = false; + config.DataKeyLengthBits = 256; + + Assert.That(config.FooterKey, Is.EqualTo("new_footer_key")); + + var columnKeys = config.ColumnKeys; + Assert.That(columnKeys.Count, Is.EqualTo(2)); + Assert.That(columnKeys["key1"], Is.EqualTo(new[] {"col_a", "col_b"})); + Assert.That(columnKeys["key2"], Is.EqualTo(new[] {"col_c"})); + + Assert.That(config.UniformEncryption, Is.True); + 
Assert.That(config.EncryptionAlgorithm, Is.EqualTo(ParquetCipher.AesGcmCtrV1)); + Assert.That(config.PlaintextFooter, Is.True); + Assert.That(config.DoubleWrapping, Is.False); + Assert.That(config.CacheLifetimeSeconds, Is.EqualTo(300)); + Assert.That(config.InternalKeyMaterial, Is.False); + Assert.That(config.DataKeyLengthBits, Is.EqualTo(256)); + } + } +} diff --git a/csharp.test/Encryption/TestEncryptionRoundTrip.cs b/csharp.test/Encryption/TestEncryptionRoundTrip.cs new file mode 100644 index 00000000..9401c4c0 --- /dev/null +++ b/csharp.test/Encryption/TestEncryptionRoundTrip.cs @@ -0,0 +1,730 @@ +using System; +using System.IO; +using System.Collections.Generic; +using NUnit.Framework; +using ParquetSharp.Encryption; +using ParquetSharp.IO; + +namespace ParquetSharp.Test.Encryption +{ + /// <summary> + /// Tests writing then reading with the high-level encryption API + /// </summary> + [TestFixture] + public class TestEncryptionRoundTrip + { + [Test] + public static void TestUniformEncryption() + { + var testClient = new TestKmsClient(); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.UniformEncryption = true; + using var decryptionConfig = new DecryptionConfiguration(); + + TestEncryptionRoundtrip( + connectionConfig, encryptionConfig, decryptionConfig, testClient, rowGroupMetaData => + { + using var colMetadata0 = rowGroupMetaData.GetColumnChunkMetaData(0); + using var colMetadata1 = rowGroupMetaData.GetColumnChunkMetaData(1); + using var crypto0 = colMetadata0.CryptoMetadata; + using var crypto1 = colMetadata1.CryptoMetadata; + + Assert.That(crypto0?.EncryptedWithFooterKey, Is.True); + Assert.That(crypto1?.EncryptedWithFooterKey, Is.True); + }); + + Assert.That(testClient.WrappedKeys.Count, Is.EqualTo(1)); + Assert.That(testClient.UnwrappedKeys.Count, Is.EqualTo(1)); + } + + [Test] + public static void TestSingleColumnEncryption() + { + var testClient = new TestKmsClient(); + using
var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Value"}}, + }; + using var decryptionConfig = new DecryptionConfiguration(); + + TestEncryptionRoundtrip( + connectionConfig, encryptionConfig, decryptionConfig, testClient, rowGroupMetaData => + { + using var colMetadata0 = rowGroupMetaData.GetColumnChunkMetaData(0); + using var colMetadata1 = rowGroupMetaData.GetColumnChunkMetaData(1); + using var crypto0 = colMetadata0.CryptoMetadata; + using var crypto1 = colMetadata1.CryptoMetadata; + + Assert.That(crypto0, Is.Null); + + Assert.That(crypto1?.EncryptedWithFooterKey, Is.False); + Assert.That(crypto1?.KeyMetadata, Does.Contain("\"masterKeyID\":\"Key1\"")); + using var path1 = crypto1?.ColumnPath; + Assert.That(path1?.ToDotString(), Is.EqualTo("Value")); + }); + + Assert.That(testClient.WrappedKeys.Count, Is.EqualTo(2)); + Assert.That(testClient.UnwrappedKeys.Count, Is.EqualTo(2)); + } + + [Test] + public static void TestColumnEncryption() + { + var testClient = new TestKmsClient(); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Id", "Value"}}, + }; + using var decryptionConfig = new DecryptionConfiguration(); + + TestEncryptionRoundtrip( + connectionConfig, encryptionConfig, decryptionConfig, testClient, rowGroupMetaData => + { + using var colMetadata0 = rowGroupMetaData.GetColumnChunkMetaData(0); + using var colMetadata1 = rowGroupMetaData.GetColumnChunkMetaData(1); + using var crypto0 = colMetadata0.CryptoMetadata; + using var crypto1 = colMetadata1.CryptoMetadata; + + Assert.That(crypto0?.EncryptedWithFooterKey, Is.False); + Assert.That(crypto0?.KeyMetadata, Does.Contain("\"masterKeyID\":\"Key1\"")); + using var path0 = crypto0?.ColumnPath; + Assert.That(path0?.ToDotString(), 
Is.EqualTo("Id")); + + Assert.That(crypto1?.EncryptedWithFooterKey, Is.False); + Assert.That(crypto1?.KeyMetadata, Does.Contain("\"masterKeyID\":\"Key1\"")); + using var path1 = crypto1?.ColumnPath; + Assert.That(path1?.ToDotString(), Is.EqualTo("Value")); + }); + + // Footer key and one KEK need to be encrypted by master keys + Assert.That(testClient.WrappedKeys.Count, Is.EqualTo(2)); + Assert.That(testClient.UnwrappedKeys.Count, Is.EqualTo(2)); + } + + [Test] + public static void TestPerColumnEncryption() + { + var testClient = new TestKmsClient(); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Id"}}, + {"Key2", new[] {"Value"}}, + }; + using var decryptionConfig = new DecryptionConfiguration(); + + TestEncryptionRoundtrip( + connectionConfig, encryptionConfig, decryptionConfig, testClient, rowGroupMetaData => + { + using var colMetadata0 = rowGroupMetaData.GetColumnChunkMetaData(0); + using var colMetadata1 = rowGroupMetaData.GetColumnChunkMetaData(1); + using var crypto0 = colMetadata0.CryptoMetadata; + using var crypto1 = colMetadata1.CryptoMetadata; + + Assert.That(crypto0?.EncryptedWithFooterKey, Is.False); + Assert.That(crypto0?.KeyMetadata, Does.Contain("\"masterKeyID\":\"Key1\"")); + using var path0 = crypto0?.ColumnPath; + Assert.That(path0?.ToDotString(), Is.EqualTo("Id")); + + Assert.That(crypto1?.EncryptedWithFooterKey, Is.False); + Assert.That(crypto1?.KeyMetadata, Does.Contain("\"masterKeyID\":\"Key2\"")); + using var path1 = crypto1?.ColumnPath; + Assert.That(path1?.ToDotString(), Is.EqualTo("Value")); + }); + + Assert.That(testClient.WrappedKeys.Count, Is.EqualTo(3)); + Assert.That(testClient.UnwrappedKeys.Count, Is.EqualTo(3)); + } + + [Test] + public static void TestSingleWrapping() + { + var testClient = new TestKmsClient(); + using var connectionConfig = new KmsConnectionConfig(); + using var 
encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Id", "Value"}}, + }; + encryptionConfig.DoubleWrapping = false; + using var decryptionConfig = new DecryptionConfiguration(); + + TestEncryptionRoundtrip( + connectionConfig, encryptionConfig, decryptionConfig, testClient, rowGroupMetaData => + { + using var colMetadata0 = rowGroupMetaData.GetColumnChunkMetaData(0); + using var colMetadata1 = rowGroupMetaData.GetColumnChunkMetaData(1); + using var crypto0 = colMetadata0.CryptoMetadata; + using var crypto1 = colMetadata1.CryptoMetadata; + + Assert.That(crypto0?.EncryptedWithFooterKey, Is.False); + Assert.That(crypto0?.KeyMetadata, Does.Contain("\"masterKeyID\":\"Key1\"")); + using var path0 = crypto0?.ColumnPath; + Assert.That(path0?.ToDotString(), Is.EqualTo("Id")); + + Assert.That(crypto1?.EncryptedWithFooterKey, Is.False); + Assert.That(crypto1?.KeyMetadata, Does.Contain("\"masterKeyID\":\"Key1\"")); + using var path1 = crypto1?.ColumnPath; + Assert.That(path1?.ToDotString(), Is.EqualTo("Value")); + }); + + // 1 for footer and 1 for each column, even though they use the same master key, + // as each data key needs to be encrypted separately by the master key rather than use a KEK. 
+ Assert.That(testClient.WrappedKeys.Count, Is.EqualTo(3)); + Assert.That(testClient.UnwrappedKeys.Count, Is.EqualTo(3)); + } + + [Test] + public static void TestUnencryptedMetadata() + { + var testClient = new TestKmsClient(); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Id", "Value"}}, + }; + encryptionConfig.PlaintextFooter = true; + using var decryptionConfig = new DecryptionConfiguration(); + + var kvMetadata = new Dictionary + { + {"abc", "123"}, + }; + + using var buffer = new ResizableBuffer(); + + using (var output = new BufferOutputStream(buffer)) + { + using var cryptoFactory = new CryptoFactory(_ => testClient); + using var fileEncryptionProperties = cryptoFactory.GetFileEncryptionProperties( + connectionConfig, encryptionConfig); + using var writerProperties = CreateWriterProperties(fileEncryptionProperties); + using var fileWriter = new ParquetFileWriter(output, Columns, writerProperties, kvMetadata); + WriteParquetFile(fileWriter); + } + + using (var input = new BufferReader(buffer)) + { + using var fileReader = new ParquetFileReader(input); + + // Can read file schema and metadata + using var metadata = fileReader.FileMetaData; + Assert.That(metadata.NumColumns, Is.EqualTo(2)); + Assert.That(metadata.NumRows, Is.EqualTo(7)); + var col0 = metadata.Schema.Column(0); + var col1 = metadata.Schema.Column(1); + Assert.That(col0.Name, Is.EqualTo("Id")); + Assert.That(col0.PhysicalType, Is.EqualTo(PhysicalType.Int32)); + Assert.That(col1.Name, Is.EqualTo("Value")); + Assert.That(col1.PhysicalType, Is.EqualTo(PhysicalType.Float)); + + Assert.That(metadata.KeyValueMetadata, Is.EqualTo(kvMetadata)); + + var exception = Assert.Throws(() => ReadParquetFile(fileReader)); + Assert.That(exception!.Message, Does.Contain("Cannot decrypt ColumnMetadata")); + } + } + + [Test] + public static void 
TestUnencryptedMetadataWithSingleColumnEncryption() + { + var testClient = new TestKmsClient(); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Value"}}, + }; + encryptionConfig.PlaintextFooter = true; + using var decryptionConfig = new DecryptionConfiguration(); + + using var buffer = new ResizableBuffer(); + + using (var output = new BufferOutputStream(buffer)) + { + using var cryptoFactory = new CryptoFactory(_ => testClient); + using var fileEncryptionProperties = cryptoFactory.GetFileEncryptionProperties( + connectionConfig, encryptionConfig); + using var writerProperties = CreateWriterProperties(fileEncryptionProperties); + using var fileWriter = new ParquetFileWriter(output, Columns, writerProperties); + WriteParquetFile(fileWriter); + } + + using (var input = new BufferReader(buffer)) + { + using var fileReader = new ParquetFileReader(input); + using var groupReader = fileReader.RowGroup(0); + + // Can read first column + using (var idReader = groupReader.Column(0).LogicalReader()) + { + Assert.AreEqual(Ids, idReader.ReadAll((int) groupReader.MetaData.NumRows)); + } + + // Can't read second column + var exception = Assert.Throws(() => groupReader.Column(1)); + Assert.That(exception!.Message, Does.Contain("Cannot decrypt ColumnMetadata")); + } + } + + [Test] + public static void TestEncryptWithMissingKeys() + { + var client = new TestKmsClient(new Dictionary + { + {"Key99", new byte[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}, + }); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Id", "Value"}}, + }; + using var decryptionConfig = new DecryptionConfiguration(); + + using var buffer = new ResizableBuffer(); + + using var output = new 
BufferOutputStream(buffer); + using var cryptoFactory = new CryptoFactory(_ => client); + var exception = Assert.Throws(() => cryptoFactory.GetFileEncryptionProperties( + connectionConfig, encryptionConfig)); + Assert.That(exception!.Message, Does.Contain("KeyNotFoundException")); + } + + [Test] + public static void TestDecryptWithMissingKeys() + { + var encryptionClient = new TestKmsClient(); + var decryptionClient = new TestKmsClient(new Dictionary + { + {"Key99", new byte[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}, + }); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Id", "Value"}}, + }; + using var decryptionConfig = new DecryptionConfiguration(); + + using var buffer = new ResizableBuffer(); + + using (var output = new BufferOutputStream(buffer)) + { + using var cryptoFactory = new CryptoFactory(_ => encryptionClient); + using var fileEncryptionProperties = cryptoFactory.GetFileEncryptionProperties( + connectionConfig, encryptionConfig); + using var writerProperties = CreateWriterProperties(fileEncryptionProperties); + using var fileWriter = new ParquetFileWriter(output, Columns, writerProperties); + WriteParquetFile(fileWriter); + } + + using (var input = new BufferReader(buffer)) + { + using var cryptoFactory = new CryptoFactory(_ => decryptionClient); + using var fileDecryptionProperties = cryptoFactory.GetFileDecryptionProperties( + connectionConfig, decryptionConfig); + using var readerProperties = CreateReaderProperties(fileDecryptionProperties); + var exception = Assert.Throws(() => new ParquetFileReader(input, readerProperties)); + Assert.That(exception!.Message, Does.Contain("KeyNotFoundException")); + } + } + + [Test] + public static void TestDecryptWithIncorrectKeys() + { + var encryptionClient = new TestKmsClient(); + var decryptionClient = new TestKmsClient(new Dictionary + { + 
{"Key0", TestKmsClient.DefaultMasterKeys["Key1"]}, + {"Key1", TestKmsClient.DefaultMasterKeys["Key2"]}, + {"Key2", TestKmsClient.DefaultMasterKeys["Key0"]}, + }); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Id", "Value"}}, + }; + using var decryptionConfig = new DecryptionConfiguration(); + + using var buffer = new ResizableBuffer(); + + using (var output = new BufferOutputStream(buffer)) + { + using var cryptoFactory = new CryptoFactory(_ => encryptionClient); + using var fileEncryptionProperties = cryptoFactory.GetFileEncryptionProperties( + connectionConfig, encryptionConfig); + using var writerProperties = CreateWriterProperties(fileEncryptionProperties); + using var fileWriter = new ParquetFileWriter(output, Columns, writerProperties); + WriteParquetFile(fileWriter); + } + + using (var input = new BufferReader(buffer)) + { + using var cryptoFactory = new CryptoFactory(_ => decryptionClient); + using var fileDecryptionProperties = cryptoFactory.GetFileDecryptionProperties( + connectionConfig, decryptionConfig); + using var readerProperties = CreateReaderProperties(fileDecryptionProperties); + // Exception is thrown in TestKmsClient when trying to decrypt the key-encryption key + Assert.Throws(() => new ParquetFileReader(input, readerProperties)); + } + } + + [Test] + public static void TestInvalidColumnSpecified() + { + var testClient = new TestKmsClient(); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Id", "Value", "InvalidColumn"}}, + }; + using var decryptionConfig = new DecryptionConfiguration(); + + using var buffer = new ResizableBuffer(); + using var output = new BufferOutputStream(buffer); + using var cryptoFactory = new CryptoFactory(_ => testClient); + 
using var fileEncryptionProperties = cryptoFactory.GetFileEncryptionProperties( + connectionConfig, encryptionConfig); + using var writerProperties = CreateWriterProperties(fileEncryptionProperties); + var exception = Assert.Throws(() => new ParquetFileWriter(output, Columns, writerProperties)); + Assert.That(exception!.Message, Does.Contain("InvalidColumn")); + } + + [Test] + public static void TestExternalKeyMaterial() + { + using var tmpDir = new TempWorkingDirectory(); + var testClient = new TestKmsClient(); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Id", "Value"}}, + }; + encryptionConfig.InternalKeyMaterial = false; + using var decryptionConfig = new DecryptionConfiguration(); + + TestEncryptionRoundtripWithFileSystem( + tmpDir.DirectoryPath, connectionConfig, encryptionConfig, decryptionConfig, testClient); + + var expectedMaterialPath = tmpDir.DirectoryPath + "/_KEY_MATERIAL_FOR_data.parquet.json"; + Assert.That(File.Exists(expectedMaterialPath)); + } + + [Test] + public static void TestWriteExternalKeyMaterialWithoutFilePath() + { + var testClient = new TestKmsClient(); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Id", "Value"}}, + }; + encryptionConfig.InternalKeyMaterial = false; + using var decryptionConfig = new DecryptionConfiguration(); + + using var cryptoFactory = new CryptoFactory(_ => testClient); + + var exception = Assert.Throws( + () => cryptoFactory.GetFileEncryptionProperties(connectionConfig, encryptionConfig)); + Assert.That(exception!.Message, Does.Contain("Parquet file path must be specified")); + } + + [Test] + public static void TestReadExternalKeyMaterialWithoutFilePath() + { + var testClient = new TestKmsClient(); + using var 
connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Id", "Value"}}, + }; + encryptionConfig.InternalKeyMaterial = false; + using var decryptionConfig = new DecryptionConfiguration(); + + using var tmpDir = new TempWorkingDirectory(); + var filePath = tmpDir.DirectoryPath + "/data.parquet"; + + using var cryptoFactory = new CryptoFactory(_ => testClient); + + { + using var fileEncryptionProperties = cryptoFactory.GetFileEncryptionProperties( + connectionConfig, encryptionConfig, filePath: filePath); + using var writerProperties = CreateWriterProperties(fileEncryptionProperties); + using var fileWriter = new ParquetFileWriter(filePath, Columns, writerProperties); + WriteParquetFile(fileWriter); + } + + { + using var fileDecryptionProperties = cryptoFactory.GetFileDecryptionProperties(connectionConfig, decryptionConfig); + using var readerProperties = CreateReaderProperties(fileDecryptionProperties); + var exception = Assert.Throws(() => new ParquetFileReader(filePath, readerProperties)); + Assert.That(exception!.Message, Does.Contain("Parquet file path must be specified")); + } + } + + [Test] + public static void TestKeyRotation([Values] bool doubleWrapping) + { + using var tmpDir = new TempWorkingDirectory(); + using var connectionConfig = new KmsConnectionConfig(); + using var encryptionConfig = new EncryptionConfiguration("Key0"); + encryptionConfig.ColumnKeys = new Dictionary> + { + {"Key1", new[] {"Id"}}, + {"Key2", new[] {"Value"}}, + }; + encryptionConfig.InternalKeyMaterial = false; + encryptionConfig.DoubleWrapping = doubleWrapping; + using var decryptionConfig = new DecryptionConfiguration(); + + var newKey0 = new byte[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 99}; + var newKey1 = new byte[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 99}; + var newKey2 = new byte[] {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15, 16, 99}; + + // Encrypt with a client that only knows version 0 keys + var encryptionClient = new TestKmsClient(); + + // Rotate with a client that can decrypt version 0 and re-encrypt with version 1 + var rotationClient = new TestKmsClient(new Dictionary> + { + { + "Key0", new Dictionary + { + {0, TestKmsClient.DefaultMasterKeys["Key0"]}, + {1, newKey0}, + } + }, + { + "Key1", new Dictionary + { + {0, TestKmsClient.DefaultMasterKeys["Key1"]}, + {1, newKey1}, + } + }, + { + "Key2", new Dictionary + { + {0, TestKmsClient.DefaultMasterKeys["Key2"]}, + {1, newKey2}, + } + }, + }); + + // Use a client that only knows version 1 keys to test decryption + var decryptionClient = new TestKmsClient(new Dictionary> + { + { + "Key0", new Dictionary + { + {1, newKey0}, + } + }, + { + "Key1", new Dictionary + { + {1, newKey1}, + } + }, + { + "Key2", new Dictionary + { + {1, newKey2}, + } + }, + }); + + // And test with a client that pretends to know the latest version but they're actually the old version keys + var invalidClient = new TestKmsClient(new Dictionary> + { + { + "Key0", new Dictionary + { + {1, TestKmsClient.DefaultMasterKeys["Key0"]}, + } + }, + { + "Key1", new Dictionary + { + {1, TestKmsClient.DefaultMasterKeys["Key1"]}, + } + }, + { + "Key2", new Dictionary + { + {1, TestKmsClient.DefaultMasterKeys["Key2"]}, + } + }, + }); + + var filePath = tmpDir.DirectoryPath + "/data.parquet"; + + { + using var cryptoFactory = new CryptoFactory(_ => encryptionClient); + using var fileEncryptionProperties = cryptoFactory.GetFileEncryptionProperties( + connectionConfig, encryptionConfig, filePath: filePath); + using var writerProperties = CreateWriterProperties(fileEncryptionProperties); + using var fileWriter = new ParquetFileWriter(filePath, Columns, writerProperties); + WriteParquetFile(fileWriter); + } + + { + using var cryptoFactory = new CryptoFactory(_ => rotationClient); + cryptoFactory.RotateMasterKeys( + connectionConfig, filePath, doubleWrapping); + } + + { + 
using var cryptoFactory = new CryptoFactory(_ => decryptionClient); + using var fileDecryptionProperties = cryptoFactory.GetFileDecryptionProperties( + connectionConfig, decryptionConfig, filePath: filePath); + using var readerProperties = CreateReaderProperties(fileDecryptionProperties); + using var fileReader = new ParquetFileReader(filePath, readerProperties); + ReadParquetFile(fileReader); + } + + { + using var cryptoFactory = new CryptoFactory(_ => invalidClient); + using var fileDecryptionProperties = cryptoFactory.GetFileDecryptionProperties( + connectionConfig, decryptionConfig, filePath: filePath); + using var readerProperties = CreateReaderProperties(fileDecryptionProperties); + Assert.Throws(() => new ParquetFileReader(filePath, readerProperties)); + } + } + + private static void TestEncryptionRoundtrip( + KmsConnectionConfig connectionConfig, + EncryptionConfiguration encryptionConfiguration, + DecryptionConfiguration decryptionConfiguration, + IKmsClient client, + Action? onGroupMetadata = null) + { + using var buffer = new ResizableBuffer(); + CryptoFactory.KmsClientFactory kmsClientFactory = _ => client; + + using (var output = new BufferOutputStream(buffer)) + { + using var cryptoFactory = new CryptoFactory(kmsClientFactory); + using var fileEncryptionProperties = cryptoFactory.GetFileEncryptionProperties( + connectionConfig, encryptionConfiguration); + using var writerProperties = CreateWriterProperties(fileEncryptionProperties); + using var fileWriter = new ParquetFileWriter(output, Columns, writerProperties); + WriteParquetFile(fileWriter); + } + + using (var input = new BufferReader(buffer)) + { + using var cryptoFactory = new CryptoFactory(kmsClientFactory); + using var fileDecryptionProperties = cryptoFactory.GetFileDecryptionProperties( + connectionConfig, decryptionConfiguration); + using var readerProperties = CreateReaderProperties(fileDecryptionProperties); + using var fileReader = new ParquetFileReader(input, readerProperties); + 
ReadParquetFile(fileReader, onGroupMetadata); + } + } + + private static void TestEncryptionRoundtripWithFileSystem( + string workingDirectory, + KmsConnectionConfig connectionConfig, + EncryptionConfiguration encryptionConfiguration, + DecryptionConfiguration decryptionConfiguration, + IKmsClient client, + Action? onGroupMetadata = null) + { + var filePath = workingDirectory + "/data.parquet"; + CryptoFactory.KmsClientFactory kmsClientFactory = _ => client; + + { + using var cryptoFactory = new CryptoFactory(kmsClientFactory); + using var fileEncryptionProperties = cryptoFactory.GetFileEncryptionProperties( + connectionConfig, encryptionConfiguration, filePath: filePath); + using var writerProperties = CreateWriterProperties(fileEncryptionProperties); + using var fileWriter = new ParquetFileWriter(filePath, Columns, writerProperties); + WriteParquetFile(fileWriter); + } + + { + using var cryptoFactory = new CryptoFactory(kmsClientFactory); + using var fileDecryptionProperties = cryptoFactory.GetFileDecryptionProperties( + connectionConfig, decryptionConfiguration, filePath: filePath); + using var readerProperties = CreateReaderProperties(fileDecryptionProperties); + using var fileReader = new ParquetFileReader(filePath, readerProperties); + ReadParquetFile(fileReader, onGroupMetadata); + } + } + + private static void WriteParquetFile(ParquetFileWriter fileWriter) + { + using var groupWriter = fileWriter.AppendRowGroup(); + + using (var idWriter = groupWriter.NextColumn().LogicalWriter()) + { + idWriter.WriteBatch(Ids); + } + + using (var valueWriter = groupWriter.NextColumn().LogicalWriter()) + { + valueWriter.WriteBatch(Values); + } + } + + private static void ReadParquetFile( + ParquetFileReader fileReader, Action? 
onGroupMetadata = null) + { + using var groupReader = fileReader.RowGroup(0); + + var metaData = groupReader.MetaData; + var numRows = (int) metaData.NumRows; + + onGroupMetadata?.Invoke(metaData); + + using (var idReader = groupReader.Column(0).LogicalReader()) + { + Assert.AreEqual(Ids, idReader.ReadAll(numRows)); + } + + using (var valueReader = groupReader.Column(1).LogicalReader()) + { + Assert.AreEqual(Values, valueReader.ReadAll(numRows)); + } + } + + private static WriterProperties CreateWriterProperties(FileEncryptionProperties? fileEncryptionProperties) + { + using var builder = new WriterPropertiesBuilder(); + + return builder + .Compression(Compression.Snappy) + .Encryption(fileEncryptionProperties) + .Build(); + } + + private static ReaderProperties CreateReaderProperties(FileDecryptionProperties? fileDecryptionProperties) + { + var readerProperties = ReaderProperties.GetDefaultReaderProperties(); + readerProperties.FileDecryptionProperties = fileDecryptionProperties; + return readerProperties; + } + + private static readonly Column[] Columns = + { + new Column("Id"), + new Column("Value") + }; + + private static readonly int[] Ids = {1, 2, 3, 5, 7, 8, 13}; + private static readonly float[] Values = {3.14f, 1.27f, 42.0f, 10.6f, 9.81f, 2.71f, -1f}; + } +} diff --git a/csharp.test/Encryption/TestKmsClient.cs b/csharp.test/Encryption/TestKmsClient.cs new file mode 100644 index 00000000..b3810c11 --- /dev/null +++ b/csharp.test/Encryption/TestKmsClient.cs @@ -0,0 +1,107 @@ +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Security.Cryptography; +using ParquetSharp.Encryption; + +namespace ParquetSharp.Test.Encryption +{ + /// + /// Test KMS client with hard-coded master keys. + /// Supports key-versioning to allow testing key rotation. 
+ /// + internal sealed class TestKmsClient : IKmsClient + { + public TestKmsClient() : this(DefaultMasterKeys) + { + } + + public TestKmsClient(IReadOnlyDictionary masterKeys) : this(ToVersionedKeys(masterKeys)) + { + } + + public TestKmsClient(IReadOnlyDictionary> masterKeys) + { + _masterKeys = masterKeys; + } + + public string WrapKey(byte[] keyBytes, string masterKeyIdentifier) + { + WrappedKeys.Add(keyBytes); + var masterKeys = _masterKeys[masterKeyIdentifier]; + var keyVersion = masterKeys.Keys.Max(); + var masterKey = masterKeys[keyVersion]; + using var aes = Aes.Create(); + aes.Key = masterKey; + using var encryptor = aes.CreateEncryptor(aes.Key, aes.IV); + var encrypted = EncryptBytes(encryptor, keyBytes); + return $"{keyVersion}:{System.Convert.ToBase64String(aes.IV)}:{System.Convert.ToBase64String(encrypted)}"; + } + + public byte[] UnwrapKey(string wrappedKey, string masterKeyIdentifier) + { + UnwrappedKeys.Add(wrappedKey); + var split = wrappedKey.Split(':'); + var keyVersion = int.Parse(split[0]); + var iv = System.Convert.FromBase64String(split[1]); + var encryptedKey = System.Convert.FromBase64String(split[2]); + var masterKey = _masterKeys[masterKeyIdentifier][keyVersion]; + using var aes = Aes.Create(); + aes.Key = masterKey; + aes.IV = iv; + using var decryptor = aes.CreateDecryptor(aes.Key, aes.IV); + return DecryptBytes(decryptor, encryptedKey); + } + + public readonly List WrappedKeys = new(); + + public readonly List UnwrappedKeys = new(); + + public static readonly Dictionary DefaultMasterKeys = new() + { + {"Key0", new byte[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}, + {"Key1", new byte[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}}, + {"Key2", new byte[] {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}}, + }; + + private static byte[] EncryptBytes(ICryptoTransform encryptor, byte[] plainText) + { + using var memoryStream = new MemoryStream(); + using (var cryptoStream = new CryptoStream(memoryStream, 
encryptor, CryptoStreamMode.Write)) + { + cryptoStream.Write(plainText, 0, plainText.Length); + } + + return memoryStream.ToArray(); + } + + private static byte[] DecryptBytes(ICryptoTransform decryptor, byte[] cipherText) + { + using var memoryStream = new MemoryStream(cipherText); + using var cryptoStream = new CryptoStream(memoryStream, decryptor, CryptoStreamMode.Read); + var buffer = new byte[16]; + var offset = 0; + while (true) + { + var read = cryptoStream.Read(buffer, offset, buffer.Length - offset); + if (read == 0) + { + break; + } + + offset += read; + } + + return buffer.Take(offset).ToArray(); + } + + private static IReadOnlyDictionary> ToVersionedKeys(IReadOnlyDictionary masterKeys) + { + return masterKeys.ToDictionary( + kvp => kvp.Key, + kvp => (IReadOnlyDictionary) new Dictionary {{0, kvp.Value}}); + } + + private readonly IReadOnlyDictionary> _masterKeys; + } +} diff --git a/csharp.test/Encryption/TestKmsConnectionConfig.cs b/csharp.test/Encryption/TestKmsConnectionConfig.cs new file mode 100644 index 00000000..01c24aff --- /dev/null +++ b/csharp.test/Encryption/TestKmsConnectionConfig.cs @@ -0,0 +1,54 @@ +using System.Collections.Generic; +using NUnit.Framework; +using ParquetSharp.Encryption; + +namespace ParquetSharp.Test.Encryption +{ + [TestFixture] + internal static class TestKmsConnectionConfig + { + [Test] + public static void TestEmptyConfig() + { + using var config = new KmsConnectionConfig(); + + Assert.That(config.KmsInstanceId, Is.Empty); + Assert.That(config.KmsInstanceUrl, Is.Empty); + Assert.That(config.KeyAccessToken, Is.EqualTo("DEFAULT")); + Assert.That(config.CustomKmsConf, Is.Empty); + } + + [Test] + public static void TestCreateConfig() + { + using var config = new KmsConnectionConfig(); + config.KmsInstanceId = "kms_id"; + config.KmsInstanceUrl = "https://example.com"; + config.KeyAccessToken = "12345"; + config.CustomKmsConf = new Dictionary + { + {"abc", "def"}, + {"ghi", "jkl"}, + }; + + Assert.That(config.KmsInstanceId, 
Is.EqualTo("kms_id")); + Assert.That(config.KmsInstanceUrl, Is.EqualTo("https://example.com")); + Assert.That(config.KeyAccessToken, Is.EqualTo("12345")); + var customConf = config.CustomKmsConf; + Assert.That(customConf.Count, Is.EqualTo(2)); + Assert.That(customConf["abc"], Is.EqualTo("def")); + Assert.That(customConf["ghi"], Is.EqualTo("jkl")); + } + + [Test] + public static void TestRefreshAccessToken() + { + using var config = new KmsConnectionConfig(); + config.KeyAccessToken = "12345"; + + config.RefreshKeyAccessToken("67890"); + + Assert.That(config.KeyAccessToken, Is.EqualTo("67890")); + } + } +} diff --git a/csharp.test/TempWorkingDirectory.cs b/csharp.test/TempWorkingDirectory.cs new file mode 100644 index 00000000..41adcf07 --- /dev/null +++ b/csharp.test/TempWorkingDirectory.cs @@ -0,0 +1,27 @@ +using System; +using System.IO; + +namespace ParquetSharp.Test +{ + internal sealed class TempWorkingDirectory : IDisposable + { + public TempWorkingDirectory() + { + _originalWorkingDirectory = Directory.GetCurrentDirectory(); + _directoryPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); + Directory.CreateDirectory(_directoryPath); + Directory.SetCurrentDirectory(_directoryPath); + } + + public void Dispose() + { + Directory.SetCurrentDirectory(_originalWorkingDirectory); + Directory.Delete(_directoryPath, recursive: true); + } + + public string DirectoryPath => _directoryPath; + + private readonly string _directoryPath; + private readonly string _originalWorkingDirectory; + } +} diff --git a/csharp.test/TestBuffer.cs b/csharp.test/TestBuffer.cs index 8a0aec5f..72bf9d97 100644 --- a/csharp.test/TestBuffer.cs +++ b/csharp.test/TestBuffer.cs @@ -1,5 +1,6 @@ using System; using System.Linq; +using System.Runtime.InteropServices; using ParquetSharp.IO; using NUnit.Framework; @@ -84,5 +85,18 @@ public static void TestBufferOutputStreamFinish() var allData = columnReader.ReadAll((int) rowGroup.MetaData.NumRows); Assert.AreEqual(expected, allData); } 
+ + [Test] + public static void TestResizeBuffer() + { + using var buffer = new ResizableBuffer(initialSize: 128); + const int newLength = 256; + buffer.Resize(newLength); + var values = Enumerable.Range(0, newLength).Select(i => (byte) i).ToArray(); + Marshal.Copy(values, 0, buffer.MutableData, newLength); + var readValues = new byte[newLength]; + Marshal.Copy(buffer.Data, readValues, 0, newLength); + Assert.That(readValues, Is.EqualTo(values)); + } } } diff --git a/csharp.test/TestEncryption.cs b/csharp.test/TestEncryption.cs index 59343faa..0cc076ea 100644 --- a/csharp.test/TestEncryption.cs +++ b/csharp.test/TestEncryption.cs @@ -5,6 +5,9 @@ namespace ParquetSharp.Test { + /// + /// These test the low level encryption API, where AES keys are provided directly + /// [TestFixture] internal static class TestEncryption { @@ -62,6 +65,29 @@ public static void TestEncryptAllSameKey() [Test] public static void TestEncryptAllSeparateKeys() + { + // Case where the footer and all columns are encrypted all with different keys. + AssertEncryptionRoundtrip(CreateEncryptAllSeparateKeysProperties, CreateDecryptWithSeparateKeyProperties, rowGroupMetadata => + { + using var colMetadata0 = rowGroupMetadata.GetColumnChunkMetaData(0); + using var colMetadata1 = rowGroupMetadata.GetColumnChunkMetaData(1); + using var crypto0 = colMetadata0.CryptoMetadata; + using var crypto1 = colMetadata1.CryptoMetadata; + + using var path0 = crypto0?.ColumnPath; + Assert.AreEqual("Id", path0?.ToDotString()); + Assert.AreEqual(false, crypto0?.EncryptedWithFooterKey); + Assert.AreEqual("Key1", crypto0?.KeyMetadata); + + using var path1 = crypto1?.ColumnPath; + Assert.AreEqual("Value", path1?.ToDotString()); + Assert.AreEqual(false, crypto1?.EncryptedWithFooterKey); + Assert.AreEqual("Key2", crypto1?.KeyMetadata); + }); + } + + [Test] + public static void TestEncryptAllSeparateKeysWithKeyRetriever() { // Case where the footer and all columns are encrypted all with different keys. 
AssertEncryptionRoundtrip(CreateEncryptAllSeparateKeysProperties, CreateDecryptWithKeyRetrieverProperties, rowGroupMetadata => @@ -109,7 +135,7 @@ public static void TestEncryptJustColumns() [Test] public static void TestEncryptJustOneColumn() { - // Case where the footer is unencrypted and all columns are encrypted all with different keys. + // Case where the footer is unencrypted and only a single column is encrypted using var buffer = new ResizableBuffer(); using (var output = new BufferOutputStream(buffer)) @@ -153,6 +179,29 @@ public static void TestEncryptJustOneColumn() } } + [Test] + public static void TestVerifyAadPrefix([Values] bool useVerifier) + { + FileDecryptionProperties? GetDecryptionProperties() => + useVerifier ? CreateDecryptWithAadPrefixVerifierProperties() : CreateDecryptWithAadPrefixProperties(); + + AssertEncryptionRoundtrip(CreateEncryptWithAadPrefixProperties, GetDecryptionProperties); + } + + [Test] + public static void TestVerifyInvalidAadPrefix([Values] bool useVerifier) + { + FileDecryptionProperties? GetDecryptionProperties() => + useVerifier ? CreateDecryptWithAadPrefixVerifierProperties() : CreateDecryptWithAadPrefixProperties(); + + var exception = Assert.Throws(() => AssertEncryptionRoundtrip( + CreateEncryptWithDifferentAadPrefixProperties, GetDecryptionProperties)); + var expectedMessage = useVerifier + ? 
"Got unexpected AAD prefix: unexpected-prefix" + : "AAD Prefix in file and in properties is not the same"; + Assert.That(exception?.Message, Contains.Substring(expectedMessage)); + } + // Encrypt Properties private static FileEncryptionProperties CreateEncryptSameKeyProperties() @@ -224,6 +273,26 @@ private static FileEncryptionProperties CreateEncryptJustOneColumnProperties() .Build(); } + private static FileEncryptionProperties CreateEncryptWithAadPrefixProperties() + { + using var builder = new FileEncryptionPropertiesBuilder(Key0); + + return builder + .FooterKeyMetadata("Key0") + .AadPrefix("expected-prefix") + .Build(); + } + + private static FileEncryptionProperties CreateEncryptWithDifferentAadPrefixProperties() + { + using var builder = new FileEncryptionPropertiesBuilder(Key0); + + return builder + .FooterKeyMetadata("Key0") + .AadPrefix("unexpected-prefix") + .Build(); + } + // Decrypt Properties private static FileDecryptionProperties CreateDecryptAllSameKeyProperties() @@ -235,6 +304,22 @@ private static FileDecryptionProperties CreateDecryptAllSameKeyProperties() .Build(); } + private static FileDecryptionProperties CreateDecryptWithSeparateKeyProperties() + { + using var builder = new FileDecryptionPropertiesBuilder(); + + using var col0Builder = new ColumnDecryptionPropertiesBuilder("Id"); + using var col0Properties = col0Builder.Key(Key1).Build(); + + using var col1Builder = new ColumnDecryptionPropertiesBuilder("Value"); + using var col1Properties = col1Builder.Key(Key2).Build(); + + return builder + .FooterKey(Key0) + .ColumnKeys(new[] {col0Properties, col1Properties}) + .Build(); + } + private static FileDecryptionProperties CreateDecryptWithKeyRetrieverProperties() { using var builder = new FileDecryptionPropertiesBuilder(); @@ -244,6 +329,26 @@ private static FileDecryptionProperties CreateDecryptWithKeyRetrieverProperties( .Build(); } + private static FileDecryptionProperties CreateDecryptWithAadPrefixProperties() + { + using var builder = 
new FileDecryptionPropertiesBuilder(); + + return builder + .KeyRetriever(new TestRetriever()) + .AadPrefix("expected-prefix") + .Build(); + } + + private static FileDecryptionProperties CreateDecryptWithAadPrefixVerifierProperties() + { + using var builder = new FileDecryptionPropertiesBuilder(); + + return builder + .KeyRetriever(new TestRetriever()) + .AadPrefixVerifier(new TestAadVerifier()) + .Build(); + } + private static void AssertEncryptionRoundtrip( Func createFileEncryptionProperties, Func createFileDecryptionProperties, @@ -334,17 +439,28 @@ public override byte[] GetKey(string keyMetadata) } } + private sealed class TestAadVerifier : AadPrefixVerifier + { + public override void Verify(string aadPrefix) + { + if (aadPrefix != "expected-prefix") + { + throw new Exception($"Got unexpected AAD prefix: {aadPrefix}"); + } + } + } + private static readonly byte[] Key0 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; private static readonly byte[] Key1 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; private static readonly byte[] Key2 = {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; - public static readonly Column[] Columns = + private static readonly Column[] Columns = { new Column("Id"), new Column("Value") }; - public static readonly int[] Ids = {1, 2, 3, 5, 7, 8, 13}; - public static readonly float[] Values = {3.14f, 1.27f, 42.0f, 10.6f, 9.81f, 2.71f, -1f}; + private static readonly int[] Ids = {1, 2, 3, 5, 7, 8, 13}; + private static readonly float[] Values = {3.14f, 1.27f, 42.0f, 10.6f, 9.81f, 2.71f, -1f}; } } diff --git a/csharp.test/TestParquetFileWriter.cs b/csharp.test/TestParquetFileWriter.cs index df116640..4058765f 100644 --- a/csharp.test/TestParquetFileWriter.cs +++ b/csharp.test/TestParquetFileWriter.cs @@ -384,26 +384,4 @@ void WriteFile() Task.WaitAll(running); } } - - internal sealed class TempWorkingDirectory : IDisposable - { - public TempWorkingDirectory() - { - _originalWorkingDirectory = 
Directory.GetCurrentDirectory(); - _directoryPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName()); - Directory.CreateDirectory(_directoryPath); - Directory.SetCurrentDirectory(_directoryPath); - } - - public void Dispose() - { - Directory.SetCurrentDirectory(_originalWorkingDirectory); - Directory.Delete(_directoryPath, recursive: true); - } - - public string DirectoryPath => _directoryPath; - - private readonly string _directoryPath; - private readonly string _originalWorkingDirectory; - } } diff --git a/csharp/Encryption/CryptoFactory.cs b/csharp/Encryption/CryptoFactory.cs new file mode 100644 index 00000000..784bb392 --- /dev/null +++ b/csharp/Encryption/CryptoFactory.cs @@ -0,0 +1,234 @@ +using System; +using System.Runtime.InteropServices; +using ParquetSharp.IO; + +namespace ParquetSharp.Encryption +{ + /// + /// Translates high-level encryption configuration into low-level encryption parameters + /// + public sealed class CryptoFactory : IDisposable + { + public delegate IKmsClient KmsClientFactory(ReadonlyKmsConnectionConfig config); + + /// + /// Create a new CryptoFactory + /// + /// Creates KMS clients from a connection configuration + public unsafe CryptoFactory(KmsClientFactory kmsClientFactory) + { + var handle = ExceptionInfo.Return(CryptoFactory_Create); + _handle = new ParquetHandle(handle, CryptoFactory_Free); + + ExceptionInfo.Check(CryptoFactory_RegisterKmsClientFactory( + _handle.IntPtr, + CreateClientFactoryGcHandle(kmsClientFactory), + FreeGcHandleCallback, + CreateClientCallback, + WrapKeyCallback, + UnwrapKeyCallback)); + } + + /// + /// Get the encryption properties for a Parquet file. + /// If external key material is used then the path to the Parquet file must be provided. 
+ /// + /// The KMS connection configuration to use + /// The encryption configuration to use + /// The path to the Parquet file being written + /// Encryption properties for the file + public FileEncryptionProperties GetFileEncryptionProperties( + KmsConnectionConfig connectionConfig, + EncryptionConfiguration encryptionConfig, + string? filePath = null) + { + var fileEncryptionPropertiesHandle = ExceptionInfo.Return( + _handle.IntPtr, connectionConfig.Handle.IntPtr, encryptionConfig.Handle.IntPtr, filePath, CryptoFactory_GetFileEncryptionProperties); + return new FileEncryptionProperties(fileEncryptionPropertiesHandle); + } + + + /// + /// Get decryption properties for a Parquet file. + /// If external key material is used then the path to the parquet file must be provided. + /// This CryptoFactory instance must remain alive and not disposed until after any files using these + /// decryption properties have been read, as internally the FileDecryptionProperties contains references to + /// data in the CryptoFactory that cannot be managed by ParquetSharp. + /// Failure to do so may result in native memory access violations and crashes that cannot be caught as exceptions. + /// + /// The KMS connection configuration to use + /// The decryption configuration to use + /// The path to the Parquet file being read + /// Decryption properties for the file + public FileDecryptionProperties GetFileDecryptionProperties( + KmsConnectionConfig connectionConfig, + DecryptionConfiguration decryptionConfig, + string? filePath = null) + { + var fileDecryptionPropertiesHandle = ExceptionInfo.Return( + _handle.IntPtr, connectionConfig.Handle.IntPtr, decryptionConfig.Handle.IntPtr, filePath, CryptoFactory_GetFileDecryptionProperties); + return new FileDecryptionProperties(fileDecryptionPropertiesHandle); + } + + /// + /// Rotates master encryption keys for a Parquet file that uses external key material. 
+ /// In single wrapping mode, data encryption keys are decrypted with the old master keys + /// and then re-encrypted with new master keys. + /// In double wrapping mode, key encryption keys are decrypted with the old master keys + /// and then re-encrypted with new master keys. + /// This relies on the KMS supporting versioning, such that the old master key is + /// used when unwrapping a key, and the latest version is used when wrapping a key. + /// + /// The KMS connection configuration to use + /// Path to the encrypted Parquet file + /// Whether to use double wrapping when rotating + /// Lifetime of cached objects in seconds + public void RotateMasterKeys( + KmsConnectionConfig connectionConfig, + string parquetFilePath, + bool doubleWrapping, + double cacheLifetimeSeconds = 600) + { + ExceptionInfo.Check(CryptoFactory_RotateMasterKeys( + _handle.IntPtr, connectionConfig.Handle.IntPtr, parquetFilePath, doubleWrapping, cacheLifetimeSeconds)); + } + + public void Dispose() + { + _handle.Dispose(); + } + + private static IntPtr CreateClientFactoryGcHandle(KmsClientFactory kmsClientFactory) + { + var gcHandle = GCHandle.Alloc(kmsClientFactory, GCHandleType.Normal); + return GCHandle.ToIntPtr(gcHandle); + } + + private static IKmsClient GetKmsClientFromHandle(IntPtr handle) + { + return (IKmsClient) GCHandle.FromIntPtr(handle).Target!; + } + + private static void FreeGcHandle(IntPtr handle) + { + GCHandle.FromIntPtr(handle).Free(); + } + + private static void CreateKmsClient(IntPtr clientFactoryGcHandle, IntPtr connectionConfigHandle, out IntPtr clientHandlePtr, out string? 
exception) + { + exception = null; + clientHandlePtr = IntPtr.Zero; + + try + { + var clientFactory = (KmsClientFactory) GCHandle.FromIntPtr(clientFactoryGcHandle).Target!; + var connectionConfig = KmsConnectionConfig.FromConstPointer(connectionConfigHandle); + var client = clientFactory(connectionConfig); + var clientHandle = GCHandle.Alloc(client, GCHandleType.Normal); + clientHandlePtr = GCHandle.ToIntPtr(clientHandle); + } + catch (Exception ex) + { + exception = ex.ToString(); + } + } + + private static unsafe void WrapKey( + IntPtr handle, byte* keyBytes, int keyBytesLength, string masterKeyIdentifier, out string wrappedKey, out string? exception) + { + exception = null; + wrappedKey = ""; + + try + { + var kmsClient = GetKmsClientFromHandle(handle); + var keyBytesArray = new byte[keyBytesLength]; + Marshal.Copy(new IntPtr(keyBytes), keyBytesArray, 0, keyBytesLength); + + wrappedKey = kmsClient.WrapKey(keyBytesArray, masterKeyIdentifier); + } + catch (Exception ex) + { + exception = ex.ToString(); + } + } + + private static void UnwrapKey( + IntPtr handle, string wrappedKey, string masterKeyIdentifier, IntPtr unwrappedKeyBufferPtr, out string? exception) + { + exception = null; + + try + { + var kmsClient = GetKmsClientFromHandle(handle); + + var unwrapped = kmsClient.UnwrapKey(wrappedKey, masterKeyIdentifier); + + // Copy unwrapped bytes into the buffer provided. + // We don't free the buffer when disposing, it is owned by the C++ side + using var unwrappedKeyBuffer = ResizableBuffer.FromNonOwnedPtr(unwrappedKeyBufferPtr); + unwrappedKeyBuffer.Resize(unwrapped.Length); + Marshal.Copy(unwrapped, 0, unwrappedKeyBuffer.MutableData, unwrapped.Length); + } + catch (Exception ex) + { + exception = ex.ToString(); + } + } + + private delegate void FreeGcHandleFunc(IntPtr handle); + + private delegate void CreateClientFunc( + IntPtr factoryHandle, + IntPtr kmsConnectionConfig, + out IntPtr clientHandle, + [MarshalAs(UnmanagedType.LPStr)] out string? 
exception); + + private unsafe delegate void WrapKeyFunc( + IntPtr handle, + byte* keyBytes, + int keyBytesLength, + [MarshalAs(UnmanagedType.LPUTF8Str)] string masterKeyIdentifier, + [MarshalAs(UnmanagedType.LPUTF8Str)] out string wrappedKey, + [MarshalAs(UnmanagedType.LPStr)] out string? exception); + + private delegate void UnwrapKeyFunc( + IntPtr handle, + [MarshalAs(UnmanagedType.LPUTF8Str)] string wrappedKey, + [MarshalAs(UnmanagedType.LPUTF8Str)] string masterKeyIdentifier, + IntPtr unwrappedKeyBuffer, + [MarshalAs(UnmanagedType.LPStr)] out string? exception); + + private static readonly FreeGcHandleFunc FreeGcHandleCallback = FreeGcHandle; + private static readonly CreateClientFunc CreateClientCallback = CreateKmsClient; + private static readonly unsafe WrapKeyFunc WrapKeyCallback = WrapKey; + private static readonly UnwrapKeyFunc UnwrapKeyCallback = UnwrapKey; + + [DllImport(ParquetDll.Name)] + private static extern IntPtr CryptoFactory_Create(out IntPtr cryptoFactory); + + [DllImport(ParquetDll.Name)] + private static extern void CryptoFactory_Free(IntPtr cryptoFactory); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr CryptoFactory_RegisterKmsClientFactory( + IntPtr cryptoFactory, IntPtr clientFactory, FreeGcHandleFunc freeGcHandle, CreateClientFunc createClient, WrapKeyFunc wrapKey, UnwrapKeyFunc unwrapKey); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr CryptoFactory_GetFileEncryptionProperties( + IntPtr cryptoFactory, IntPtr kmsConnectionConfig, IntPtr encryptionConfig, + [MarshalAs(UnmanagedType.LPUTF8Str)] string? filePath, out IntPtr fileEncryptionProperties); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr CryptoFactory_GetFileDecryptionProperties( + IntPtr cryptoFactory, IntPtr kmsConnectionConfig, IntPtr decryptionConfig, + [MarshalAs(UnmanagedType.LPUTF8Str)] string? 
filePath, out IntPtr fileDecryptionProperties); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr CryptoFactory_RotateMasterKeys( + IntPtr cryptoFactory, IntPtr kmsConnectionConfig, [MarshalAs(UnmanagedType.LPUTF8Str)] string parquetFilePath, + [MarshalAs(UnmanagedType.I1)] bool doubleWrapping, double cacheLifetimeSeconds); + + private readonly ParquetHandle _handle; + } +} diff --git a/csharp/Encryption/DecryptionConfiguration.cs b/csharp/Encryption/DecryptionConfiguration.cs new file mode 100644 index 00000000..40dfe41b --- /dev/null +++ b/csharp/Encryption/DecryptionConfiguration.cs @@ -0,0 +1,48 @@ +using System; +using System.Runtime.InteropServices; + +namespace ParquetSharp.Encryption +{ + /// + /// Configures how data should be decrypted when reading a ParquetFile with a KMS + /// + public sealed class DecryptionConfiguration : IDisposable + { + /// + /// Create a new DecryptionConfiguration + /// + public DecryptionConfiguration() + { + var handle = ExceptionInfo.Return(DecryptionConfiguration_Create); + Handle = new ParquetHandle(handle, DecryptionConfiguration_Free); + } + + /// + /// Lifetime of cached entities (key encryption keys, local wrapping keys, KMS client objects) in seconds. 
+ /// + public double CacheLifetimeSeconds + { + get => ExceptionInfo.Return(Handle, DecryptionConfiguration_GetCacheLifetimeSeconds); + set => ExceptionInfo.Check(DecryptionConfiguration_SetCacheLifetimeSeconds(Handle.IntPtr, value)); + } + + public void Dispose() + { + Handle.Dispose(); + } + + [DllImport(ParquetDll.Name)] + private static extern IntPtr DecryptionConfiguration_Create(out IntPtr decryptionConfiguration); + + [DllImport(ParquetDll.Name)] + private static extern void DecryptionConfiguration_Free(IntPtr decryptionConfiguration); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr DecryptionConfiguration_GetCacheLifetimeSeconds(IntPtr decryptionConfiguration, out double lifetime); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr DecryptionConfiguration_SetCacheLifetimeSeconds(IntPtr decryptionConfiguration, double lifetime); + + internal readonly ParquetHandle Handle; + } +} diff --git a/csharp/Encryption/EncryptionConfiguration.cs b/csharp/Encryption/EncryptionConfiguration.cs new file mode 100644 index 00000000..6e09912f --- /dev/null +++ b/csharp/Encryption/EncryptionConfiguration.cs @@ -0,0 +1,204 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; +using System.Text; + +namespace ParquetSharp.Encryption +{ + /// + /// Configures how data should be encrypted when writing a ParquetFile with a KMS + /// + public sealed class EncryptionConfiguration : IDisposable + { + /// + /// Create a new EncryptionConfiguration + /// + /// ID of the master key for footer encryption and signing + public EncryptionConfiguration(string footerKey) + { + var handle = ExceptionInfo.Return(footerKey, EncryptionConfiguration_Create); + Handle = new ParquetHandle(handle, EncryptionConfiguration_Free); + } + + /// + /// ID of the master key for footer encryption and signing + /// + public string FooterKey + { + get => ExceptionInfo.ReturnString(Handle, 
EncryptionConfiguration_GetFooterKey); + set => ExceptionInfo.Check(EncryptionConfiguration_SetFooterKey(Handle.IntPtr, value)); + } + + /// + /// Map from master key IDs to the names of columns encrypted with this key + /// + public IReadOnlyDictionary> ColumnKeys + { + get => ParseColumnKeys(ExceptionInfo.ReturnString(Handle, EncryptionConfiguration_GetColumnKeys)); + set => ExceptionInfo.Check(EncryptionConfiguration_SetColumnKeys(Handle.IntPtr, EncodeColumnKeys(value))); + } + + /// + /// Whether the footer and columns are all encrypted with the same key + /// + public bool UniformEncryption + { + get => ExceptionInfo.Return(Handle, EncryptionConfiguration_GetUniformEncryption); + set => ExceptionInfo.Check(EncryptionConfiguration_SetUniformEncryption(Handle.IntPtr, value)); + } + + /// + /// The encryption algorithm to use + /// + public ParquetCipher EncryptionAlgorithm + { + get => ExceptionInfo.Return(Handle, EncryptionConfiguration_GetEncryptionAlgorithm); + set => ExceptionInfo.Check(EncryptionConfiguration_SetEncryptionAlgorithm(Handle.IntPtr, value)); + } + + /// + /// Whether the footer should be stored unencrypted + /// + public bool PlaintextFooter + { + get => ExceptionInfo.Return(Handle, EncryptionConfiguration_GetPlaintextFooter); + set => ExceptionInfo.Check(EncryptionConfiguration_SetPlaintextFooter(Handle.IntPtr, value)); + } + + /// + /// Whether double wrapping should be used, where data encryption keys (DEKs) are encrypted + /// with key encryption keys (KEKs), which in turn are encrypted with master keys. + /// If false, single wrapping is used, where data encryption keys are encrypted directly + /// with master keys. 
+ /// + public bool DoubleWrapping + { + get => ExceptionInfo.Return(Handle, EncryptionConfiguration_GetDoubleWrapping); + set => ExceptionInfo.Check(EncryptionConfiguration_SetDoubleWrapping(Handle.IntPtr, value)); + } + + /// + /// Lifetime of cached entities (key encryption keys, local wrapping keys, KMS client objects) in seconds. + /// + public double CacheLifetimeSeconds + { + get => ExceptionInfo.Return(Handle, EncryptionConfiguration_GetCacheLifetimeSeconds); + set => ExceptionInfo.Check(EncryptionConfiguration_SetCacheLifetimeSeconds(Handle.IntPtr, value)); + } + + /// + /// Store key material inside Parquet file footers; this mode doesn’t produce + /// additional files. By default, true. If set to false, key material is stored in + /// separate files in the same folder, which enables key rotation for immutable + /// Parquet files. + /// + public bool InternalKeyMaterial + { + get => ExceptionInfo.Return(Handle, EncryptionConfiguration_GetInternalKeyMaterial); + set => ExceptionInfo.Check(EncryptionConfiguration_SetInternalKeyMaterial(Handle.IntPtr, value)); + } + + /// + /// Length of data encryption keys (DEKs), randomly generated by parquet key + /// management tools. Can be 128, 192 or 256 bits. + /// The default is 128 bits. 
+        ///
+        public int DataKeyLengthBits
+        {
+            get => ExceptionInfo.Return<int>(Handle, EncryptionConfiguration_GetDataKeyLengthBits);
+            set => ExceptionInfo.Check(EncryptionConfiguration_SetDataKeyLengthBits(Handle.IntPtr, value));
+        }
+
+        /// <summary>
+        /// Serialize a column keys map to the single-string form passed to the native library:
+        /// entries are separated by ';', and each entry is "masterKeyId:column1,column2".
+        /// An empty map encodes to the empty string.
+        /// </summary>
+        /// <param name="columnKeys">Map from master key ID to the columns encrypted with that key</param>
+        /// <returns>The encoded column keys string</returns>
+        private static string EncodeColumnKeys(IReadOnlyDictionary<string, IReadOnlyList<string>> columnKeys)
+        {
+            return string.Join(";", columnKeys.Select(kvp => $"{kvp.Key}:{string.Join(",", kvp.Value)}"));
+        }
+
+        /// <summary>
+        /// Parse a column keys string of the form "masterKeyId:column1,column2;otherKeyId:column3"
+        /// back into a map from master key ID to column names. Key IDs and column names are trimmed.
+        /// </summary>
+        /// <param name="columnKeys">The encoded column keys string; may be empty when no column keys are set</param>
+        /// <returns>Map from master key ID to column names, empty if no column keys are configured</returns>
+        /// <exception cref="Exception">Thrown if an entry does not contain exactly one ':' separator</exception>
+        private static IReadOnlyDictionary<string, IReadOnlyList<string>> ParseColumnKeys(string columnKeys)
+        {
+            var columnKeysMap = new Dictionary<string, IReadOnlyList<string>>();
+
+            // When no column keys are configured the string is empty. "".Split(';') yields a single
+            // empty entry that would fail the format check below, so return an empty map instead.
+            if (string.IsNullOrWhiteSpace(columnKeys))
+            {
+                return columnKeysMap;
+            }
+
+            foreach (var keyToColumns in columnKeys.Split(';'))
+            {
+                var mapping = keyToColumns.Split(':');
+                if (mapping.Length != 2)
+                {
+                    throw new Exception($"Invalid column keys format: '{columnKeys}'");
+                }
+
+                var masterKeyId = mapping[0].Trim();
+                var columns = mapping[1].Split(',').Select(col => col.Trim()).ToArray();
+                columnKeysMap[masterKeyId] = columns;
+            }
+            return columnKeysMap;
+        }
+
+        /// <summary>
+        /// Dispose the handle wrapping the native EncryptionConfiguration
+        /// </summary>
+        public void Dispose()
+        {
+            Handle.Dispose();
+        }
+
+        // The footer key is marshalled explicitly as UTF-8, the same as in EncryptionConfiguration_SetFooterKey,
+        // so that the constructor and the FooterKey setter encode key IDs identically on every platform
+        // rather than relying on the platform-dependent default string marshalling.
+        [DllImport(ParquetDll.Name)]
+        private static extern IntPtr EncryptionConfiguration_Create([MarshalAs(UnmanagedType.LPUTF8Str)] string footerKey, out IntPtr encryptionConfiguration);
+
+        [DllImport(ParquetDll.Name)]
+        private static extern void EncryptionConfiguration_Free(IntPtr encryptionConfiguration);
+
+        [DllImport(ParquetDll.Name)]
+        private static extern IntPtr EncryptionConfiguration_GetFooterKey(IntPtr encryptionConfiguration, out IntPtr footerKey);
+
+        [DllImport(ParquetDll.Name)]
+        private static extern IntPtr EncryptionConfiguration_SetFooterKey(IntPtr encryptionConfiguration, [MarshalAs(UnmanagedType.LPUTF8Str)] string footerKey);
+
+        [DllImport(ParquetDll.Name)]
+        private static extern IntPtr EncryptionConfiguration_GetColumnKeys(IntPtr encryptionConfiguration, out IntPtr columnKeys);
+
+        [DllImport(ParquetDll.Name)]
+        private static extern IntPtr EncryptionConfiguration_SetColumnKeys(IntPtr encryptionConfiguration, [MarshalAs(UnmanagedType.LPUTF8Str)] string columnKeys);
+
+
[DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_GetUniformEncryption(IntPtr encryptionConfiguration, [MarshalAs(UnmanagedType.I1)] out bool uniformEncryption); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_SetUniformEncryption(IntPtr encryptionConfiguration, [MarshalAs(UnmanagedType.I1)] bool uniformEncryption); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_GetEncryptionAlgorithm(IntPtr encryptionConfiguration, out ParquetCipher cipher); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_SetEncryptionAlgorithm(IntPtr encryptionConfiguration, ParquetCipher cipher); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_GetPlaintextFooter(IntPtr encryptionConfiguration, [MarshalAs(UnmanagedType.I1)] out bool plaintextFooter); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_SetPlaintextFooter(IntPtr encryptionConfiguration, [MarshalAs(UnmanagedType.I1)] bool plaintextFooter); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_GetDoubleWrapping(IntPtr encryptionConfiguration, [MarshalAs(UnmanagedType.I1)] out bool doubleWrapping); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_SetDoubleWrapping(IntPtr encryptionConfiguration, [MarshalAs(UnmanagedType.I1)] bool doubleWrapping); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_GetCacheLifetimeSeconds(IntPtr encryptionConfiguration, out double lifetime); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_SetCacheLifetimeSeconds(IntPtr encryptionConfiguration, double lifetime); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_GetInternalKeyMaterial(IntPtr encryptionConfiguration, 
[MarshalAs(UnmanagedType.I1)] out bool internalKeyMaterial); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_SetInternalKeyMaterial(IntPtr encryptionConfiguration, [MarshalAs(UnmanagedType.I1)] bool internalKeyMaterial); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_GetDataKeyLengthBits(IntPtr encryptionConfiguration, out int keyLength); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr EncryptionConfiguration_SetDataKeyLengthBits(IntPtr encryptionConfiguration, int keyLength); + + internal readonly ParquetHandle Handle; + } +} diff --git a/csharp/Encryption/IKmsClient.cs b/csharp/Encryption/IKmsClient.cs new file mode 100644 index 00000000..fe1268c1 --- /dev/null +++ b/csharp/Encryption/IKmsClient.cs @@ -0,0 +1,18 @@ +namespace ParquetSharp.Encryption +{ + /// + /// Interface for Key Management System (KMS) client implementations + /// + public interface IKmsClient + { + /// + /// Wrap a key - encrypt it with the master key + /// + public string WrapKey(byte[] keyBytes, string masterKeyIdentifier); + + /// + /// Unwrap a key - decrypt it with the master key + /// + public byte[] UnwrapKey(string wrappedKey, string masterKeyIdentifier); + } +} diff --git a/csharp/Encryption/KmsConnectionConfig.cs b/csharp/Encryption/KmsConnectionConfig.cs new file mode 100644 index 00000000..0fa2e20e --- /dev/null +++ b/csharp/Encryption/KmsConnectionConfig.cs @@ -0,0 +1,128 @@ +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +namespace ParquetSharp.Encryption +{ + /// + /// Configures how to connect to a Key Management System (KMS) + /// + public class KmsConnectionConfig : IDisposable + { + public KmsConnectionConfig() + { + var handle = ExceptionInfo.Return(KmsConnectionConfig_Create); + Handle = new ParquetHandle(handle, KmsConnectionConfig_Free); + } + + private KmsConnectionConfig(ParquetHandle handle) + { + Handle = handle; + } + + 
internal static ReadonlyKmsConnectionConfig FromConstPointer(IntPtr ptr) + { + var handle = new ParquetHandle(ptr, _ => { }); + using var config = new KmsConnectionConfig(handle); + return new ReadonlyKmsConnectionConfig( + config.KmsInstanceId, config.KmsInstanceUrl, config.KeyAccessToken, config.CustomKmsConf); + } + + /// + /// Update the access token + /// + /// The new token to use + public void RefreshKeyAccessToken(string newToken) + { + ExceptionInfo.Check(KmsConnectionConfig_SetKeyAccessToken(Handle.IntPtr, newToken)); + } + + /// + /// ID of the KMS instance that will be used for encryption + /// + public string KmsInstanceId + { + get => ExceptionInfo.ReturnString(Handle, KmsConnectionConfig_GetKmsInstanceId); + set => ExceptionInfo.Check(KmsConnectionConfig_SetKmsInstanceId(Handle.IntPtr, value)); + } + + /// + /// URL of the KMS instance + /// + public string KmsInstanceUrl + { + get => ExceptionInfo.ReturnString(Handle, KmsConnectionConfig_GetKmsInstanceUrl); + set => ExceptionInfo.Check(KmsConnectionConfig_SetKmsInstanceUrl(Handle.IntPtr, value)); + } + + /// + /// Authorization token that will be passed to the KMS + /// + public string KeyAccessToken + { + get => ExceptionInfo.ReturnString(Handle, KmsConnectionConfig_GetKeyAccessToken); + set => ExceptionInfo.Check(KmsConnectionConfig_SetKeyAccessToken(Handle.IntPtr, value)); + } + + /// + /// KMS-type-specific configuration + /// + public IReadOnlyDictionary CustomKmsConf + { + get + { + var kvmHandle = ExceptionInfo.Return(Handle, KmsConnectionConfig_GetCustomKmsConf); + if (kvmHandle == IntPtr.Zero) + { + return new Dictionary(); + } + + using var keyValueMetadata = new KeyValueMetadata(kvmHandle); + return keyValueMetadata.ToDictionary(); + } + set + { + using var keyValueMetadata = new KeyValueMetadata(); + keyValueMetadata.SetData(value); + ExceptionInfo.Check(KmsConnectionConfig_SetCustomKmsConf(Handle.IntPtr, keyValueMetadata.Handle.IntPtr)); + } + } + + public void Dispose() + { + 
Handle.Dispose(); + } + + [DllImport(ParquetDll.Name)] + private static extern IntPtr KmsConnectionConfig_Create(out IntPtr config); + + [DllImport(ParquetDll.Name)] + private static extern void KmsConnectionConfig_Free(IntPtr config); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr KmsConnectionConfig_GetKmsInstanceId(IntPtr config, out IntPtr instanceId); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr KmsConnectionConfig_SetKmsInstanceId(IntPtr config, [MarshalAs(UnmanagedType.LPUTF8Str)] string instanceId); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr KmsConnectionConfig_GetKmsInstanceUrl(IntPtr config, out IntPtr instanceUrl); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr KmsConnectionConfig_SetKmsInstanceUrl(IntPtr config, [MarshalAs(UnmanagedType.LPUTF8Str)] string instanceUrl); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr KmsConnectionConfig_GetKeyAccessToken(IntPtr config, out IntPtr accessToken); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr KmsConnectionConfig_SetKeyAccessToken(IntPtr config, [MarshalAs(UnmanagedType.LPUTF8Str)] string accessToken); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr KmsConnectionConfig_GetCustomKmsConf(IntPtr config, out IntPtr conf); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr KmsConnectionConfig_SetCustomKmsConf(IntPtr config, IntPtr conf); + + internal readonly ParquetHandle Handle; + } +} diff --git a/csharp/Encryption/ReadonlyKmsConnectionConfig.cs b/csharp/Encryption/ReadonlyKmsConnectionConfig.cs new file mode 100644 index 00000000..a388853f --- /dev/null +++ b/csharp/Encryption/ReadonlyKmsConnectionConfig.cs @@ -0,0 +1,42 @@ +using System.Collections.Generic; + +namespace ParquetSharp.Encryption +{ + /// + /// Readonly version of KmsConnectionConfig. 
This is passed to KmsClient factories + /// + public class ReadonlyKmsConnectionConfig + { + internal ReadonlyKmsConnectionConfig( + string kmsInstanceId, + string kmsInstanceUrl, + string keyAccessToken, + IReadOnlyDictionary customKmsConf) + { + KmsInstanceId = kmsInstanceId; + KmsInstanceUrl = kmsInstanceUrl; + KeyAccessToken = keyAccessToken; + CustomKmsConf = customKmsConf; + } + + /// + /// ID of the KMS instance that will be used for encryption + /// + public string KmsInstanceId { get; } + + /// + /// URL of the KMS instance + /// + public string KmsInstanceUrl { get; } + + /// + /// Authorization token that will be passed to the KMS + /// + public string KeyAccessToken { get; } + + /// + /// KMS-type-specific configuration + /// + public IReadOnlyDictionary CustomKmsConf { get; } + } +} diff --git a/csharp/ExceptionInfo.cs b/csharp/ExceptionInfo.cs index 6abc0605..3c2360c6 100644 --- a/csharp/ExceptionInfo.cs +++ b/csharp/ExceptionInfo.cs @@ -12,6 +12,7 @@ internal sealed class ExceptionInfo public delegate IntPtr GetAction(TArg0 arg0, out TValue value); public delegate IntPtr GetAction(TArg0 arg0, TArg1 arg1, out TValue value); public delegate IntPtr GetAction(TArg0 arg0, TArg1 arg1, TArg2 arg2, out TValue value); + public delegate IntPtr GetAction(TArg0 arg0, TArg1 arg1, TArg2 arg2, TArg3 arg3, out TValue value); public delegate IntPtr GetFunction(IntPtr handle, out TValue value); public delegate IntPtr GetFunction(IntPtr handle, TArg0 arg0, out TValue value); public delegate IntPtr GetFunction(IntPtr handle, TArg0 arg0, TArg1 arg1, out TValue value); @@ -62,6 +63,12 @@ public static TValue Return(TArg0 arg0, TArg1 arg1, return value; } + public static TValue Return(TArg0 arg0, TArg1 arg1, TArg2 arg2, TArg3 arg3, GetAction getter) + { + Check(getter(arg0, arg1, arg2, arg3, out var value)); + return value; + } + public static TValue Return(ParquetHandle handle, GetFunction getter) { var value = Return(handle.IntPtr, getter); diff --git 
a/csharp/FileEncryptionProperties.cs b/csharp/FileEncryptionProperties.cs index 2212160c..dcbd289d 100644 --- a/csharp/FileEncryptionProperties.cs +++ b/csharp/FileEncryptionProperties.cs @@ -24,7 +24,13 @@ public void Dispose() public string FooterKeyMetadata => ExceptionInfo.ReturnString(Handle, FileEncryptionProperties_Footer_Key_Metadata, FileEncryptionProperties_Footer_Key_Metadata_Free); public string FileAad => ExceptionInfo.ReturnString(Handle, FileEncryptionProperties_File_Aad, FileEncryptionProperties_File_Aad_Free); - public ColumnEncryptionProperties ColumnEncryptionProperties(string columnPath) => new ColumnEncryptionProperties(ExceptionInfo.Return(Handle, columnPath, FileEncryptionProperties_Column_Encryption_Properties)); + public ColumnEncryptionProperties? ColumnEncryptionProperties(string columnPath) + { + var columnHandle = ExceptionInfo.Return( + Handle, columnPath, FileEncryptionProperties_Column_Encryption_Properties); + return columnHandle == IntPtr.Zero ? null : new ColumnEncryptionProperties(columnHandle); + } + public FileEncryptionProperties DeepClone() => new FileEncryptionProperties(ExceptionInfo.Return(Handle, FileEncryptionProperties_Deep_Clone)); [DllImport(ParquetDll.Name)] diff --git a/csharp/IO/Buffer.cs b/csharp/IO/Buffer.cs index 1835ce16..48462779 100644 --- a/csharp/IO/Buffer.cs +++ b/csharp/IO/Buffer.cs @@ -18,6 +18,11 @@ internal Buffer(IntPtr handle) Handle = new ParquetHandle(handle, Buffer_Free); } + internal Buffer(ParquetHandle handle) + { + Handle = handle; + } + public void Dispose() { Handle.Dispose(); @@ -25,6 +30,7 @@ public void Dispose() public long Capacity => ExceptionInfo.Return(Handle, Buffer_Capacity); public IntPtr Data => ExceptionInfo.Return(Handle, Buffer_Data); + public IntPtr MutableData => ExceptionInfo.Return(Handle, Buffer_MutableData); public long Size => ExceptionInfo.Return(Handle, Buffer_Size); public byte[] ToArray() @@ -52,6 +58,9 @@ private static IntPtr Make(IntPtr data, long size) 
[DllImport(ParquetDll.Name)] private static extern IntPtr Buffer_Data(IntPtr buffer, out IntPtr data); + [DllImport(ParquetDll.Name)] + private static extern IntPtr Buffer_MutableData(IntPtr buffer, out IntPtr data); + [DllImport(ParquetDll.Name)] private static extern IntPtr Buffer_Size(IntPtr buffer, out long size); diff --git a/csharp/IO/ResizableBuffer.cs b/csharp/IO/ResizableBuffer.cs index 6814ff45..fb00b04c 100644 --- a/csharp/IO/ResizableBuffer.cs +++ b/csharp/IO/ResizableBuffer.cs @@ -13,7 +13,24 @@ public ResizableBuffer(long initialSize = 128L) { } + internal static ResizableBuffer FromNonOwnedPtr(IntPtr handle) + { + return new ResizableBuffer(new ParquetHandle(handle, _ => { })); + } + + private ResizableBuffer(ParquetHandle handle) : base(handle) + { + } + + internal void Resize(long newSize) + { + ExceptionInfo.Check(ResizableBuffer_Resize(Handle.IntPtr, newSize)); + } + [DllImport(ParquetDll.Name)] private static extern IntPtr ResizableBuffer_Create(long initialSize, out IntPtr resizableBuffer); + + [DllImport(ParquetDll.Name)] + private static extern IntPtr ResizableBuffer_Resize(IntPtr resizableBuffer, long newSize); } } diff --git a/docs/Encryption.md b/docs/Encryption.md new file mode 100644 index 00000000..6df11416 --- /dev/null +++ b/docs/Encryption.md @@ -0,0 +1,328 @@ +# Parquet Modular Encryption + +The Parquet format supports [modular encryption](https://github.com/apache/parquet-format/blob/master/Encryption.md), +where different parts of a Parquet file can be encrypted separately. +This allows controlling access to data on a per-column basis for example, +or allowing some clients to read the file schema and metadata but not the column data. + +ParquetSharp supports two approaches for working with encryption; +the recommended approach is to use the high-level Key Management Tools API, +but you may also use the lower-level API to configure data encryption keys directly. 
+Note that PyArrow only supports the Key Management Tools API, +so this should be used if compatibility with PyArrow is required. + +## Key Management Tools + +_This API was added in ParquetSharp 15.0.0_ + +The Key Management Tools API implements envelope encryption, +where data is encrypted with randomly generated data encryption keys (DEKs), +and the DEKs are encrypted with master encryption keys (MEKs). +The master encryption keys are managed by a Key Management Service (KMS). + +If double wrapping is used, DEKs are first encrypted with key encryption keys (KEKs), +which are then encrypted with MEKs. The KEKs are cached to reduce interaction with the KMS. +Double wrapping is enabled by default. + +For further details, see the +[Key Management Tools design document](https://docs.google.com/document/d/1bEu903840yb95k9q2X-BlsYKuXoygE4VnMDl9xz_zhk). + +The Key Management Tools API is contained in the `ParquetSharp.Encryption` namespace. +In order to use this API, +a client for a Key Management Service must be implemented: + +```c# +using ParquetSharp.Encryption; + +internal sealed class MyKmsClient : IKmsClient +{ + public MyKmsClient(ReadonlyKmsConnectionConfig config) + { + // KMS specific initialization using the connection configuration + } + + public string WrapKey(byte[] keyBytes, string masterKeyIdentifier) + { + // Use the KMS to wrap (encrypt) keyBytes using the specified master key, + // and return the wrapped key as a string that can be stored in the Parquet metadata. + } + + public byte[] UnwrapKey(string wrappedKey, string masterKeyIdentifier) + { + // Use the KMS to unwrap the wrapped key using the specified master key + } +} +``` + +The main entrypoint for the Key Management Tools API is the `CryptoFactory` class. 
+This requires a factory method for creating KMS clients, +which are cached internally and periodically recreated: + +```c# +using var cryptoFactory = new CryptoFactory(config => new MyKmsClient(config)); +``` + +### Writing Encrypted Files + +To write an encrypted Parquet file, the KMS connection is first configured: + +```c# +using var kmsConnectionConfig = new KmsConnectionConfig(); +// ParquetSharp doesn't require any config fields to be set, +// the fields needed will depend on the IKmsClient implementation +kmsConnectionConfig.KmsInstanceId = ...; +kmsConnectionConfig.KmsInstanceUrl = ...; +kmsConnectionConfig.KeyAccessToken = ...; +``` + +Then to configure how the file is encrypted, an `EncryptionConfiguration` is created: + +```c# +string footerKeyId = ...; +using var encryptionConfig = new EncryptionConfiguration(footerKeyId); +``` + +You can specify that uniform encryption is used, in which case all columns +are encrypted with the same master key as the footer: + +```c# +encryptionConfig.UniformEncryption = true; +``` + +Or you can specify master encryption keys to use per column: +```c# +encryptionConfig.ColumnKeys = new Dictionary<string, IReadOnlyList<string>> +{ + {"MasterKey1", new[] {"Column0", "Column1", "Column2"}}, + {"MasterKey2", new[] {"Column3", "Column4"}}, +}; +``` + +And you can configure whether double or single wrapping is used: + +```c# +encryptionConfig.DoubleWrapping = false; // Single-wrapping mode +``` + +You can also disable footer encryption, in which case the file schema and metadata +may be read by any user able to read the file: + +```c# +encryptionConfig.PlaintextFooter = true; +``` + +The `kmsConnectionConfig` and `encryptionConfig` are used to generate +file encryption properties, which are used to build the `WriterProperties`: + +```c# +using var fileEncryptionProperties = cryptoFactory.GetFileEncryptionProperties( + kmsConnectionConfig, encryptionConfig); + +using var writerPropertiesBuilder = new WriterPropertiesBuilder(); +using var 
writerProperties = writerPropertiesBuilder + .Compression(Compression.Snappy) + .Encryption(fileEncryptionProperties) + .Build(); +``` + +Finally, the Parquet file can be written using the `WriterProperties`: + +```c# +Column[] columns = ...; +using var fileWriter = new ParquetFileWriter(parquetFilePath, columns, writerProperties); +// Write data with fileWriter +``` + +### Reading Encrypted Files + +Reading encrypted files requires creating `FileDecryptionProperties` with a `CryptoFactory`, +and adding these to the `ReaderProperties`: + +```c# +using var decryptionConfig = new DecryptionConfiguration(); +using var fileDecryptionProperties = cryptoFactory.GetFileDecryptionProperties( + kmsConnectionConfig, decryptionConfig); + +using var readerProperties = ReaderProperties.GetDefaultReaderProperties(); +readerProperties.FileDecryptionProperties = fileDecryptionProperties; + +using var fileReader = new ParquetFileReader(parquetFilePath, readerProperties); +// Read data as normal +``` + +**Important**: The `CryptoFactory` instance used to generate the `FileDecryptionProperties` +must remain alive and not be disposed until after the file has been read, +as internally the `FileDecryptionProperties` contains references to +data in the `CryptoFactory` that cannot be managed by ParquetSharp. +Failure to do so may result in native memory access violations and crashes that cannot be caught as exceptions. + +### External Metadata and Key Rotation + +Key material is stored inside the Parquet file metadata by default, +but key material can also be stored in separate JSON files alongside Parquet files, +to allow rotation of master keys without needing to rewrite the Parquet files. 
+ +This is configured in the `EncryptionConfiguration`: + +```c# +using var encryptionConfig = new EncryptionConfiguration(footerKeyId); +encryptionConfig.InternalKeyMaterial = false; // External key material +``` + +When using external key material, the path to the Parquet file being written or read +must be specified when creating `FileEncryptionProperties` and `FileDecryptionProperties`: + +```c# +using var fileEncryptionProperties = cryptoFactory.GetFileEncryptionProperties( + kmsConnectionConfig, encryptionConfig, parquetFilePath); + +// ... + +using var fileDecryptionProperties = cryptoFactory.GetFileDecryptionProperties( + kmsConnectionConfig, decryptionConfig, parquetFilePath); +``` + +After writing a Parquet file using external key material, master keys can be rotated with a `CryptoFactory`: + +```c# +cryptoFactory.RotateMasterKeys(kmsConnectionConfig, parquetFilePath, doubleWrapping: true); +``` + +Key rotation requires that the KMS supports versioning, such that the old master key is +used when unwrapping a key, and the latest version is used when wrapping a key. + +## Low-Level Encryption + +It is also possible to directly specify the AES keys used for footer and column encryption +by creating `FileEncryptionProperties` and `FileDecryptionProperties` manually, +without using a `CryptoFactory`. +These properties can then be used as above, by building `WriterProperties` or `ReaderProperties`. + +This example demonstrates creating encryption properties: + +```c# +byte[] key0 = ...; // Bytes for 128, 192 or 256 bit AES key +byte[] key1 = ...; +byte[] key2 = ...; + +// Use key0 as the footer key +using var builder = new FileEncryptionPropertiesBuilder(key0); + +// Configure encryption for two columns, using different keys. +// Key metadata can be set in order to identify which key to use when later decrypting data. 
+using var col0Builder = new ColumnEncryptionPropertiesBuilder("Column0"); +using var col0Properties = col0Builder.Key(key1).KeyMetadata("key1").Build(); + +using var col1Builder = new ColumnEncryptionPropertiesBuilder("Column1"); +using var col1Properties = col1Builder.Key(key2).KeyMetadata("key2").Build(); + +using var fileEncryptionProperties = builder + .FooterKeyMetadata("key0") + .EncryptedColumns(new[] + { + col0Properties, + col1Properties, + }) + .Build(); +``` + +Creating decryption properties works similarly: + +```c# +using var builder = new FileDecryptionPropertiesBuilder(); + +using var col0Builder = new ColumnDecryptionPropertiesBuilder("Column0"); +using var col0Properties = col0Builder.Key(key1).Build(); + +using var col1Builder = new ColumnDecryptionPropertiesBuilder("Column1"); +using var col1Properties = col1Builder.Key(key2).Build(); + +using var fileDecryptionProperties = builder + .FooterKey(key0) + .ColumnKeys(new[] {col0Properties, col1Properties}) + .Build(); +``` + +Rather than having to specify decryption keys directly, +a `DecryptionKeyRetriever` can be used to retrieve keys based on the key metadata, +to allow more flexibility: + +```c# +internal sealed class MyKeyRetriever : ParquetSharp.DecryptionKeyRetriever +{ + public override byte[] GetKey(string keyMetadata) + { + // Return AES key bytes based on the contents of the key metadata + } +} +``` + +The `FileDecryptionProperties` are then built using the key retriever: + +```c# +using var builder = new FileDecryptionPropertiesBuilder(); +using var fileDecryptionProperties = builder + .KeyRetriever(new MyKeyRetriever()) + .Build(); +``` + +### AAD Verification + +When using the lower-level encryption API, you may specify "additional authenticated data" (AAD) +to allow verifying that data has not been replaced with different data encrypted with the same key. 
+See the [Parquet format AAD documentation](https://github.com/apache/parquet-format/blob/master/Encryption.md#44-additional-authenticated-data) +for details of how this is implemented. + +An AAD prefix can be specified when creating `FileEncryptionProperties`: + +```c# +using var builder = new FileEncryptionPropertiesBuilder(key0); +using var fileEncryptionProperties = builder + .FooterKeyMetadata("key0") + .AadPrefix("expected-prefix") + .Build(); +``` + +And then the expected prefix should be provided when creating `FileDecryptionProperties`: + +```c# +using var builder = new FileDecryptionPropertiesBuilder(); +using var fileDecryptionProperties = builder + .KeyRetriever(new MyKeyRetriever()) + .AadPrefix("expected-prefix") + .Build(); +``` + +If the AAD prefix doesn't match the expected prefix an exception will be thrown when reading the file. + +Alternatively, you can implement an `AadPrefixVerifier` if you have more complex verification logic: + +```c# +internal sealed class MyAadVerifier : ParquetSharp.AadPrefixVerifier +{ + public override void Verify(string aadPrefix) + { + if (aadPrefix != "expected-prefix") + { + throw new Exception($"Got unexpected AAD prefix: {aadPrefix}"); + } + } +} +``` + +And then provide an instance of this verifier when creating decryption properties: +```c# +using var builder = new FileDecryptionPropertiesBuilder(); +using var fileDecryptionProperties = builder + .KeyRetriever(new MyKeyRetriever()) + .AadPrefixVerifier(new MyAadVerifier()) + .Build(); +``` + +## Arrow API Compatibility + +Note that the above examples use the `ParquetFileReader` and `ParquetFileWriter` classes, +but encryption may also be used with the Arrow API. +The `ParquetSharp.Arrow.FileReader` and `ParquetSharp.Arrow.FileWriter` constructors +accept `ReaderProperties` and `WriterProperties` parameters respectively, +which can have encryption properties configured.