From a392dbc427557a089b7796d9175956fb66e20c1b Mon Sep 17 00:00:00 2001 From: Andreas Karlsson Date: Wed, 6 Aug 2025 19:14:38 +0200 Subject: [PATCH 1/3] Do not try to delete keys or log WAL for temporary tables We forgot to have a check against trying to delete leftover SMGR keys for temporary tables which is a useless operation since they are store in memory. Additionally we forgot to prevent WAL from being written when creating or removing a key in smgrcreate() for temporary tables. --- contrib/pg_tde/src/access/pg_tde_xlog.c | 2 +- contrib/pg_tde/src/include/smgr/pg_tde_smgr.h | 2 +- contrib/pg_tde/src/smgr/pg_tde_smgr.c | 72 ++++++++++--------- 3 files changed, 41 insertions(+), 35 deletions(-) diff --git a/contrib/pg_tde/src/access/pg_tde_xlog.c b/contrib/pg_tde/src/access/pg_tde_xlog.c index 2ffe99e7f4478..e7fe62e8ce8d0 100644 --- a/contrib/pg_tde/src/access/pg_tde_xlog.c +++ b/contrib/pg_tde/src/access/pg_tde_xlog.c @@ -62,7 +62,7 @@ tdeheap_rmgr_redo(XLogReaderState *record) { XLogRelKey *xlrec = (XLogRelKey *) XLogRecGetData(record); - tde_smgr_delete_key_redo(&xlrec->rlocator); + tde_smgr_remove_leftover_key_redo(&xlrec->rlocator); } else if (info == XLOG_TDE_ROTATE_PRINCIPAL_KEY) { diff --git a/contrib/pg_tde/src/include/smgr/pg_tde_smgr.h b/contrib/pg_tde/src/include/smgr/pg_tde_smgr.h index a97421cfef914..bd899ab3efb01 100644 --- a/contrib/pg_tde/src/include/smgr/pg_tde_smgr.h +++ b/contrib/pg_tde/src/include/smgr/pg_tde_smgr.h @@ -6,7 +6,7 @@ extern void RegisterStorageMgr(void); extern void tde_smgr_create_key_redo(const RelFileLocator *rlocator); -extern void tde_smgr_delete_key_redo(const RelFileLocator *rlocator); +extern void tde_smgr_remove_leftover_key_redo(const RelFileLocator *rlocator); extern bool tde_smgr_rel_is_encrypted(SMgrRelation reln); #endif /* PG_TDE_SMGR_H */ diff --git a/contrib/pg_tde/src/smgr/pg_tde_smgr.c b/contrib/pg_tde/src/smgr/pg_tde_smgr.c index 939b4ea552da2..6ab5bd3290a0f 100644 --- a/contrib/pg_tde/src/smgr/pg_tde_smgr.c +++ b/contrib/pg_tde/src/smgr/pg_tde_smgr.c @@ -70,6 +70,26 @@ static bool tde_smgr_has_temp_key(const RelFileLocator *rel); static void tde_smgr_remove_temp_key(const RelFileLocator *rel); static void CalcBlockIv(ForkNumber forknum, BlockNumber bn, const unsigned char *base_iv, unsigned char *iv); +static void +tde_smgr_log_create_key(const RelFileLocator *rlocator) +{ + XLogRelKey xlrec = {.rlocator = *rlocator}; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, sizeof(xlrec)); + XLogInsert(RM_TDERMGR_ID, XLOG_TDE_ADD_RELATION_KEY); +} + +static void +tde_smgr_log_remove_leftover_key(const RelFileLocator *rlocator) +{ + XLogRelKey xlrec = {.rlocator = *rlocator}; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, sizeof(xlrec)); + XLogInsert(RM_TDERMGR_ID, XLOG_TDE_DELETE_RELATION_KEY); +} + static InternalKey * tde_smgr_create_key(const RelFileLocatorBackend *smgr_rlocator) { @@ -80,23 +100,14 @@ tde_smgr_create_key(const RelFileLocatorBackend *smgr_rlocator) if (RelFileLocatorBackendIsTemp(*smgr_rlocator)) tde_smgr_save_temp_key(&smgr_rlocator->locator, key); else + { pg_tde_save_smgr_key(smgr_rlocator->locator, key); + tde_smgr_log_create_key(&smgr_rlocator->locator); + } return key; } -static void -tde_smgr_log_create_key(const RelFileLocatorBackend *smgr_rlocator) -{ - XLogRelKey xlrec = { - .rlocator = smgr_rlocator->locator, - }; - - XLogBeginInsert(); - XLogRegisterData((char *) &xlrec, sizeof(xlrec)); - XLogInsert(RM_TDERMGR_ID, XLOG_TDE_ADD_RELATION_KEY); -} - void tde_smgr_create_key_redo(const RelFileLocator *rlocator) { @@ -111,21 +122,26 @@ tde_smgr_create_key_redo(const RelFileLocator *rlocator) } static void -tde_smgr_delete_key(const RelFileLocatorBackend *smgr_rlocator) +tde_smgr_remove_key(const RelFileLocatorBackend *smgr_rlocator) { - XLogRelKey xlrec = { - .rlocator = smgr_rlocator->locator, - }; - - pg_tde_free_key_map_entry(smgr_rlocator->locator); + if (RelFileLocatorBackendIsTemp(*smgr_rlocator)) + tde_smgr_remove_temp_key(&smgr_rlocator->locator); + else + pg_tde_free_key_map_entry(smgr_rlocator->locator); +} - XLogBeginInsert(); - XLogRegisterData((char *) &xlrec, sizeof(xlrec)); - XLogInsert(RM_TDERMGR_ID, XLOG_TDE_DELETE_RELATION_KEY); +static void +tde_smgr_remove_leftover_key(const RelFileLocatorBackend *smgr_rlocator) +{ + if (!RelFileLocatorBackendIsTemp(*smgr_rlocator)) + { + pg_tde_free_key_map_entry(smgr_rlocator->locator); + tde_smgr_log_remove_leftover_key(&smgr_rlocator->locator); + } } void -tde_smgr_delete_key_redo(const RelFileLocator *rlocator) +tde_smgr_remove_leftover_key_redo(const RelFileLocator *rlocator) { pg_tde_free_key_map_entry(*rlocator); } @@ -148,15 +164,6 @@ tde_smgr_get_key(const RelFileLocatorBackend *smgr_rlocator) return pg_tde_get_smgr_key(smgr_rlocator->locator); } -static void -tde_smgr_remove_key(const RelFileLocatorBackend *smgr_rlocator) -{ - if (RelFileLocatorBackendIsTemp(*smgr_rlocator)) - tde_smgr_remove_temp_key(&smgr_rlocator->locator); - else - pg_tde_free_key_map_entry(smgr_rlocator->locator); -} - static bool tde_smgr_should_encrypt(const RelFileLocatorBackend *smgr_rlocator, RelFileLocator *old_locator) { @@ -384,7 +391,7 @@ tde_mdcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool * If we're in redo, a separate WAL record will make sure the key is * removed. */ - tde_smgr_delete_key(&reln->smgr_rlocator); + tde_smgr_remove_leftover_key(&reln->smgr_rlocator); if (!tde_smgr_should_encrypt(&reln->smgr_rlocator, &relold)) { @@ -393,7 +400,6 @@ tde_mdcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool } key = tde_smgr_create_key(&reln->smgr_rlocator); - tde_smgr_log_create_key(&reln->smgr_rlocator); tdereln->encryption_status = RELATION_KEY_AVAILABLE; tdereln->relKey = *key; From 8ea421fd3f42788511236c45631847434ba3d502 Mon Sep 17 00:00:00 2001 From: Andreas Karlsson Date: Wed, 6 Aug 2025 18:45:23 +0200 Subject: [PATCH 2/3] WIP: Do not log key deletion separately for encrypted tables --- contrib/pg_tde/src/access/pg_tde_tdemap.c | 19 ++++++++++++------- contrib/pg_tde/src/smgr/pg_tde_smgr.c | 23 ++++++++++------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/contrib/pg_tde/src/access/pg_tde_tdemap.c b/contrib/pg_tde/src/access/pg_tde_tdemap.c index ed01210adccf1..e250f3b0c86b7 100644 --- a/contrib/pg_tde/src/access/pg_tde_tdemap.c +++ b/contrib/pg_tde/src/access/pg_tde_tdemap.c @@ -101,7 +101,7 @@ static void finalize_key_rotation(const char *path_old, const char *path_new); static int pg_tde_file_header_write(const char *tde_filename, int fd, const TDESignedPrincipalKeyInfo *signed_key_info, off_t *bytes_written); static void pg_tde_initialize_map_entry(TDEMapEntry *map_entry, const TDEPrincipalKey *principal_key, const RelFileLocator *rlocator, const InternalKey *rel_key_data); static int pg_tde_open_file_write(const char *tde_filename, const TDESignedPrincipalKeyInfo *signed_key_info, bool truncate, off_t *curr_pos); -static void pg_tde_write_key_map_entry(const RelFileLocator *rlocator, const InternalKey *rel_key_data, TDEPrincipalKey *principal_key); +static void pg_tde_replace_key_map_entry(const RelFileLocator *rlocator, const InternalKey *rel_key_data, TDEPrincipalKey *principal_key); void pg_tde_save_smgr_key(RelFileLocator rel, const InternalKey *rel_key_data) @@ -118,7 +118,7 @@ pg_tde_save_smgr_key(RelFileLocator rel, const InternalKey *rel_key_data) errhint("Use pg_tde_set_key_using_database_key_provider() or pg_tde_set_key_using_global_key_provider() to configure one.")); } - pg_tde_write_key_map_entry(&rel, rel_key_data, principal_key); + pg_tde_replace_key_map_entry(&rel, rel_key_data, principal_key); LWLockRelease(lock_pk); } @@ -483,11 +483,12 @@ pg_tde_write_one_map_entry(int fd, const TDEMapEntry *map_entry, off_t *offset, * concurrent in place updates leading to data conflicts. */ void -pg_tde_write_key_map_entry(const RelFileLocator *rlocator, const InternalKey *rel_key_data, TDEPrincipalKey *principal_key) +pg_tde_replace_key_map_entry(const RelFileLocator *rlocator, const InternalKey *rel_key_data, TDEPrincipalKey *principal_key) { char db_map_path[MAXPGPATH]; int map_fd; off_t curr_pos = 0; + off_t write_pos = 0; TDEMapEntry write_map_entry; TDESignedPrincipalKeyInfo signed_key_Info; @@ -512,22 +513,26 @@ pg_tde_write_key_map_entry(const RelFileLocator *rlocator, const InternalKey *re if (!pg_tde_read_one_map_entry(map_fd, &read_map_entry, &curr_pos)) { - curr_pos = prev_pos; + if (write_pos == 0) + write_pos = prev_pos; break; } - if (read_map_entry.type == MAP_ENTRY_TYPE_EMPTY) + if (read_map_entry.spcOid == rlocator->spcOid && read_map_entry.relNumber == rlocator->relNumber) { - curr_pos = prev_pos; + write_pos = prev_pos; break; } + + if (write_pos == 0 && read_map_entry.type == MAP_ENTRY_TYPE_EMPTY) + write_pos = prev_pos; } /* Initialize map entry and encrypt key */ pg_tde_initialize_map_entry(&write_map_entry, principal_key, rlocator, rel_key_data); /* Write the given entry at curr_pos; i.e. the free entry. */ - pg_tde_write_one_map_entry(map_fd, &write_map_entry, &curr_pos, db_map_path); + pg_tde_write_one_map_entry(map_fd, &write_map_entry, &write_pos, db_map_path); CloseTransientFile(map_fd); } diff --git a/contrib/pg_tde/src/smgr/pg_tde_smgr.c b/contrib/pg_tde/src/smgr/pg_tde_smgr.c index 6ab5bd3290a0f..bf969b6b22aa7 100644 --- a/contrib/pg_tde/src/smgr/pg_tde_smgr.c +++ b/contrib/pg_tde/src/smgr/pg_tde_smgr.c @@ -113,9 +113,6 @@ tde_smgr_create_key_redo(const RelFileLocator *rlocator) { InternalKey key; - if (pg_tde_has_smgr_key(*rlocator)) - return; - pg_tde_generate_internal_key(&key); pg_tde_save_smgr_key(*rlocator, &key); @@ -383,18 +380,18 @@ tde_mdcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool if (forknum != MAIN_FORKNUM) return; - /* - * If we have a key for this relation already, we need to remove it. This - * can happen if OID is re-used after a crash left a key for a - * non-existing relation in the key file. - * - * If we're in redo, a separate WAL record will make sure the key is - * removed. - */ - tde_smgr_remove_leftover_key(&reln->smgr_rlocator); - if (!tde_smgr_should_encrypt(&reln->smgr_rlocator, &relold)) { + /* + * If we have a key for this relation already, we need to remove it. + * This can happen if OID is re-used after a crash left a key for a + * non-existing relation in the key file. + * + * Old keys for encrypted tables are replace when creating the new + * key. + */ + tde_smgr_remove_leftover_key(&reln->smgr_rlocator); + tdereln->encryption_status = RELATION_NOT_ENCRYPTED; return; } From db898dc8057a2b0e4ad1691198425a6f74413a98 Mon Sep 17 00:00:00 2001 From: Andreas Karlsson Date: Wed, 6 Aug 2025 21:41:42 +0200 Subject: [PATCH 3/3] WIP: Attempt at using buffered IO when adding a new key --- contrib/pg_tde/src/access/pg_tde_tdemap.c | 86 ++++++++++++++++++++--- 1 file changed, 76 insertions(+), 10 deletions(-) diff --git a/contrib/pg_tde/src/access/pg_tde_tdemap.c b/contrib/pg_tde/src/access/pg_tde_tdemap.c index e250f3b0c86b7..f3bb4b534ef25 100644 --- a/contrib/pg_tde/src/access/pg_tde_tdemap.c +++ b/contrib/pg_tde/src/access/pg_tde_tdemap.c @@ -478,6 +478,74 @@ pg_tde_write_one_map_entry(int fd, const TDEMapEntry *map_entry, off_t *offset, #endif #ifndef FRONTEND +static FILE * +pg_tde_open_file_basic_copy(const char *tde_filename, const char *mode, bool ignore_missing) +{ + FILE *file; + + file = AllocateFile(tde_filename, mode); + if (file == NULL && !(errno == ENOENT && ignore_missing == true)) + { + ereport(ERROR, + errcode_for_file_access(), + errmsg("could not open tde file \"%s\": %m", tde_filename)); + } + + return file; +} + +static void +pg_tde_file_header_read_copy(const char *tde_filename, FILE *file, TDEFileHeader *fheader) +{ + size_t bytes_read; + + Assert(fheader); + + bytes_read = fread(fheader, TDE_FILE_HEADER_SIZE, 1, file); + + if (bytes_read != 1 || fheader->file_version != PG_TDE_FILEMAGIC) + { + ereport(FATAL, + errcode_for_file_access(), + errmsg("TDE map file \"%s\" is corrupted %ld %d: %m", tde_filename, bytes_read, fheader->file_version)); + } +} + +static FILE * +pg_tde_open_file_write_copy(const char *tde_filename, const TDESignedPrincipalKeyInfo *signed_key_info) +{ + FILE *file; + TDEFileHeader fheader; + + Assert(LWLockHeldByMeInMode(tde_lwlock_enc_keys(), LW_EXCLUSIVE)); + + file = pg_tde_open_file_basic_copy(tde_filename, "rb+", false); + + pg_tde_file_header_read_copy(tde_filename, file, &fheader); + + ///* In case it's a new file, let's add the header now. */ + //if (bytes_read == 0 && signed_key_info) + // pg_tde_file_header_write(tde_filename, fd, signed_key_info, &bytes_written); + + return file; +} + +static bool +pg_tde_read_one_map_entry_copy(FILE *map_file, TDEMapEntry *map_entry) +{ + size_t entries_read; + + Assert(map_entry); + + entries_read = fread(map_entry, MAP_ENTRY_SIZE, 1, map_file); + + /* We've reached the end of the file. */ + if (entries_read != 1) + return false; + + return true; +} + /* * The caller must hold an exclusive lock on the key file to avoid * concurrent in place updates leading to data conflicts. @@ -486,8 +554,7 @@ void pg_tde_replace_key_map_entry(const RelFileLocator *rlocator, const InternalKey *rel_key_data, TDEPrincipalKey *principal_key) { char db_map_path[MAXPGPATH]; - int map_fd; - off_t curr_pos = 0; + FILE *map_file; off_t write_pos = 0; TDEMapEntry write_map_entry; TDESignedPrincipalKeyInfo signed_key_Info; @@ -499,7 +566,7 @@ pg_tde_replace_key_map_entry(const RelFileLocator *rlocator, const InternalKey * pg_tde_sign_principal_key_info(&signed_key_Info, principal_key); /* Open and validate file for basic correctness. */ - map_fd = pg_tde_open_file_write(db_map_path, &signed_key_Info, false, &curr_pos); + map_file = pg_tde_open_file_write_copy(db_map_path, &signed_key_Info); /* * Read until we find an empty slot. Otherwise, read until end. This seems @@ -509,32 +576,31 @@ pg_tde_replace_key_map_entry(const RelFileLocator *rlocator, const InternalKey * while (1) { TDEMapEntry read_map_entry; - off_t prev_pos = curr_pos; - if (!pg_tde_read_one_map_entry(map_fd, &read_map_entry, &curr_pos)) + if (!pg_tde_read_one_map_entry_copy(map_file, &read_map_entry)) { if (write_pos == 0) - write_pos = prev_pos; + write_pos = ftell(map_file); break; } if (read_map_entry.spcOid == rlocator->spcOid && read_map_entry.relNumber == rlocator->relNumber) { - write_pos = prev_pos; + write_pos = ftell(map_file) - MAP_ENTRY_SIZE; break; } if (write_pos == 0 && read_map_entry.type == MAP_ENTRY_TYPE_EMPTY) - write_pos = prev_pos; + write_pos = ftell(map_file) - MAP_ENTRY_SIZE; } /* Initialize map entry and encrypt key */ pg_tde_initialize_map_entry(&write_map_entry, principal_key, rlocator, rel_key_data); /* Write the given entry at curr_pos; i.e. the free entry. */ - pg_tde_write_one_map_entry(map_fd, &write_map_entry, &write_pos, db_map_path); + pg_tde_write_one_map_entry(fileno(map_file), &write_map_entry, &write_pos, db_map_path); - CloseTransientFile(map_fd); + FreeFile(map_file); } #endif