@@ -246,6 +246,16 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog)
246
246
TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
247
247
}
248
248
249
+ keys = pg_tde_get_wal_cache_keys ();
250
+
251
+ if (keys == NULL )
252
+ {
253
+ WalLocation start = {.tli = 1 ,.lsn = 0 };
254
+
255
+ /* cache is empty, prefetch keys from disk */
256
+ pg_tde_fetch_wal_keys (start );
257
+ }
258
+
249
259
if (key )
250
260
pfree (key );
251
261
}
@@ -263,6 +273,32 @@ TDEXLogSmgrInitWriteReuseKey()
263
273
}
264
274
}
265
275
276
+ /*
277
+ * Write XLog page(s) from buf to the segment file through the encryption
+ * buffer: the data is first copied as-is into EncryptionBuf, then
+ * TDEXLogCryptBuffer() is run with buf as input and the copy as output for
+ * the given tli/segno/offset, and finally the buffer is written with
+ * pg_pwrite().
+ *
+ * In non-FRONTEND builds count is asserted not to exceed
+ * TDEXLogEncryptBuffSize().
+ *
+ * Returns the result of pg_pwrite().
278
+ */
279
+ static ssize_t
280
+ TDEXLogWriteEncryptedPagesOldKeys (int fd , const void * buf , size_t count , off_t offset ,
281
+ TimeLineID tli , XLogSegNo segno , int segSize )
282
+ {
283
+ char * enc_buff = EncryptionBuf ;
284
+
285
+ #ifndef FRONTEND
286
+ Assert (count <= TDEXLogEncryptBuffSize ());
287
+ #endif
288
+
289
+ /* Copy the data as-is first, as some parts of it might be unencrypted */
290
+ memcpy (enc_buff , buf , count );
291
+
292
+ /*
293
+ * This call can potentially allocate, but only very early in execution.
294
+ * It should not happen during a write, where we are in a critical section.
295
+ */
296
+ TDEXLogCryptBuffer (buf , enc_buff , count , offset , tli , segno , segSize );
297
+
298
+ return pg_pwrite (fd , enc_buff , count , offset );
299
+ }
300
+
301
+
266
302
/*
267
303
* Encrypt XLog page(s) from the buf and write to the segment file.
268
304
*/
@@ -284,6 +320,7 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
284
320
#endif
285
321
286
322
CalcXLogPageIVPrefix (tli , segno , key -> base_iv , iv_prefix );
323
+
287
324
pg_tde_stream_crypt (iv_prefix ,
288
325
offset ,
289
326
(char * ) buf ,
@@ -299,26 +336,66 @@ static ssize_t
299
336
tdeheap_xlog_seg_write (int fd , const void * buf , size_t count , off_t offset ,
300
337
TimeLineID tli , XLogSegNo segno , int segSize )
301
338
{
339
+ bool lastKeyUsable ;
340
+ bool afterLastKey ;
341
+ #ifdef FRONTEND
342
+ bool crashRecovery = false;
343
+ #else
344
+ bool crashRecovery = GetRecoveryState () == RECOVERY_STATE_CRASH ;
345
+ #endif
346
+
347
+ WalLocation loc = {.tli = tli };
348
+ WalLocation last_key_loc ;
349
+
350
+ XLogSegNoOffsetToRecPtr (segno , offset , segSize , loc .lsn );
351
+
302
352
/*
303
353
* Set the last (most recent) key's start location if it is not set yet.
304
354
*
305
355
* This function is called with WALWriteLock held, so no extra
* synchronization is needed.
*
* NOTE(review): lastKeyUsable below re-reads TDEXLogGetEncKeyLsn() instead
* of reusing last_key_loc.lsn - confirm the two reads cannot disagree.
306
356
*/
307
- if (EncryptionKey .type != WAL_KEY_TYPE_INVALID && TDEXLogGetEncKeyLsn () == 0 )
308
- {
309
- WalLocation loc = {.tli = tli };
310
357
311
- XLogSegNoOffsetToRecPtr (segno , offset , segSize , loc .lsn );
358
+ last_key_loc .lsn = TDEXLogGetEncKeyLsn ();
359
+ pg_read_barrier ();
360
+ last_key_loc .tli = TDEXLogGetEncKeyTli ();
361
+
362
+ lastKeyUsable = (TDEXLogGetEncKeyLsn () != 0 );
363
+ afterLastKey = wal_location_cmp (last_key_loc , loc ) <= 0 ;
312
364
313
- pg_tde_wal_last_key_set_location (loc );
314
- EncryptionKey .wal_start = loc ;
315
- TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
365
+ Assert (EncryptionKey .type != WAL_KEY_TYPE_INVALID );
366
+
367
+ if (!lastKeyUsable )
368
+ {
369
+ WALKeyCacheRec * last_key = pg_tde_get_last_wal_key ();
370
+
371
+ if (!crashRecovery || EncryptionKey .type == WAL_KEY_TYPE_UNENCRYPTED )
372
+ {
373
+ /*
374
+ * TODO: the unencrypted case is still not perfect; we need to
375
+ * report an error in some corner cases.
376
+ */
377
+ if (last_key == NULL || last_key -> start .lsn < loc .lsn )
378
+ {
379
+ pg_tde_wal_last_key_set_location (loc );
380
+ EncryptionKey .wal_start = loc ;
381
+ TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
382
+ lastKeyUsable = true;
383
+ }
384
+ }
316
385
}
317
386
318
- if (EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
387
+ if ((!afterLastKey || !lastKeyUsable ) && EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
388
+ {
389
+ return TDEXLogWriteEncryptedPagesOldKeys (fd , buf , count , offset , tli , segno , segSize );
390
+ }
391
+ else if (EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
392
+ {
319
393
return TDEXLogWriteEncryptedPages (fd , buf , count , offset , tli , segno );
394
+ }
320
395
else
396
+ {
321
397
return pg_pwrite (fd , buf , count , offset );
398
+ }
322
399
}
323
400
324
401
/*
@@ -340,7 +417,7 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
340
417
if (readsz <= 0 )
341
418
return readsz ;
342
419
343
- TDEXLogCryptBuffer (buf , count , offset , tli , segno , segSize );
420
+ TDEXLogCryptBuffer (buf , buf , count , offset , tli , segno , segSize );
344
421
345
422
return readsz ;
346
423
}
@@ -349,15 +426,15 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
349
426
* [De]Crypt buffer if needed based on provided segment offset, number and TLI
350
427
*/
351
428
void
352
- TDEXLogCryptBuffer (void * buf , size_t count , off_t offset ,
429
+ TDEXLogCryptBuffer (const void * buf , void * out_buf , size_t count , off_t offset ,
353
430
TimeLineID tli , XLogSegNo segno , int segSize )
354
431
{
355
432
WALKeyCacheRec * keys = pg_tde_get_wal_cache_keys ();
356
433
XLogRecPtr write_key_lsn ;
357
434
WalLocation data_end = {.tli = tli };
358
435
WalLocation data_start = {.tli = tli };
359
436
360
- if (! keys )
437
+ if (keys == NULL )
361
438
{
362
439
WalLocation start = {.tli = 1 ,.lsn = 0 };
363
440
@@ -454,6 +531,7 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
454
531
XLogSegmentOffset (end_lsn , segSize );
455
532
size_t dec_sz ;
456
533
char * dec_buf = (char * ) buf + (dec_off - offset );
534
+ char * o_buf = (char * ) out_buf + (dec_off - offset );
457
535
458
536
Assert (dec_off >= offset );
459
537
@@ -468,17 +546,19 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
468
546
dec_end = offset + count ;
469
547
}
470
548
549
+ Assert (dec_end > dec_off );
471
550
dec_sz = dec_end - dec_off ;
472
551
473
552
#ifdef TDE_XLOG_DEBUG
474
553
elog (DEBUG1 , "decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %u_%X/%X" ,
475
554
dec_off , dec_off - offset , dec_sz , curr_key -> key .wal_start .tli , LSN_FORMAT_ARGS (curr_key -> key .wal_start .lsn ));
476
555
#endif
556
+
477
557
pg_tde_stream_crypt (iv_prefix ,
478
558
dec_off ,
479
559
dec_buf ,
480
560
dec_sz ,
481
- dec_buf ,
561
+ o_buf ,
482
562
curr_key -> key .key ,
483
563
& curr_key -> crypt_ctx );
484
564
}
0 commit comments