Skip to content

Commit 19e1be9

Browse files
committed
Add recovery tap test cases to pg_tde tap tests
1 parent 48985eb commit 19e1be9

37 files changed

+10876
-0
lines changed

contrib/pg_tde/t/recovery/001_stream_rep.pl

Lines changed: 651 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
2+
# Copyright (c) 2021-2024, PostgreSQL Global Development Group
3+
4+
# test for archiving with hot standby
5+
use strict;
6+
use warnings FATAL => 'all';
7+
use PostgreSQL::Test::Cluster;
8+
use PostgreSQL::Test::Utils;
9+
use Test::More;
10+
use File::Copy;
11+
use lib 't';
12+
use pgtde;
13+
14+
# Set up the primary node with WAL archiving and streaming enabled, with
# pg_tde preloaded and tde_heap as the default table access method.
my $node_primary = PostgreSQL::Test::Cluster->new('primary');
$node_primary->init(has_archiving => 1, allows_streaming => 1);
foreach my $setting (
    "shared_preload_libraries = 'pg_tde'",
    "default_table_access_method = 'tde_heap'")
{
    $node_primary->append_conf('postgresql.conf', $setting);
}
my $backup_name = 'my_backup';

# Bring the primary up.
$node_primary->start;

# Remove keyring files that may already exist at the fixed paths used by
# the key providers registered below.
foreach my $keyring_file ('/tmp/global_keyring.file',
    '/tmp/local_keyring.file')
{
    unlink($keyring_file);
}

# Create the pg_tde extension, then register a global and a database-level
# file key provider, creating and activating one key on each of them.
foreach my $tde_statement (
    'CREATE EXTENSION IF NOT EXISTS pg_tde;',
    "SELECT pg_tde_add_global_key_provider_file('global_key_provider', '/tmp/global_keyring.file');",
    "SELECT pg_tde_create_key_using_global_key_provider('global_test_key_arch', 'global_key_provider');",
    "SELECT pg_tde_set_server_key_using_global_key_provider('global_test_key_arch', 'global_key_provider');",
    "SELECT pg_tde_add_database_key_provider_file('local_key_provider', '/tmp/local_keyring.file');",
    "SELECT pg_tde_create_key_using_database_key_provider('local_test_key_arch', 'local_key_provider');",
    "SELECT pg_tde_set_key_using_database_key_provider('local_test_key_arch', 'local_key_provider');"
  )
{
    $node_primary->safe_psql('postgres', $tde_statement);
}

# WAL encryption stays on unless the WAL_ENCRYPTION environment variable
# is set to exactly 'off'.
my $WAL_ENCRYPTION = $ENV{WAL_ENCRYPTION} // 'on';

if ($WAL_ENCRYPTION eq 'off')
{
    $node_primary->append_conf('postgresql.conf',
        "pg_tde.wal_encrypt = off\n");
}
else
{
    $node_primary->append_conf('postgresql.conf',
        "pg_tde.wal_encrypt = on\n");
}

# Restart so that the appended setting is picked up.
$node_primary->restart;
54+
55+
# Take the base backup that the standbys are created from.
PGTDE::backup($node_primary, $backup_name);

# Names of the marker files written by the dummy commands configured below.
my $archive_cleanup_command_file = "archive_cleanup_command.done";
my $recovery_end_command_file = "recovery_end_command.done";

# Build the standby from the backup, fetching WAL from the archives.
# Note that this makes the standby store its contents on the archives
# of the primary.
my $node_standby = PostgreSQL::Test::Cluster->new('standby');
$node_standby->init_from_backup(
    $node_primary, $backup_name,
    has_restoring => 1);
$node_standby->append_conf(
    'postgresql.conf',
    "wal_retrieve_retry_interval = '100ms'");

# archive_cleanup_command and recovery_end_command are set to dummy
# commands that each write a marker file, so that their execution by the
# backend can be checked later on.
my $data_dir = $node_standby->data_dir;
$node_standby->append_conf(
    'postgresql.conf', qq(
archive_cleanup_command = 'echo archive_cleanup_done > $archive_cleanup_command_file'
recovery_end_command = 'echo recovery_ended_done > $recovery_end_command_file'
));

$node_standby->start;
78+
79+
# Populate the primary with an initial table of 1000 rows.
$node_primary->safe_psql(
    'postgres',
    "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");

# This checkpoint matters for the archive_cleanup_command check done
# further down; it is issued before switching to a new segment.
$node_primary->safe_psql('postgres', "CHECKPOINT");

# The LSN is taken after the checkpoint so that waiting for it on the
# standby guarantees the checkpoint has been replayed there, which
# archive_cleanup_command relies on.
my $current_lsn = $node_primary->safe_psql(
    'postgres',
    "SELECT pg_current_wal_lsn();");

# Switch segments to force the WAL file to be archived on the primary.
$node_primary->safe_psql('postgres', "SELECT pg_switch_wal()");

# Rows added from here on should not be visible on the standby.
$node_primary->safe_psql(
    'postgres',
    "INSERT INTO tab_int VALUES (generate_series(1001,2000))");

# Block until the standby has replayed up to the LSN recorded above.
my $caughtup_query =
  "SELECT '$current_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
if (!$node_standby->poll_query_until('postgres', $caughtup_query))
{
    die "Timed out while waiting for standby to catch up";
}

# Only the first 1000 rows are expected on the standby.
my $result = $node_standby->safe_psql(
    'postgres',
    "SELECT count(*) FROM tab_int");
is($result, '1000', 'check content from archives');

# archive_cleanup_command is executed after generating a restart point,
# which a checkpoint on the standby triggers.
$node_standby->safe_psql('postgres', 'CHECKPOINT');
ok( -f "$data_dir/$archive_cleanup_command_file",
    'archive_cleanup_command executed on checkpoint');
ok( !-f "$data_dir/$recovery_end_command_file",
    'recovery_end_command not executed yet');
116+
117+
# The remainder of the test looks at the temporary files generated
# specifically during archive recovery. For a temporary history file to
# exist, a standby has to recover a history file from an archive, which
# needs a timeline high enough, hence at least two timeline switches:
# the existing standby is promoted first, then a second standby is
# created from the primary using its archives and promoted in turn.
$node_standby->promote;

# Wait for the history file produced by the promotion to show up in the
# archives of the primary. The second standby created later can then
# restore this file, creating a RECOVERYHISTORY.
my $primary_archive = $node_primary->archive_dir;
$caughtup_query =
  "SELECT size IS NOT NULL FROM pg_stat_file('$primary_archive/00000002.history', true)";
if (!$node_primary->poll_query_until('postgres', $caughtup_query))
{
    die "Timed out while waiting for archiving of 00000002.history";
}

# The promotion should have triggered recovery_end_command.
ok( -f "$data_dir/$recovery_end_command_file",
    'recovery_end_command executed after promotion');
139+
140+
# Second standby, restored from the same backup and reading the archives
# of the primary.
my $node_standby2 = PostgreSQL::Test::Cluster->new('standby2');
$node_standby2->init_from_backup(
    $node_primary, $backup_name,
    has_restoring => 1);

# Configure a recovery_end_command that fails because its target
# directory does not exist. This should not affect promotion, and the
# failure should be logged.
$node_standby2->append_conf(
    'postgresql.conf', qq(
recovery_end_command = 'echo recovery_end_failed > missing_dir/xyz.file'
));

$node_standby2->start;

# Remember the current size of the log file so that only what is written
# from this point on is read back below.
my $log_location = -s $node_standby2->logfile;

# Promote standby2; the temporary files generated during archive
# recovery should be gone once recovery has ended.
$node_standby2->promote;

# Read the part of the standby log written since the saved offset.
my $node_standby2_data = $node_standby2->data_dir;
my $log_contents = slurp_file($node_standby2->logfile, $log_location);

like(
    $log_contents,
    qr/restored log file "00000002.history" from archive/s,
    "00000002.history retrieved from the archives");

ok( !-f "$node_standby2_data/pg_wal/RECOVERYHISTORY",
    "RECOVERYHISTORY removed after promotion");

ok( !-f "$node_standby2_data/pg_wal/RECOVERYXLOG",
    "RECOVERYXLOG removed after promotion");

# The failing recovery_end_command must have produced a warning.
like(
    $log_contents,
    qr/WARNING:.*recovery_end_command/s,
    "recovery_end_command failure detected in logs after promotion");

done_testing();
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
2+
# Copyright (c) 2021-2024, PostgreSQL Global Development Group
3+
4+
# Test for recovery targets: name, timestamp, XID
5+
use strict;
6+
use warnings FATAL => 'all';
7+
use PostgreSQL::Test::Cluster;
8+
use PostgreSQL::Test::Utils;
9+
use Test::More;
10+
use Time::HiRes qw(usleep);
11+
use lib 't';
12+
use pgtde;
13+
14+
# Build a standby from the 'my_backup' base backup of the given primary,
# apply a set of recovery settings to it, and verify the row count of
# tab_int once replay has reached a given LSN.
#
# Arguments, in order:
#   $test_name       - label appended to the test description
#   $node_name       - name of the standby node to create
#   $node_primary    - node whose backup 'my_backup' is restored
#   $recovery_params - array reference of postgresql.conf lines to append
#   $num_rows        - expected result of count(*) on tab_int
#   $until_lsn       - LSN the standby must have replayed before the check
#
# Choose $until_lsn later than the transaction commit that causes the row
# count to reach $num_rows, yet not later than the recovery target.
#
# Dies if polling for the replay LSN does not succeed. The standby is
# torn down before returning.
sub test_recovery_standby
{
    # Report test failures at the caller's line rather than in here.
    local $Test::Builder::Level = $Test::Builder::Level + 1;

    my ($test_name, $node_name, $node_primary,
        $recovery_params, $num_rows, $until_lsn) = @_;

    my $standby = PostgreSQL::Test::Cluster->new($node_name);
    $standby->init_from_backup(
        $node_primary, 'my_backup',
        has_restoring => 1);

    # Append each recovery setting as its own configuration line.
    for my $setting (@{$recovery_params})
    {
        $standby->append_conf('postgresql.conf', "$setting");
    }

    $standby->start;

    # Wait until the standby has replayed enough data.
    my $replay_reached =
      "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
    if (!$standby->poll_query_until('postgres', $replay_reached))
    {
        die "Timed out while waiting for standby to catch up";
    }

    # Compare the standby's row count with the expected one.
    my $row_count =
      $standby->safe_psql('postgres', "SELECT count(*) FROM tab_int");
    is($row_count, "$num_rows", "check standby content for $test_name");

    # The standby is not needed any more.
    $standby->teardown_node;

    return;
}
55+
56+
# Set up the primary node with WAL archiving and streaming enabled, with
# pg_tde preloaded and tde_heap as the default table access method.
my $node_primary = PostgreSQL::Test::Cluster->new('primary');
$node_primary->init(
    has_archiving => 1,
    allows_streaming => 1);
foreach my $setting (
    "shared_preload_libraries = 'pg_tde'",
    "default_table_access_method = 'tde_heap'")
{
    $node_primary->append_conf('postgresql.conf', $setting);
}

# Bump the transaction ID epoch. This is useful to stress the portability
# of recovery_target_xid parsing.
system_or_bail('pg_resetwal', '--epoch', '1', $node_primary->data_dir);

# Bring the primary up.
$node_primary->start;

# Remove keyring files that may already exist at the fixed paths used by
# the key providers registered below.
foreach my $keyring_file ('/tmp/global_keyring.file',
    '/tmp/local_keyring.file')
{
    unlink($keyring_file);
}

# Create the pg_tde extension, then register a global and a database-level
# file key provider, creating and activating one key on each of them.
foreach my $tde_statement (
    'CREATE EXTENSION IF NOT EXISTS pg_tde;',
    "SELECT pg_tde_add_global_key_provider_file('global_key_provider', '/tmp/global_keyring.file');",
    "SELECT pg_tde_create_key_using_global_key_provider('global_test_key_recovery', 'global_key_provider');",
    "SELECT pg_tde_set_server_key_using_global_key_provider('global_test_key_recovery', 'global_key_provider');",
    "SELECT pg_tde_add_database_key_provider_file('local_key_provider', '/tmp/local_keyring.file');",
    "SELECT pg_tde_create_key_using_database_key_provider('local_test_key_recovery', 'local_key_provider');",
    "SELECT pg_tde_set_key_using_database_key_provider('local_test_key_recovery', 'local_key_provider');"
  )
{
    $node_primary->safe_psql('postgres', $tde_statement);
}

# WAL encryption stays on unless the WAL_ENCRYPTION environment variable
# is set to exactly 'off'.
my $WAL_ENCRYPTION = $ENV{WAL_ENCRYPTION} // 'on';

if ($WAL_ENCRYPTION eq 'off')
{
    $node_primary->append_conf('postgresql.conf',
        "pg_tde.wal_encrypt = off\n");
}
else
{
    $node_primary->append_conf('postgresql.conf',
        "pg_tde.wal_encrypt = on\n");
}

# Restart so that the appended setting is picked up.
$node_primary->restart;
97+
98+
# Rows 1..1000 exist before the backup is taken; they are what a standby
# using recovery_target = 'immediate' is expected to see.
$node_primary->safe_psql(
    'postgres',
    "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
my $lsn1 = $node_primary->safe_psql(
    'postgres',
    "SELECT pg_current_wal_lsn();");

# Base backup from which every standby of this test is created.
PGTDE::backup($node_primary, 'my_backup');

# Insert some data to be used as a replay reference, with a recovery
# target TXID. The LSN and the transaction ID come back as one
# '|'-separated row.
$node_primary->safe_psql(
    'postgres',
    "INSERT INTO tab_int VALUES (generate_series(1001,2000))");
my $ret = $node_primary->safe_psql(
    'postgres',
    "SELECT pg_current_wal_lsn(), pg_current_xact_id();");
my ($lsn2, $recovery_txid) = split /\|/, $ret;

# More data, with a recovery target timestamp.
$node_primary->safe_psql(
    'postgres',
    "INSERT INTO tab_int VALUES (generate_series(2001,3000))");
my $lsn3 = $node_primary->safe_psql(
    'postgres',
    "SELECT pg_current_wal_lsn();");
my $recovery_time = $node_primary->safe_psql('postgres', "SELECT now()");

# Even more data, this time with a recovery target name.
$node_primary->safe_psql(
    'postgres',
    "INSERT INTO tab_int VALUES (generate_series(3001,4000))");
my $recovery_name = "my_target";
my $lsn4 = $node_primary->safe_psql(
    'postgres',
    "SELECT pg_current_wal_lsn();");
$node_primary->safe_psql(
    'postgres',
    "SELECT pg_create_restore_point('$recovery_name');");

# And now for a recovery target LSN; the same value serves both as the
# target and as the replay reference.
$node_primary->safe_psql(
    'postgres',
    "INSERT INTO tab_int VALUES (generate_series(4001,5000))");
my $recovery_lsn = $node_primary->safe_psql(
    'postgres',
    "SELECT pg_current_wal_lsn()");
my $lsn5 = $recovery_lsn;

# Final batch of rows, inserted after the last reference point.
$node_primary->safe_psql(
    'postgres',
    "INSERT INTO tab_int VALUES (generate_series(5001,6000))");

# Force archiving of the WAL file.
$node_primary->safe_psql('postgres', "SELECT pg_switch_wal()");
143+
144+
# Run one standby per kind of recovery target. Each call passes its
# recovery settings as an array reference, together with the expected
# row count and the LSN to wait for.
test_recovery_standby(
    'immediate target', 'standby_1', $node_primary,
    ["recovery_target = 'immediate'"],
    "1000", $lsn1);

test_recovery_standby(
    'XID', 'standby_2', $node_primary,
    ["recovery_target_xid = '$recovery_txid'"],
    "2000", $lsn2);

test_recovery_standby(
    'time', 'standby_3', $node_primary,
    ["recovery_target_time = '$recovery_time'"],
    "3000", $lsn3);

test_recovery_standby(
    'name', 'standby_4', $node_primary,
    ["recovery_target_name = '$recovery_name'"],
    "4000", $lsn4);

test_recovery_standby(
    'LSN', 'standby_5', $node_primary,
    ["recovery_target_lsn = '$recovery_lsn'"],
    "5000", $lsn5);

# Multiple targets
#
# Multiple conflicting settings are not allowed, but setting the same
# parameter multiple times or unsetting a parameter and setting a
# different one is allowed. Here the name target is set, then emptied,
# and a time target is set instead.
test_recovery_standby(
    'multiple overriding settings', 'standby_6', $node_primary,
    [
        "recovery_target_name = '$recovery_name'",
        "recovery_target_name = ''",
        "recovery_target_time = '$recovery_time'"
    ],
    "3000", $lsn3);
173+
174+
# A standby configured with two different recovery targets at the same
# time (a name and a timestamp) is expected to fail at startup.
my $node_standby = PostgreSQL::Test::Cluster->new('standby_7');
$node_standby->init_from_backup($node_primary, 'my_backup',
    has_restoring => 1);
$node_standby->append_conf(
    'postgresql.conf', "recovery_target_name = '$recovery_name'
recovery_target_time = '$recovery_time'");

# Start the node through pg_ctl directly so that the failure to start is
# returned as a result instead of being raised.
my $res = run_log(
    [
        'pg_ctl', '-D', $node_standby->data_dir, '-l',
        $node_standby->logfile, 'start'
    ]);
ok(!$res, 'invalid recovery startup fails');

# The server log must name the reason. like() is used rather than
# ok() on a match expression so that a failure shows the text that was
# checked.
my $logfile = slurp_file($node_standby->logfile());
like(
    $logfile,
    qr/multiple recovery targets specified/,
    'multiple conflicting settings');
191+
192+
# Check behavior when recovery ends before target is reached. The node
# is restored with standby => 0 and given a target name that was never
# created on the primary.
$node_standby = PostgreSQL::Test::Cluster->new('standby_8');
$node_standby->init_from_backup(
    $node_primary, 'my_backup',
    has_restoring => 1,
    standby => 0);
$node_standby->append_conf('postgresql.conf',
    "recovery_target_name = 'does_not_exist'");

# Start through pg_ctl directly; the result is deliberately not checked
# here, the outcome is verified from the server log below.
run_log(
    [
        'pg_ctl', '-D', $node_standby->data_dir, '-l',
        $node_standby->logfile, 'start'
    ]);

# Wait for postgres to terminate, detected by postmaster.pid going away.
# The file is polled every 100ms, for 10 * timeout_default iterations at
# most.
foreach my $i (0 .. 10 * $PostgreSQL::Test::Utils::timeout_default)
{
    last if !-f $node_standby->data_dir . '/postmaster.pid';
    usleep(100_000);
}

# The log must report the unreachable target as a fatal error. like() is
# used rather than ok() on a match expression so that a failure shows the
# text that was checked.
$logfile = slurp_file($node_standby->logfile());
like(
    $logfile,
    qr/FATAL: .* recovery ended before configured recovery target was reached/,
    'recovery end before target reached is a fatal error');

done_testing();

0 commit comments

Comments
 (0)