Skip to content

Commit 41b86ff

Browse files
committed
fix: no longer retries the transaction on 40001 errors
1 parent bf75869 commit 41b86ff

8 files changed

Lines changed: 46 additions & 6 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ All notable changes to this project will be documented in this file. From versio
1919

2020
- Shutdown should wait for in flight requests by @mkleczek in #4702
2121
- Fix login with uppercase and mixed case role names by @taimoorzaeem in #4678
22+
- Remove automatic transaction retries on `40001 (serialization_failure)` errors to prevent replication lag by @laurence in #3673
2223

2324
### Changed
2425

nix/tools/withTools.nix

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ let
105105
106106
log "Starting replica on $replica_host"
107107
108-
pg_ctl -D "$replica_dir" -l "$replica_dblog" -w start -o "-F -c listen_addresses=\"\" -c hba_file=$HBA_FILE -k $replica_host -c log_statement=\"all\" " \
108+
# We set a low max_standby_streaming_delay to make the replication conflict fail faster in tests (otherwise it waits for the default 30s)
109+
pg_ctl -D "$replica_dir" -l "$replica_dblog" -w start -o "-F -c listen_addresses=\"\" -c hba_file=$HBA_FILE -k $replica_host -c log_statement=\"all\" -c max_standby_streaming_delay=\"3s\" " \
109110
>> "$setuplog"
110111
111112
>&2 echo "${commandName}: Replica enabled. You can connect to it with: psql 'postgres:///$PGDATABASE?host=$replica_host' -U postgres"

src/PostgREST/AppState.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ retryingSchemaCacheLoad appState@AppState{stateObserver=observer, stateMainThrea
343343
qSchemaCache = do
344344
conf@AppConfig{..} <- getConfig appState
345345
(resultTime, result) <-
346-
timeItT $ usePool appState (SQL.transaction SQL.ReadCommitted SQL.Read $ querySchemaCache conf)
346+
timeItT $ usePool appState (SQL.transactionNoRetry SQL.ReadCommitted SQL.Read $ querySchemaCache conf)
347347
case result of
348348
Left e -> do
349349
markSchemaCachePending appState

src/PostgREST/CLI.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ dumpSchema :: AppState -> IO LBS.ByteString
6262
dumpSchema appState = do
6363
conf@AppConfig{..} <- AppState.getConfig appState
6464
result <-
65-
AppState.usePool appState (SQL.transaction SQL.ReadCommitted SQL.Read $ querySchemaCache conf)
65+
AppState.usePool appState (SQL.transactionNoRetry SQL.ReadCommitted SQL.Read $ querySchemaCache conf)
6666
case result of
6767
Left e -> do
6868
let observer = AppState.getObserver appState

src/PostgREST/Config/Database.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ pgVersionStatement = SQL.Statement sql HE.noParams versionRow
9494
-- A setting on the database only will have no effect: ALTER DATABASE postgres SET <prefix>jwt_aud = 'xx'
9595
queryDbSettings :: Maybe Text -> Session [(Text, Text)]
9696
queryDbSettings preConfFunc =
97-
SQL.transaction SQL.ReadCommitted SQL.Read $ SQL.statement dbSettingsNames $ SQL.Statement sql (arrayParam HE.text) decodeSettings True
97+
SQL.transactionNoRetry SQL.ReadCommitted SQL.Read $ SQL.statement dbSettingsNames $ SQL.Statement sql (arrayParam HE.text) decodeSettings True
9898
where
9999
sql = encodeUtf8 [trimming|
100100
WITH
@@ -134,7 +134,7 @@ queryDbSettings preConfFunc =
134134

135135
queryRoleSettings :: PgVersion -> Session (RoleSettings, RoleIsolationLvl)
136136
queryRoleSettings pgVer =
137-
SQL.transaction SQL.ReadCommitted SQL.Read $ SQL.statement mempty $ SQL.Statement sql HE.noParams (processRows <$> rows) True
137+
SQL.transactionNoRetry SQL.ReadCommitted SQL.Read $ SQL.statement mempty $ SQL.Statement sql HE.noParams (processRows <$> rows) True
138138
where
139139
sql = encodeUtf8 [trimming|
140140
with

src/PostgREST/MainTx.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ data ResultSet
9696
mainTx :: MainQuery -> AppConfig -> AuthResult -> ApiRequest -> ActionPlan -> SchemaCache -> MainTx
9797
mainTx _ _ _ _ (NoDb x) _ = NoDbTx $ NoDbResult x
9898
mainTx genQ@MainQuery{..} conf@AppConfig{..} AuthResult{..} apiReq (Db plan) sCache =
99-
DbTx isoLvl txMode dbHandler SQL.transaction
99+
DbTx isoLvl txMode dbHandler SQL.transactionNoRetry
100100
where
101101
isoLvl = planIsoLvl conf authRole plan
102102
txMode = planTxMode plan

test/io/fixtures/replica.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ $$ language sql;
1010

1111
create table replica.items as select x as id from generate_series(1, 10) x;
1212

13+
create table replica.conflict as select x as id from generate_series(1, 1000000) x;
14+
15+
create view replica.conflict_view as select * from replica.conflict where (pg_sleep(0.01) is not null);
16+
1317
DROP ROLE IF EXISTS postgrest_test_anonymous;
1418
CREATE ROLE postgrest_test_anonymous;
1519

test/io/test_replica.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
"IO tests for PostgREST started on replicas"
22

3+
import os
4+
import time
5+
36
from postgrest import run
7+
from util import Thread
48

59

610
def test_sanity_replica(replicaenv):
@@ -22,3 +26,33 @@ def test_sanity_replica(replicaenv):
2226

2327
response = postgrest.session.get("/items?select=count")
2428
assert response.text == '[{"count":10}]'
29+
30+
31+
def test_conflict_replica(replicaenv):
32+
"Test that PostgREST does not retry the transaction on conflict with recovery (PG error code 40001)"
33+
34+
with run(env=replicaenv["replica"]) as postgrest:
35+
36+
def conflict():
37+
response = postgrest.session.get("/conflict_view")
38+
# Checks that the transaction stops and returns the 40001 error instead of retrying
39+
assert response.json()["code"] == "40001"
40+
assert response.status_code == 500
41+
42+
t = Thread(target=conflict)
43+
t.start()
44+
45+
# make sure the request has started
46+
time.sleep(0.1)
47+
48+
prienv = replicaenv["primary"]
49+
connopts = f'-d {prienv["PGDATABASE"]} -U postgres -h {prienv["PGHOST"]}'
50+
51+
# Delete the table data while the request with the lock is running to trigger the recovery conflict
52+
os.system(
53+
f'psql {connopts} --set ON_ERROR_STOP=1 -a -c "DELETE FROM replica.conflict;"'
54+
)
55+
# Vacuum the table to accelerate the process
56+
os.system(f"vacuumdb {connopts} -t replica.conflict")
57+
58+
t.join()

0 commit comments

Comments
 (0)