Skip to content

Commit 08ec8d1

Browse files
laurenceislasteve-chavez
authored andcommitted
fix: call pg_notification_queue_usage() automatically when the LISTEN channel bug is detected (#4858)
There's a PostgreSQL bug that prevents any listener from registering in the DB: https://www.postgresql.org/message-id/flat/CAK98qZ3wZLE-RZJN_Y%2BTFjiTRPPFPBwNBpBi5K5CU8hUHkzDpw%40mail.gmail.com The only workaround is to advance the async notification queue tail, which can be done by executing: "SELECT pg_notification_queue_usage();". Previously, we just logged a HINT with this suggestion, but now we call that function directly and then let the listener recover automatically. No automated tests were added here, as they would be too complex and this is a PostgreSQL bug. But this was manually tested following the steps on #4581 (comment)
1 parent b963cb7 commit 08ec8d1

3 files changed

Lines changed: 19 additions & 14 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file. From versio
88

99
- Fix login with uppercase and mixed case role names by @taimoorzaeem in #4678
1010
- Restore Listener query shape so it can be found in pg_stat_activity by @mkleczek in #4857 #4859
11+
- The LISTEN channel now automatically recovers when it stops working due to a PostgreSQL bug by @laurenceisla in #3147
1112

1213
## [14.10] - 2026-04-16
1314

src/PostgREST/Listener.hs

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@ import qualified Hasql.Connection as SQL
1010
import qualified Hasql.Notifications as SQL
1111
import PostgREST.AppState (AppState, getConfig)
1212
import PostgREST.Config (AppConfig (..))
13-
import PostgREST.Observation (Observation (..),
14-
isDbListenerBug)
13+
import PostgREST.Observation (Observation (..))
1514
import PostgREST.Version (prettyVersion)
1615

1716
import qualified PostgREST.AppState as AppState
@@ -20,6 +19,7 @@ import qualified PostgREST.Config as Config
2019
import Control.Arrow ((&&&))
2120
import Data.Bitraversable (bisequence)
2221
import Data.Either.Combinators (whenRight)
22+
import qualified Data.Text as T
2323
import qualified Database.PostgreSQL.LibPQ as LibPQ
2424
import qualified Hasql.Session as SQL
2525
import PostgREST.Config.Database (queryPgVersion)
@@ -31,20 +31,20 @@ runListener :: AppState -> IO ()
3131
runListener appState = do
3232
AppConfig{..} <- getConfig appState
3333
when configDbChannelEnabled $
34-
void . forkIO . void $ retryingListen appState
34+
void . forkIO . void $ retryingListen appState False
3535

3636
-- | Starts a LISTEN connection and handles notifications. It recovers with exponential backoff with a cap of 32 seconds, if the LISTEN connection is lost.
3737
-- | This function never returns (but can throw) and the return type enforces that.
38-
retryingListen :: AppState -> IO Void
39-
retryingListen appState = do
38+
retryingListen :: AppState -> Bool -> IO Void
39+
retryingListen appState hasDbListenerBug = do
4040
AppConfig{..} <- AppState.getConfig appState
4141
let
4242
dbChannel = toS configDbChannel
4343
onError err = do
4444
AppState.putIsListenerOn appState False
4545
observer $ DBListenFail dbChannel (Right err)
4646
when (isDbListenerBug err) $
47-
observer DBListenBugHint
47+
observer DBListenBugCallQueryFix
4848
unless configDbPoolAutomaticRecovery $
4949
killThread mainThreadId
5050

@@ -55,7 +55,7 @@ retryingListen appState = do
5555
unless (delay == maxDelay) $
5656
AppState.putNextListenerDelay appState (delay * 2)
5757
-- loop running the listener
58-
retryingListen appState
58+
retryingListen appState (isDbListenerBug err)
5959

6060
-- Execute the listener with error handling
6161
handle onError $ do
@@ -70,6 +70,7 @@ retryingListen appState = do
7070
Right db -> do
7171
(pqHost, pqPort) <- SQL.withLibPQConnection db $ bisequence . (LibPQ.host &&& LibPQ.port)
7272
pgFullName <- SQL.run (queryPgVersion False) db >>= either throwIO (pure . pgvFullName)
73+
when hasDbListenerBug $ SQL.run callNotifQueryUsage db >>= either throwIO pure
7374
SQL.listen db $ SQL.toPgIdentifier dbChannel
7475

7576
AppState.putIsListenerOn appState True
@@ -106,3 +107,10 @@ retryingListen appState = do
106107
AppState.schemaCacheLoader appState
107108

108109
releaseConnection = void . forkIO . handle (observer . DBListenerConnectionCleanupFail) . SQL.release
110+
111+
isDbListenerBug e = "could not access status of transaction" `T.isInfixOf` show e
112+
113+
-- Used to fix a Postgres bug in the listener, see: https://github.com/PostgREST/postgrest/issues/3147#issuecomment-3494591361
114+
-- This query advances the async notification queue tail, which solves this issue.
115+
callNotifQueryUsage :: SQL.Session ()
116+
callNotifQueryUsage = SQL.sql "SELECT pg_notification_queue_usage();"

src/PostgREST/Observation.hs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ module PostgREST.Observation
1313
, observationMessage
1414
, ObservationHandler
1515
, showOnSingleLine
16-
, isDbListenerBug
1716
) where
1817

1918
import qualified Data.ByteString.Lazy as LBS
@@ -48,7 +47,7 @@ data Observation
4847
| DBListenStart (Maybe ByteString) (Maybe ByteString) Text Text -- host, port, version string, channel
4948
| DBListenFail Text (Either SQL.ConnectionError SomeException)
5049
| DBListenRetry Int
51-
| DBListenBugHint -- https://github.com/PostgREST/postgrest/issues/3147
50+
| DBListenBugCallQueryFix
5251
| DBListenerGotSCacheMsg ByteString
5352
| DBListenerGotConfigMsg ByteString
5453
| DBListenerConnectionCleanupFail SomeException
@@ -122,8 +121,8 @@ observationMessage = \case
122121
either showListenerConnError showListenerException listenErr
123122
DBListenRetry delay ->
124123
"Retrying listening for database notifications in " <> (show delay::Text) <> " seconds..."
125-
DBListenBugHint ->
126-
"HINT: This is likely a bug in the notification queue, try executing the following to solve it: select pg_notification_queue_usage();"
124+
DBListenBugCallQueryFix ->
125+
"This is likely a PostgreSQL bug in the notification queue, executing the following to try to solve it: SELECT pg_notification_queue_usage();"
127126
DBListenerGotSCacheMsg channel ->
128127
"Received a schema cache reload message on the " <> show channel <> " channel"
129128
DBListenerGotConfigMsg channel ->
@@ -188,6 +187,3 @@ observationMessage = \case
188187

189188
showOnSingleLine :: Char -> Text -> Text
190189
showOnSingleLine split txt = T.intercalate " " $ T.filter (/= split) <$> T.lines txt -- the errors from hasql-notifications come intercalated with "\t\n"
191-
192-
isDbListenerBug :: SomeException -> Bool
193-
isDbListenerBug e = "could not access status of transaction" `T.isInfixOf` show e

0 commit comments

Comments
 (0)