From 00a40765dc2b58bda34202c73ae6fbfa34760cf9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Sep 2025 14:31:08 -0500 Subject: [PATCH 01/12] Add failing test --- tests/util/test_logcontext.py | 85 +++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py index 0ecf712bab7..ba11c04f860 100644 --- a/tests/util/test_logcontext.py +++ b/tests/util/test_logcontext.py @@ -33,6 +33,7 @@ current_context, make_deferred_yieldable, nested_logging_context, + run_coroutine_in_background, run_in_background, ) from synapse.types import ISynapseReactor @@ -332,6 +333,90 @@ async def testfunc() -> None: return self._test_run_in_background(testfunc) + @logcontext_clean + async def test_run_coroutine_in_background(self) -> None: + """ + Test `run_coroutine_in_background` + """ + clock = Clock(reactor) + + # Sanity check that we start in the sentinel context + self._check_test_key("sentinel") + + callback_finished = False + + async def competing_callback() -> None: + nonlocal callback_finished + try: + # The callback should have the same logcontext as the caller + self._check_test_key("foo") + + with LoggingContext("competing"): + await clock.sleep(0) + self._check_test_key("competing") + + self._check_test_key("foo") + finally: + # When exceptions happen, we still want to mark the callback as finished + # so that the test can complete and we see the underlying error. + callback_finished = True + + with LoggingContext("foo"): + run_coroutine_in_background(competing_callback()) + self._check_test_key("foo") + await clock.sleep(0) + self._check_test_key("foo") + + self.assertTrue( + callback_finished, + "Callback never finished which means the test probably didn't wait long enough", + ) + + # Back to the sentinel context + self._check_test_key("sentinel") + + # @logcontext_clean + async def test_run_coroutine_in_background_already_complete(self) -> None: + """ + Test `run_coroutine_in_background` with a coroutine that is already complete + """ + # Sanity check that we start in the sentinel context + self._check_test_key("sentinel") + + callback_finished = False + + async def competing_callback() -> None: + nonlocal callback_finished + try: + # The callback should have the same logcontext as the caller + self._check_test_key("foo") + + with LoggingContext("competing"): + # We `await` here but there is nothing to wait for here since the + # deferred is already complete so we should immediately continue + # executing in the same context. + await defer.succeed(None) + + self._check_test_key("competing") + + self._check_test_key("foo") + finally: + # When exceptions happen, we still want to mark the callback as finished + # so that the test can complete and we see the underlying error. + callback_finished = True + + with LoggingContext("foo"): + run_coroutine_in_background(competing_callback()) + self._check_test_key("foo") + + self.assertTrue( + callback_finished, + "Callback never finished which means the test probably didn't wait long enough", + ) + + # Back to the sentinel context + self._check_test_key("sentinel") + @logcontext_clean @defer.inlineCallbacks def test_make_deferred_yieldable( From 29b121e12c22d5d3e95f7a21fb2c0bf0eee137f3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Sep 2025 14:33:34 -0500 Subject: [PATCH 02/12] Fix logcontext being messed up if the deferred was already complete --- synapse/logging/context.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/logging/context.py b/synapse/logging/context.py index aa4b98e7c76..c2aa042c08a 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -914,6 +914,12 @@ def run_coroutine_in_background( # coroutine in the background. d = defer.ensureDeferred(coroutine) + # The deferred has already completed + if d.called and not d.paused: + # The function should have maintained the logcontext, so we can + # optimise out the messing about + return d + # The function may have reset the context before returning, so we need to restore it # now. # From c1f243192775d56074e66418c12f69b4ab96a4b2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Sep 2025 14:40:55 -0500 Subject: [PATCH 03/12] Simplify `run_coroutine_in_background` to use `run_in_background` This way we don't have to share tricks and ensure both stay in sync with the new meta. --- synapse/logging/context.py | 41 ++++---------------------------------- 1 file changed, 4 insertions(+), 37 deletions(-) diff --git a/synapse/logging/context.py b/synapse/logging/context.py index c2aa042c08a..c85f7d3810e 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -894,10 +894,9 @@ def run_coroutine_in_background( Useful for wrapping coroutines that you don't yield or await on (for instance because you want to pass it to deferred.gatherResults()). - This is a special case of `run_in_background` where we can accept a - coroutine directly rather than a function. We can do this because coroutines - do not run until called, and so calling an async function without awaiting - cannot change the log contexts. + This is a special case of `run_in_background` where we can accept a coroutine + directly rather than a function. We can do this because coroutines do not continue + running once they have yielded. This is an ergonomic helper so we can do this: ```python @@ -908,39 +907,7 @@ def run_coroutine_in_background( run_in_background(lambda: func1(arg1)) ``` """ - calling_context = current_context() - - # Wrap the coroutine in a deferred, which will have the side effect of executing the - # coroutine in the background. - d = defer.ensureDeferred(coroutine) - - # The deferred has already completed - if d.called and not d.paused: - # The function should have maintained the logcontext, so we can - # optimise out the messing about - return d - - # The function may have reset the context before returning, so we need to restore it - # now. - # - # Our goal is to have the caller logcontext unchanged after firing off the - # background task and returning. - set_current_context(calling_context) - - # The original logcontext will be restored when the deferred completes, but - # there is nothing waiting for it, so it will get leaked into the reactor (which - # would then get picked up by the next thing the reactor does). We therefore - # need to reset the logcontext here (set the `sentinel` logcontext) before - # yielding control back to the reactor. - # - # (If this feels asymmetric, consider it this way: we are - # effectively forking a new thread of execution. We are - # probably currently within a ``with LoggingContext()`` block, - # which is supposed to have a single entry and exit point. But - # by spawning off another deferred, we are effectively - # adding a new exit point.) - d.addBoth(_set_context_cb, SENTINEL_CONTEXT) - return d + return run_in_background(lambda: coroutine) T = TypeVar("T") From 2feae9aa8110f5d222443dc07065e108e127bd93 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Sep 2025 15:07:08 -0500 Subject: [PATCH 04/12] No longer just an optimization --- synapse/logging/context.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/logging/context.py b/synapse/logging/context.py index c85f7d3810e..fdf6f30e07e 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -857,8 +857,11 @@ def run_in_background( # The deferred has already completed if d.called and not d.paused: - # The function should have maintained the logcontext, so we can - # optimise out the messing about + # The function should have maintained the calling logcontext, so we can avoid + # messing with it further. Additionally, if the deferred has already completed, + # then it would be a mistake to then add a deferred callback (below) to reset + # the logcontext to the sentinel logcontext as that would run immediately + # (remember our goal is to maintain the calling logcontext). return d # The function may have reset the context before returning, so we need to restore it From ba22b734de0883ad3ab7a691a2dae8e937f64bca Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Sep 2025 17:37:08 -0500 Subject: [PATCH 05/12] Uncomment `@logcontext_clean` See https://github.com/element-hq/synapse/pull/18964#discussion_r2373346174 --- tests/util/test_logcontext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py index ba11c04f860..418665812ca 100644 --- a/tests/util/test_logcontext.py +++ b/tests/util/test_logcontext.py @@ -375,7 +375,7 @@ async def competing_callback() -> None: # Back to the sentinel context self._check_test_key("sentinel") - # @logcontext_clean + @logcontext_clean async def test_run_coroutine_in_background_already_complete(self) -> None: """ Test `run_coroutine_in_background` with a coroutine that is already complete From 7fcec49b33b1804799b7d0ed5ac535f3b187b04e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Sep 2025 17:46:23 -0500 Subject: [PATCH 06/12] Better comments --- synapse/logging/context.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/synapse/logging/context.py b/synapse/logging/context.py index fdf6f30e07e..80eaafd61f1 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -802,13 +802,15 @@ def run_in_background( deferred returned by the function completes. To explain how the log contexts work here: - - When `run_in_background` is called, the current context is stored ("original"), - we kick off the background task in the current context, and we restore that - original context before returning - - When the background task finishes, we don't want to leak our context into the - reactor which would erroneously get attached to the next operation picked up by - the event loop. We add a callback to the deferred which will clear the logging - context after it finishes and yields control back to the reactor. + - When `run_in_background` is called, the calling logcontext is stored + ("original"), we kick off the background task in the current context, and we + restore that original context before returning. + - For a completed deferred, that's the end of the story. + - For an incomplete deferred, when the background task finishes, we don't want to + leak our context into the reactor which would erroneously get attached to the + next operation picked up by the event loop. We add a callback to the deferred + which will clear the logging context after it finishes and yields control back to + the reactor. Useful for wrapping functions that return a deferred or coroutine, which you don't yield or await on (for instance because you want to pass it to @@ -861,7 +863,7 @@ def run_in_background( # messing with it further. Additionally, if the deferred has already completed, # then it would be a mistake to then add a deferred callback (below) to reset # the logcontext to the sentinel logcontext as that would run immediately - # (remember our goal is to maintain the calling logcontext). + # (remember our goal is to maintain the calling logcontext when we return). return d # The function may have reset the context before returning, so we need to restore it From ade9853631543d8f732180d9784e3252f9e93fb2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Sep 2025 17:47:07 -0500 Subject: [PATCH 07/12] Add changelog --- changelog.d/18964.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/18964.misc diff --git a/changelog.d/18964.misc b/changelog.d/18964.misc new file mode 100644 index 00000000000..69be53ad27b --- /dev/null +++ b/changelog.d/18964.misc @@ -0,0 +1 @@ +Fix `run_coroutine_in_background(...)` incorrectly handling logcontext. From c6e814100e8f718013b345bd9c73bc7ba1400a8f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Sep 2025 18:18:10 -0500 Subject: [PATCH 08/12] Refactor `run_in_background` tests to be a little more straight-forward --- tests/util/test_logcontext.py | 78 +++++++++++++++++------------------ 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py index 418665812ca..8252c4cc689 100644 --- a/tests/util/test_logcontext.py +++ b/tests/util/test_logcontext.py @@ -250,73 +250,73 @@ async def competing_callback() -> None: # Back to the sentinel context self._check_test_key("sentinel") - def _test_run_in_background(self, function: Callable[[], object]) -> defer.Deferred: - sentinel_context = current_context() + async def _test_run_in_background(self, function: Callable[[], object]) -> None: + clock = Clock(reactor) + + # Sanity check that we start in the sentinel context + self._check_test_key("sentinel") - callback_completed = False + callback_finished = False with LoggingContext("foo"): - # fire off function, but don't wait on it. - d2 = run_in_background(function) + # Fire off the function, but don't wait on it. + deferred = run_in_background(function) + self._check_test_key("foo") - def cb(res: object) -> object: - nonlocal callback_completed - callback_completed = True - return res + def callback(result: object) -> object: + nonlocal callback_finished + callback_finished = True + # Pass through the result + return result - d2.addCallback(cb) + # We `addBoth` because when exceptions happen, we still want to mark the + # callback as finished so that the test can complete and we see the + # underlying error. + deferred.addBoth(callback) self._check_test_key("foo") - # now wait for the function under test to have run, and check that - # the logcontext is left in a sane state. - d2 = defer.Deferred() - - def check_logcontext() -> None: - if not callback_completed: - reactor.callLater(0.01, check_logcontext) - return - - # make sure that the context was reset before it got thrown back - # into the reactor - try: - self.assertIs(current_context(), sentinel_context) - d2.callback(None) - except BaseException: - d2.errback(twisted.python.failure.Failure()) + # Now wait for the function under test to have run, and check that + # the logcontext is left in a sane state. + while not callback_finished: + await clock.sleep(0) + self._check_test_key("foo") - reactor.callLater(0.01, check_logcontext) + self.assertTrue( + callback_finished, + "Callback never finished which means the test probably didn't wait long enough", + ) - # test is done once d2 finishes - return d2 + # Back to the sentinel context + self._check_test_key("sentinel") @logcontext_clean - def test_run_in_background_with_blocking_fn(self) -> defer.Deferred: + async def test_run_in_background_with_blocking_fn(self): async def blocking_function() -> None: await Clock(reactor).sleep(0) - return self._test_run_in_background(blocking_function) + await self._test_run_in_background(blocking_function) @logcontext_clean - def test_run_in_background_with_non_blocking_fn(self) -> defer.Deferred: + async def test_run_in_background_with_non_blocking_fn(self): @defer.inlineCallbacks def nonblocking_function() -> Generator["defer.Deferred[object]", object, None]: with PreserveLoggingContext(): yield defer.succeed(None) - return self._test_run_in_background(nonblocking_function) + await self._test_run_in_background(nonblocking_function) @logcontext_clean - def test_run_in_background_with_chained_deferred(self) -> defer.Deferred: + async def test_run_in_background_with_chained_deferred(self): # a function which returns a deferred which looks like it has been # called, but is actually paused def testfunc() -> defer.Deferred: return make_deferred_yieldable(_chained_deferred_function()) - return self._test_run_in_background(testfunc) + await self._test_run_in_background(testfunc) @logcontext_clean - def test_run_in_background_with_coroutine(self) -> defer.Deferred: + async def test_run_in_background_with_coroutine(self): async def testfunc() -> None: self._check_test_key("foo") d = defer.ensureDeferred(Clock(reactor).sleep(0)) @@ -324,14 +324,14 @@ async def testfunc() -> None: await d self._check_test_key("foo") - return self._test_run_in_background(testfunc) + await self._test_run_in_background(testfunc) @logcontext_clean - def test_run_in_background_with_nonblocking_coroutine(self) -> defer.Deferred: + async def test_run_in_background_with_nonblocking_coroutine(self): async def testfunc() -> None: self._check_test_key("foo") - return self._test_run_in_background(testfunc) + await self._test_run_in_background(testfunc) @logcontext_clean async def test_run_coroutine_in_background(self) -> None: From 2ae72a6e217004d771d7e8117e0ba90e159231f1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Sep 2025 18:27:49 -0500 Subject: [PATCH 09/12] Better test descriptions --- tests/util/test_logcontext.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py index 8252c4cc689..62946e18af6 100644 --- a/tests/util/test_logcontext.py +++ b/tests/util/test_logcontext.py @@ -317,6 +317,13 @@ def testfunc() -> defer.Deferred: @logcontext_clean async def test_run_in_background_with_coroutine(self): + """ + Test `run_in_background` with a coroutine that yields control back to the + reactor. + + This will stress the logic around incomplete deferreds in `run_in_background`. + """ + async def testfunc() -> None: self._check_test_key("foo") d = defer.ensureDeferred(Clock(reactor).sleep(0)) @@ -328,6 +335,13 @@ async def testfunc() -> None: @logcontext_clean async def test_run_in_background_with_nonblocking_coroutine(self): + """ + Test `run_in_background` with a "nonblocking" coroutine (never yields control + back to the reactor). + + This will stress the logic around completed deferreds in `run_in_background`. + """ + async def testfunc() -> None: self._check_test_key("foo") @@ -336,7 +350,10 @@ async def testfunc() -> None: @logcontext_clean async def test_run_coroutine_in_background(self) -> None: """ - Test `run_coroutine_in_background` + Test `run_coroutine_in_background` with a coroutine that yields control back to the + reactor. + + This will stress the logic around incomplete deferreds in `run_coroutine_in_background`. """ clock = Clock(reactor) @@ -378,7 +395,10 @@ async def competing_callback() -> None: @logcontext_clean async def test_run_coroutine_in_background_already_complete(self) -> None: """ - Test `run_coroutine_in_background` with a coroutine that is already complete + Test `run_coroutine_in_background` with a "nonblocking" coroutine (never yields control + back to the reactor). + + This will stress the logic around completed deferreds in `run_coroutine_in_background`. """ # Sanity check that we start in the sentinel context self._check_test_key("sentinel") From caaa6a5d5b0d9f1a0257693697b96e48297f4d57 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Sep 2025 18:29:56 -0500 Subject: [PATCH 10/12] Align names --- tests/util/test_logcontext.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py index 62946e18af6..83e017e7ca3 100644 --- a/tests/util/test_logcontext.py +++ b/tests/util/test_logcontext.py @@ -22,7 +22,6 @@ import logging from typing import Callable, Generator, cast -import twisted.python.failure from twisted.internet import defer, reactor as _reactor from synapse.logging.context import ( @@ -393,7 +392,7 @@ async def competing_callback() -> None: self._check_test_key("sentinel") @logcontext_clean - async def test_run_coroutine_in_background_already_complete(self) -> None: + async def test_run_coroutine_in_background_with_nonblocking_coroutine(self) -> None: """ Test `run_coroutine_in_background` with a "nonblocking" coroutine (never yields control back to the reactor). From c9d5cda8575a483a1a98f91fea6718d6a3991b02 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 23 Sep 2025 18:33:01 -0500 Subject: [PATCH 11/12] Fix lints --- tests/util/test_logcontext.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py index 83e017e7ca3..651dd844835 100644 --- a/tests/util/test_logcontext.py +++ b/tests/util/test_logcontext.py @@ -290,14 +290,14 @@ def callback(result: object) -> object: self._check_test_key("sentinel") @logcontext_clean - async def test_run_in_background_with_blocking_fn(self): + async def test_run_in_background_with_blocking_fn(self) -> None: async def blocking_function() -> None: await Clock(reactor).sleep(0) await self._test_run_in_background(blocking_function) @logcontext_clean - async def test_run_in_background_with_non_blocking_fn(self): + async def test_run_in_background_with_non_blocking_fn(self) -> None: @defer.inlineCallbacks def nonblocking_function() -> Generator["defer.Deferred[object]", object, None]: with PreserveLoggingContext(): @@ -306,7 +306,7 @@ def nonblocking_function() -> Generator["defer.Deferred[object]", object, None]: await self._test_run_in_background(nonblocking_function) @logcontext_clean - async def test_run_in_background_with_chained_deferred(self): + async def test_run_in_background_with_chained_deferred(self) -> None: # a function which returns a deferred which looks like it has been # called, but is actually paused def testfunc() -> defer.Deferred: @@ -315,7 +315,7 @@ def testfunc() -> defer.Deferred: await self._test_run_in_background(testfunc) @logcontext_clean - async def test_run_in_background_with_coroutine(self): + async def test_run_in_background_with_coroutine(self) -> None: """ Test `run_in_background` with a coroutine that yields control back to the reactor. @@ -333,7 +333,7 @@ async def testfunc() -> None: await self._test_run_in_background(testfunc) @logcontext_clean - async def test_run_in_background_with_nonblocking_coroutine(self): + async def test_run_in_background_with_nonblocking_coroutine(self) -> None: """ Test `run_in_background` with a "nonblocking" coroutine (never yields control back to the reactor). From 8e5935c289037b09733e82677dede61f2839a4a7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 24 Sep 2025 09:44:08 -0500 Subject: [PATCH 12/12] Clarify why we can assume the logcontext is unchanged See https://github.com/element-hq/synapse/pull/18964#discussion_r2375389840 --- synapse/logging/context.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 80eaafd61f1..b5b434f3a8d 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -859,25 +859,36 @@ def run_in_background( # The deferred has already completed if d.called and not d.paused: - # The function should have maintained the calling logcontext, so we can avoid - # messing with it further. Additionally, if the deferred has already completed, - # then it would be a mistake to then add a deferred callback (below) to reset - # the logcontext to the sentinel logcontext as that would run immediately - # (remember our goal is to maintain the calling logcontext when we return). + # If the function messes with logcontexts, we can assume it follows the Synapse + # logcontext rules (Rules for functions returning awaitables: "If the awaitable + # is already complete, the function returns with the same logcontext it started + # with."). If it function doesn't touch logcontexts at all, we can also assume + # the logcontext is unchanged. + # + # Either way, the function should have maintained the calling logcontext, so we + # can avoid messing with it further. Additionally, if the deferred has already + # completed, then it would be a mistake to then add a deferred callback (below) + # to reset the logcontext to the sentinel logcontext as that would run + # immediately (remember our goal is to maintain the calling logcontext when we + # return). return d - # The function may have reset the context before returning, so we need to restore it - # now. + # Since the function we called may follow the Synapse logcontext rules (Rules for + # functions returning awaitables: "If the awaitable is incomplete, the function + # clears the logcontext before returning"), the function may have reset the + # logcontext before returning, so we need to restore the calling logcontext now + # before we return ourselves. # # Our goal is to have the caller logcontext unchanged after firing off the # background task and returning. set_current_context(calling_context) - # The original logcontext will be restored when the deferred completes, but - # there is nothing waiting for it, so it will get leaked into the reactor (which - # would then get picked up by the next thing the reactor does). We therefore - # need to reset the logcontext here (set the `sentinel` logcontext) before - # yielding control back to the reactor. + # If the function we called is playing nice and following the Synapse logcontext + # rules, it will restore original calling logcontext when the deferred completes; + # but there is nothing waiting for it, so it will get leaked into the reactor (which + # would then get picked up by the next thing the reactor does). We therefore need to + # reset the logcontext here (set the `sentinel` logcontext) before yielding control + # back to the reactor. # # (If this feels asymmetric, consider it this way: we are # effectively forking a new thread of execution. We are