From d3f4391b2caf954032b09a927cd5d6b078a338f4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 8 Sep 2025 12:34:42 -0500 Subject: [PATCH 01/15] Explain the `sentinel` context better --- docs/log_contexts.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/log_contexts.md b/docs/log_contexts.md index 9d087d11ef2..0403b274a30 100644 --- a/docs/log_contexts.md +++ b/docs/log_contexts.md @@ -59,6 +59,23 @@ def do_request_handling(): logger.debug("phew") ``` +### The `sentinel` context + +The default context is `synapse.logging.context.SENTINEL_CONTEXT`, which is an empty +sentinel value to represent the root context. This is what is used when there is no +other context set. The phrase "clear the logcontext" means to set the current context to +the `sentinel` context. + +No CPU/database usage metrics are recorded against the `sentinel` context. + +Ideally, nothing from the Synapse homeserver would be logged against the `sentinel` +context as we want to know which server the logs came from. In practice, this is not +always the case yet especially outside of request handling. + +Whenever yielding control back to the reactor (event loop), the `sentinel` logcontext is +used to avoid leaking the current logcontext into the other reactor which would +erroneously get attached to the next operation picked up by the event loop. + ## Using logcontexts with awaitables Awaitables break the linear flow of code so that there is no longer a single entry point From 8de6a8620bf003cf0c4215359f82c7fb610e34ee Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 8 Sep 2025 12:35:22 -0500 Subject: [PATCH 02/15] Clarify when to use the `sentinel` context --- docs/log_contexts.md | 2 +- synapse/logging/context.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/log_contexts.md b/docs/log_contexts.md index 0403b274a30..ae3c3fee70a 100644 --- a/docs/log_contexts.md +++ b/docs/log_contexts.md @@ -73,7 +73,7 @@ context as we want to know which server the logs came from. In practice, this is always the case yet especially outside of request handling. Whenever yielding control back to the reactor (event loop), the `sentinel` logcontext is -used to avoid leaking the current logcontext into the other reactor which would +set to avoid leaking the current logcontext into the other reactor which would erroneously get attached to the next operation picked up by the event loop. ## Using logcontexts with awaitables diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 7ea3f3d726d..984e440f7f0 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -227,7 +227,16 @@ class ContextRequest: class _Sentinel: - """Sentinel to represent the root context""" + """ + Sentinel to represent the root context + + This should only be used for tasks outside of Synapse like when we yield control + back to the Twisted reactor (event loop) so we don't leak the current logging + context to other tasks that are scheduled next in the event loop. + + Nothing from the Synapse homeserver should be logged with the sentinel context. i.e. + we should always know which server the logs are coming from. + """ __slots__ = ["previous_context", "finished", "request", "tag"] From af1770d37af0d0a235d6583715824dec52edce8e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 8 Sep 2025 15:13:01 -0500 Subject: [PATCH 03/15] Explain logging contexts in `run_as_background_process` --- synapse/metrics/background_process_metrics.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index f7f2d88885e..d349f72a73c 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -228,6 +228,11 @@ def run_as_background_process( clock.looping_call and friends (or for firing-and-forgetting in the middle of a normal synapse async function). + Because the returned Deferred does not follow the synapse logcontext rules, awaiting + the result of this function will result in the log context being cleared. In order + to properly await the result of this function and maintain the current log context, + use `make_deferred_yieldable`. + Args: desc: a description for this background process type server_name: The homeserver name that this background process is being run for @@ -280,7 +285,20 @@ async def run() -> Optional[R]: name=desc, **{SERVER_NAME_LABEL: server_name} ).dec() + # To explain how the log contexts work here: + # - When this function is called, the current context is stored (using + # `PreserveLoggingContext`), we kick off the background task, and we restore the + # original context before returning (also part of `PreserveLoggingContext`). + # - When the background task finishes, we don't want to leak our background context + # into the reactor which would erroneously get attached to the next operation + # picked up by the event loop. We use `PreserveLoggingContext` to set the + # `sentinel` context and means the new `BackgroundProcessLoggingContext` will + # remember the `sentinel` context as its previous context to return to when it + # exits and yields control back to the reactor. with PreserveLoggingContext(): + # The async `run` task is wrapped in a deferred, which will have the side effect + # of executing the coroutine. + # # Note that we return a Deferred here so that it can be used in a # looping_call and other places that expect a Deferred. return defer.ensureDeferred(run()) From 064ca822eb20d3f6bd7ecbc3bfd492343205fb01 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 8 Sep 2025 13:21:20 -0500 Subject: [PATCH 04/15] Add potential future simplication of `run_as_background_process` --- synapse/metrics/background_process_metrics.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index d349f72a73c..4487d223375 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -295,6 +295,9 @@ async def run() -> Optional[R]: # `sentinel` context and means the new `BackgroundProcessLoggingContext` will # remember the `sentinel` context as its previous context to return to when it # exits and yields control back to the reactor. + # + # TODO: I think we could simplify this whole block by using `return run_in_background(run)` + # which appears to give equivalent behaviour. with PreserveLoggingContext(): # The async `run` task is wrapped in a deferred, which will have the side effect # of executing the coroutine. From 108e893436c3e4e4a204da6c992741365370bac8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 8 Sep 2025 13:34:52 -0500 Subject: [PATCH 05/15] Add log context explanation to `run_in_background` --- synapse/logging/context.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 984e440f7f0..9bb5f3ba257 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -793,6 +793,14 @@ def run_in_background( return from the function, and that the sentinel context is set once the deferred returned by the function completes. + To explain how the log contexts work here: + - When this function is called, the current context is stored ("original"), we kick + off the background task, and we restore that original context before returning + - When the background task finishes, we don't want to leak our context into the + reactor which would erroneously get attached to the next operation picked up by + the event loop. We add a callback to the deferred which will clear the logging + context after it finishes and yields control back to the reactor. + Useful for wrapping functions that return a deferred or coroutine, which you don't yield or await on (for instance because you want to pass it to deferred.gatherResults()). @@ -850,6 +858,9 @@ def run_in_background( # which is supposed to have a single entry and exit point. But # by spawning off another deferred, we are effectively # adding a new exit point.) + # + # TODO: Why aren't we using `sentinel` context here. We can't guarantee that `ctx` + # is `sentinel` here? d.addBoth(_set_context_cb, ctx) return d From 3cf60d7746224ea03eb7b905bcea9a8672fbf11e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 8 Sep 2025 15:50:51 -0500 Subject: [PATCH 06/15] Explain `make_deferred_yieldable` --- synapse/logging/context.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 9bb5f3ba257..279e7f2341f 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -908,24 +908,38 @@ def run_coroutine_in_background( def make_deferred_yieldable(deferred: "defer.Deferred[T]") -> "defer.Deferred[T]": - """Given a deferred, make it follow the Synapse logcontext rules: + """ + Given a deferred, make it follow the Synapse logcontext rules: - If the deferred has completed, essentially does nothing (just returns another - completed deferred with the result/failure). + - If the deferred has completed, essentially does nothing (just returns another + completed deferred with the result/failure). + - If the deferred has not yet completed, resets the logcontext before returning a + incomplete deferred. Then, when the deferred completes, restores the current + logcontext before running callbacks/errbacks. - If the deferred has not yet completed, resets the logcontext before - returning a deferred. Then, when the deferred completes, restores the - current logcontext before running callbacks/errbacks. + This means the resultant deferred can be awaited without leaking the current + logcontext to the reactor (which would then get erroneously picked up by the next + thing the reactor does), and also means that the logcontext is preserved when the + deferred completes. - (This is more-or-less the opposite operation to run_in_background.) + (This is more-or-less the opposite operation to run_in_background in terms of how it + handles log contexts.) """ + # The deferred has already completed if deferred.called and not deferred.paused: # it looks like this deferred is ready to run any callbacks we give it # immediately. We may as well optimise out the logcontext faffery. return deferred - # ok, we can't be sure that a yield won't block, so let's reset the - # logcontext, and add a callback to the deferred to restore it. + # Our goal is to have the caller logcontext unchanged after they yield/await the + # returned deferred. + # + # When the caller yield/await's the returned deferred, it may yield + # control back to the reactor. To avoid leaking the current logcontext to the + # reactor (which would then get erroneously picked up by the next thing the reactor + # does) while the deferred runs in the reactor event loop, we reset the logcontext + # and add a callback to the deferred to restore it so the caller's logcontext is + # active when the deferred completes. prev_context = set_current_context(SENTINEL_CONTEXT) deferred.addBoth(_set_context_cb, prev_context) return deferred From 21f8fcc2c2ef883239c6fee575a154d64ae9bddf Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 8 Sep 2025 16:00:35 -0500 Subject: [PATCH 07/15] Docs: clear/reset log context --- docs/log_contexts.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/log_contexts.md b/docs/log_contexts.md index ae3c3fee70a..ea99e1749b3 100644 --- a/docs/log_contexts.md +++ b/docs/log_contexts.md @@ -63,8 +63,8 @@ def do_request_handling(): The default context is `synapse.logging.context.SENTINEL_CONTEXT`, which is an empty sentinel value to represent the root context. This is what is used when there is no -other context set. The phrase "clear the logcontext" means to set the current context to -the `sentinel` context. +other context set. The phrase "clear/reset the logcontext" means to set the current +context to the `sentinel` context. No CPU/database usage metrics are recorded against the `sentinel` context. From 53c5ce742c8a6345cc6e4886054dc051362f49d2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 8 Sep 2025 16:02:06 -0500 Subject: [PATCH 08/15] WIP: `run_in_background` matches comments but context is lost --- synapse/logging/context.py | 146 +++++++++++++++++++------------------ 1 file changed, 76 insertions(+), 70 deletions(-) diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 279e7f2341f..2b2c507deb8 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -812,57 +812,62 @@ def run_in_background( `f` doesn't raise any deferred exceptions, otherwise a scary-looking CRITICAL error about an unhandled error will be logged without much indication about where it came from. - """ - current = current_context() - try: - res = f(*args, **kwargs) - except Exception: - # the assumption here is that the caller doesn't want to be disturbed - # by synchronous exceptions, so let's turn them into Failures. - return defer.fail() - - # `res` may be a coroutine, `Deferred`, some other kind of awaitable, or a plain - # value. Convert it to a `Deferred`. - d: "defer.Deferred[R]" - if isinstance(res, typing.Coroutine): - # Wrap the coroutine in a `Deferred`. - d = defer.ensureDeferred(res) - elif isinstance(res, defer.Deferred): - d = res - elif isinstance(res, Awaitable): - # `res` is probably some kind of completed awaitable, such as a `DoneAwaitable` - # or `Future` from `make_awaitable`. - d = defer.ensureDeferred(_unwrap_awaitable(res)) - else: - # `res` is a plain value. Wrap it in a `Deferred`. - d = defer.succeed(res) - - if d.called and not d.paused: - # The function should have maintained the logcontext, so we can - # optimise out the messing about - return d - # The function may have reset the context before returning, so - # we need to restore it now. - ctx = set_current_context(current) - - # The original context will be restored when the deferred - # completes, but there is nothing waiting for it, so it will - # get leaked into the reactor or some other function which - # wasn't expecting it. We therefore need to reset the context - # here. - # - # (If this feels asymmetric, consider it this way: we are - # effectively forking a new thread of execution. We are - # probably currently within a ``with LoggingContext()`` block, - # which is supposed to have a single entry and exit point. But - # by spawning off another deferred, we are effectively - # adding a new exit point.) + Returns: + Deferred which returns the result of func, or `None` if func raises. + Note that the returned Deferred does not follow the synapse logcontext + rules. + """ + # Our goal is to have the caller logcontext unchanged after firing off the + # background task and returning. # - # TODO: Why aren't we using `sentinel` context here. We can't guarantee that `ctx` - # is `sentinel` here? - d.addBoth(_set_context_cb, ctx) - return d + # When this function is called, the current context is stored (using + # `PreserveLoggingContext`), we kick off the background task, and we restore the + # original context before returning (also part of `PreserveLoggingContext`). + with PreserveLoggingContext(current_context()): + try: + res = f(*args, **kwargs) + except Exception: + # the assumption here is that the caller doesn't want to be disturbed + # by synchronous exceptions, so let's turn them into Failures. + return defer.fail() + + # `res` may be a coroutine, `Deferred`, some other kind of awaitable, or a plain + # value. Convert it to a `Deferred`. + d: "defer.Deferred[R]" + if isinstance(res, typing.Coroutine): + # Wrap the coroutine in a `Deferred`. + d = defer.ensureDeferred(res) + elif isinstance(res, defer.Deferred): + d = res + elif isinstance(res, Awaitable): + # `res` is probably some kind of completed awaitable, such as a `DoneAwaitable` + # or `Future` from `make_awaitable`. + d = defer.ensureDeferred(_unwrap_awaitable(res)) + else: + # `res` is a plain value. Wrap it in a `Deferred`. + d = defer.succeed(res) + + # The deferred has already completed + if d.called and not d.paused: + # The function should have maintained the logcontext, so we can + # optimise out the messing about + return d + + # The original logcontext will be restored when the deferred completes, but + # there is nothing waiting for it, so it will get leaked into the reactor (which + # would then get picked up by the next thing the reactor does). We therefore + # need to reset the logcontext here (set the `sentinel` logcontext) before + # yielding control back to the reactor. + # + # (If this feels asymmetric, consider it this way: we are + # effectively forking a new thread of execution. We are + # probably currently within a ``with LoggingContext()`` block, + # which is supposed to have a single entry and exit point. But + # by spawning off another deferred, we are effectively + # adding a new exit point.) + d.addBoth(_set_context_cb, SENTINEL_CONTEXT) + return d def run_coroutine_in_background( @@ -880,28 +885,29 @@ def run_coroutine_in_background( do not run until called, and so calling an async function without awaiting cannot change the log contexts. """ - - current = current_context() - d = defer.ensureDeferred(coroutine) - - # The function may have reset the context before returning, so - # we need to restore it now. - ctx = set_current_context(current) - - # The original context will be restored when the deferred - # completes, but there is nothing waiting for it, so it will - # get leaked into the reactor or some other function which - # wasn't expecting it. We therefore need to reset the context - # here. + # Our goal is to have the caller logcontext unchanged after firing off the + # background task and returning. # - # (If this feels asymmetric, consider it this way: we are - # effectively forking a new thread of execution. We are - # probably currently within a ``with LoggingContext()`` block, - # which is supposed to have a single entry and exit point. But - # by spawning off another deferred, we are effectively - # adding a new exit point.) - d.addBoth(_set_context_cb, ctx) - return d + # When this function is called, the current context is stored (using + # `PreserveLoggingContext`), we kick off the background task, and we restore the + # original context before returning (also part of `PreserveLoggingContext`). + with PreserveLoggingContext(current_context()): + d = defer.ensureDeferred(coroutine) + + # The original logcontext will be restored when the deferred completes, but + # there is nothing waiting for it, so it will get leaked into the reactor (which + # would then get picked up by the next thing the reactor does). We therefore + # need to reset the logcontext here (set the `sentinel` logcontext) before + # yielding control back to the reactor. + # + # (If this feels asymmetric, consider it this way: we are + # effectively forking a new thread of execution. We are + # probably currently within a ``with LoggingContext()`` block, + # which is supposed to have a single entry and exit point. But + # by spawning off another deferred, we are effectively + # adding a new exit point.) + d.addBoth(_set_context_cb, SENTINEL_CONTEXT) + return d T = TypeVar("T") From 8b1be4e369f43795ee5001822dbddf0f0a14c3cb Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 8 Sep 2025 16:15:07 -0500 Subject: [PATCH 09/15] Simplify `run_as_background_process` --- synapse/metrics/background_process_metrics.py | 23 ++----------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 4487d223375..c07975526ec 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -47,7 +47,7 @@ from synapse.logging.context import ( ContextResourceUsage, LoggingContext, - PreserveLoggingContext, + run_in_background, ) from synapse.logging.opentracing import SynapseTags, start_active_span from synapse.metrics import SERVER_NAME_LABEL @@ -285,26 +285,7 @@ async def run() -> Optional[R]: name=desc, **{SERVER_NAME_LABEL: server_name} ).dec() - # To explain how the log contexts work here: - # - When this function is called, the current context is stored (using - # `PreserveLoggingContext`), we kick off the background task, and we restore the - # original context before returning (also part of `PreserveLoggingContext`). - # - When the background task finishes, we don't want to leak our background context - # into the reactor which would erroneously get attached to the next operation - # picked up by the event loop. We use `PreserveLoggingContext` to set the - # `sentinel` context and means the new `BackgroundProcessLoggingContext` will - # remember the `sentinel` context as its previous context to return to when it - # exits and yields control back to the reactor. - # - # TODO: I think we could simplify this whole block by using `return run_in_background(run)` - # which appears to give equivalent behaviour. - with PreserveLoggingContext(): - # The async `run` task is wrapped in a deferred, which will have the side effect - # of executing the coroutine. - # - # Note that we return a Deferred here so that it can be used in a - # looping_call and other places that expect a Deferred. - return defer.ensureDeferred(run()) + return run_in_background(run) P = ParamSpec("P") From 9cd37e03880b3c4505ea5b2453aa9db91d3b5c1e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 8 Sep 2025 16:19:17 -0500 Subject: [PATCH 10/15] Add changelog --- changelog.d/18900.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/18900.misc diff --git a/changelog.d/18900.misc b/changelog.d/18900.misc new file mode 100644 index 00000000000..d7d8b47eb0c --- /dev/null +++ b/changelog.d/18900.misc @@ -0,0 +1 @@ +Better explain how we manage the logcontext in `run_in_background(...)` and `run_as_background_process(...)`. From 2194f6f6b8faca6cd060e0409c6866de56ef678f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 8 Sep 2025 17:05:31 -0500 Subject: [PATCH 11/15] Fix `run_in_background` throwing warning about losing context See https://github.com/element-hq/synapse/pull/18900#discussion_r2331418989 --- synapse/logging/context.py | 142 ++++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 67 deletions(-) diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 2b2c507deb8..8a36a913c45 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -818,56 +818,60 @@ def run_in_background( Note that the returned Deferred does not follow the synapse logcontext rules. """ + calling_context = current_context() + try: + res = f(*args, **kwargs) + except Exception: + # the assumption here is that the caller doesn't want to be disturbed + # by synchronous exceptions, so let's turn them into Failures. + return defer.fail() + + # `res` may be a coroutine, `Deferred`, some other kind of awaitable, or a plain + # value. Convert it to a `Deferred`. + # + # Wrapping the value in a deferred has the side effect of executing the coroutine, + # if it is one. If it's already a deferred, then we can just use that. + d: "defer.Deferred[R]" + if isinstance(res, typing.Coroutine): + # Wrap the coroutine in a `Deferred`. + d = defer.ensureDeferred(res) + elif isinstance(res, defer.Deferred): + d = res + elif isinstance(res, Awaitable): + # `res` is probably some kind of completed awaitable, such as a `DoneAwaitable` + # or `Future` from `make_awaitable`. + d = defer.ensureDeferred(_unwrap_awaitable(res)) + else: + # `res` is a plain value. Wrap it in a `Deferred`. + d = defer.succeed(res) + + # The deferred has already completed + if d.called and not d.paused: + # The function should have maintained the logcontext, so we can + # optimise out the messing about + return d + + # The function may have reset the context before returning, so we need to restore it + # now. + # # Our goal is to have the caller logcontext unchanged after firing off the # background task and returning. + set_current_context(calling_context) + + # The original logcontext will be restored when the deferred completes, but + # there is nothing waiting for it, so it will get leaked into the reactor (which + # would then get picked up by the next thing the reactor does). We therefore + # need to reset the logcontext here (set the `sentinel` logcontext) before + # yielding control back to the reactor. # - # When this function is called, the current context is stored (using - # `PreserveLoggingContext`), we kick off the background task, and we restore the - # original context before returning (also part of `PreserveLoggingContext`). - with PreserveLoggingContext(current_context()): - try: - res = f(*args, **kwargs) - except Exception: - # the assumption here is that the caller doesn't want to be disturbed - # by synchronous exceptions, so let's turn them into Failures. - return defer.fail() - - # `res` may be a coroutine, `Deferred`, some other kind of awaitable, or a plain - # value. Convert it to a `Deferred`. - d: "defer.Deferred[R]" - if isinstance(res, typing.Coroutine): - # Wrap the coroutine in a `Deferred`. - d = defer.ensureDeferred(res) - elif isinstance(res, defer.Deferred): - d = res - elif isinstance(res, Awaitable): - # `res` is probably some kind of completed awaitable, such as a `DoneAwaitable` - # or `Future` from `make_awaitable`. - d = defer.ensureDeferred(_unwrap_awaitable(res)) - else: - # `res` is a plain value. Wrap it in a `Deferred`. - d = defer.succeed(res) - - # The deferred has already completed - if d.called and not d.paused: - # The function should have maintained the logcontext, so we can - # optimise out the messing about - return d - - # The original logcontext will be restored when the deferred completes, but - # there is nothing waiting for it, so it will get leaked into the reactor (which - # would then get picked up by the next thing the reactor does). We therefore - # need to reset the logcontext here (set the `sentinel` logcontext) before - # yielding control back to the reactor. - # - # (If this feels asymmetric, consider it this way: we are - # effectively forking a new thread of execution. We are - # probably currently within a ``with LoggingContext()`` block, - # which is supposed to have a single entry and exit point. But - # by spawning off another deferred, we are effectively - # adding a new exit point.) - d.addBoth(_set_context_cb, SENTINEL_CONTEXT) - return d + # (If this feels asymmetric, consider it this way: we are + # effectively forking a new thread of execution. We are + # probably currently within a ``with LoggingContext()`` block, + # which is supposed to have a single entry and exit point. But + # by spawning off another deferred, we are effectively + # adding a new exit point.) + d.addBoth(_set_context_cb, SENTINEL_CONTEXT) + return d def run_coroutine_in_background( @@ -885,29 +889,33 @@ def run_coroutine_in_background( do not run until called, and so calling an async function without awaiting cannot change the log contexts. """ + calling_context = current_context() + + # Wrap the coroutine in a deferred, which will have the side effect of executing the + # coroutine in the background. + d = defer.ensureDeferred(coroutine) + + # The function may have reset the context before returning, so we need to restore it + # now. + # # Our goal is to have the caller logcontext unchanged after firing off the # background task and returning. + set_current_context(calling_context) + + # The original logcontext will be restored when the deferred completes, but + # there is nothing waiting for it, so it will get leaked into the reactor (which + # would then get picked up by the next thing the reactor does). We therefore + # need to reset the logcontext here (set the `sentinel` logcontext) before + # yielding control back to the reactor. # - # When this function is called, the current context is stored (using - # `PreserveLoggingContext`), we kick off the background task, and we restore the - # original context before returning (also part of `PreserveLoggingContext`). - with PreserveLoggingContext(current_context()): - d = defer.ensureDeferred(coroutine) - - # The original logcontext will be restored when the deferred completes, but - # there is nothing waiting for it, so it will get leaked into the reactor (which - # would then get picked up by the next thing the reactor does). We therefore - # need to reset the logcontext here (set the `sentinel` logcontext) before - # yielding control back to the reactor. - # - # (If this feels asymmetric, consider it this way: we are - # effectively forking a new thread of execution. We are - # probably currently within a ``with LoggingContext()`` block, - # which is supposed to have a single entry and exit point. But - # by spawning off another deferred, we are effectively - # adding a new exit point.) - d.addBoth(_set_context_cb, SENTINEL_CONTEXT) - return d + # (If this feels asymmetric, consider it this way: we are + # effectively forking a new thread of execution. We are + # probably currently within a ``with LoggingContext()`` block, + # which is supposed to have a single entry and exit point. But + # by spawning off another deferred, we are effectively + # adding a new exit point.) + d.addBoth(_set_context_cb, SENTINEL_CONTEXT) + return d T = TypeVar("T") From 0c4fa2a4236e374774850aff2751fd3caa81e9a2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 8 Sep 2025 17:20:10 -0500 Subject: [PATCH 12/15] Revert simplification: Why can't use use `run_in_background` in `run_as_background_process`? We start to see some of the tests fail: ``` SYNAPSE_TEST_LOG_LEVEL=INFO poetry run trial tests.crypto.test_keyring ``` --- synapse/metrics/background_process_metrics.py | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index c07975526ec..c6ee21d42aa 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -47,7 +47,7 @@ from synapse.logging.context import ( ContextResourceUsage, LoggingContext, - run_in_background, + PreserveLoggingContext, ) from synapse.logging.opentracing import SynapseTags, start_active_span from synapse.metrics import SERVER_NAME_LABEL @@ -229,9 +229,9 @@ def run_as_background_process( normal synapse async function). Because the returned Deferred does not follow the synapse logcontext rules, awaiting - the result of this function will result in the log context being cleared. In order - to properly await the result of this function and maintain the current log context, - use `make_deferred_yieldable`. + the result of this function will result in the log context being cleared (bad). In + order to properly await the result of this function and maintain the current log + context, use `make_deferred_yieldable`. Args: desc: a description for this background process type @@ -285,7 +285,22 @@ async def run() -> Optional[R]: name=desc, **{SERVER_NAME_LABEL: server_name} ).dec() - return run_in_background(run) + # To explain how the log contexts work here: + # - When this function is called, the current context is stored (using + # `PreserveLoggingContext`), we kick off the background task, and we restore the + # original context before returning (also part of `PreserveLoggingContext`). + # - When the background task finishes, we don't want to leak our background context + # into the reactor which would erroneously get attached to the next operation + # picked up by the event loop. We use `PreserveLoggingContext` to set the + # `sentinel` context and means the new `BackgroundProcessLoggingContext` will + # remember the `sentinel` context as its previous context to return to when it + # exits and yields control back to the reactor. + # + # TODO: Why can't we simplify to using `return run_in_background(run)`? + with PreserveLoggingContext(): + # Note that we return a Deferred here so that it can be used in a + # looping_call and other places that expect a Deferred. + return defer.ensureDeferred(run()) P = ParamSpec("P") From 5794e8f4eb49d2fadbe670de6170ea00fa07e94b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 9 Sep 2025 10:04:20 -0500 Subject: [PATCH 13/15] Explain `make_deferred_yieldable`/`with PreserveLoggingContext():` equivalency See https://github.com/element-hq/synapse/pull/18900#discussion_r2331888692 --- synapse/logging/context.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 8a36a913c45..214a246d73f 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -625,9 +625,17 @@ def filter(self, record: logging.LogRecord) -> Literal[True]: class PreserveLoggingContext: - """Context manager which replaces the logging context + """ + Context manager which replaces the logging context + + The previous logging context is restored on exit. - The previous logging context is restored on exit.""" + `make_deferred_yieldable` is pretty equivalent to using `with + PreserveLoggingContext():` (using the default sentinel context), i.e. it clears the + logcontext before awaiting (and so before execution passes back to the reactor) and + restores the old context once the awaitable completes (execution passes from the + reactor back to the code). + """ __slots__ = ["_old_context", "_new_context"] @@ -938,6 +946,11 @@ def make_deferred_yieldable(deferred: "defer.Deferred[T]") -> "defer.Deferred[T] (This is more-or-less the opposite operation to run_in_background in terms of how it handles log contexts.) + + Pretty much equivalent to using `with PreserveLoggingContext():`, i.e. it clears the + logcontext before awaiting (and so before execution passes back to the reactor) and + restores the old context once the awaitable completes (execution passes from the + reactor back to the code). """ # The deferred has already completed if deferred.called and not deferred.paused: From 0fb74609b597059233deec585d05526da854873c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 9 Sep 2025 11:27:43 -0500 Subject: [PATCH 14/15] Explain ergonomic benefit of `run_coroutine_in_background` See https://github.com/element-hq/synapse/pull/18900#discussion_r2331556786 --- synapse/logging/context.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 214a246d73f..6eaa19d2f6a 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -896,6 +896,15 @@ def run_coroutine_in_background( coroutine directly rather than a function. We can do this because coroutines do not run until called, and so calling an async function without awaiting cannot change the log contexts. + + This is an ergonomic helper so we can do this: + ```python + run_coroutine_in_background(func1(arg1)) + ``` + Rather than having to do this: + ```python + run_in_background(lambda: func1(arg1)) + ``` """ calling_context = current_context() From d35a9fffdfe419878e115c66a0cb64c1d2cdea45 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 9 Sep 2025 11:40:09 -0500 Subject: [PATCH 15/15] Incorporate "in other words" about reactor using sentinel context See https://github.com/element-hq/synapse/pull/18900#discussion_r2334019641 --- docs/log_contexts.md | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/docs/log_contexts.md b/docs/log_contexts.md index ea99e1749b3..4e0c0e64f79 100644 --- a/docs/log_contexts.md +++ b/docs/log_contexts.md @@ -61,20 +61,25 @@ def do_request_handling(): ### The `sentinel` context -The default context is `synapse.logging.context.SENTINEL_CONTEXT`, which is an empty -sentinel value to represent the root context. This is what is used when there is no -other context set. The phrase "clear/reset the logcontext" means to set the current -context to the `sentinel` context. +The default logcontext is `synapse.logging.context.SENTINEL_CONTEXT`, which is an empty +sentinel value to represent the root logcontext. This is what is used when there is no +other logcontext set. The phrase "clear/reset the logcontext" means to set the current +logcontext to the `sentinel` logcontext. -No CPU/database usage metrics are recorded against the `sentinel` context. +No CPU/database usage metrics are recorded against the `sentinel` logcontext. Ideally, nothing from the Synapse homeserver would be logged against the `sentinel` -context as we want to know which server the logs came from. In practice, this is not +logcontext as we want to know which server the logs came from. In practice, this is not always the case yet especially outside of request handling. -Whenever yielding control back to the reactor (event loop), the `sentinel` logcontext is -set to avoid leaking the current logcontext into the other reactor which would -erroneously get attached to the next operation picked up by the event loop. +Global things outside of Synapse (e.g. Twisted reactor code) should run in the +`sentinel` logcontext. It's only when it calls into application code that a logcontext +gets activated. This means the reactor should be started in the `sentinel` logcontext, +and any time an awaitable yields control back to the reactor, it should reset the +logcontext to be the `sentinel` logcontext. This is important to avoid leaking the +current logcontext to the reactor (which would then get picked up and associated with +the next thing the reactor does). + ## Using logcontexts with awaitables