Skip to content

Commit b1c867f

Browse files
committed
Fix logcontext problems in test_run_in_background_active_scope_still_available
This test is still expected to fail with our current `LogContextScopeManager` implementation but there were some flaws in how we were handling the logcontext before (like pumping the reactor in a non-sentinel logcontext)
1 parent 9c4ea3c commit b1c867f

File tree

1 file changed

+172
-2
lines changed

1 file changed

+172
-2
lines changed

tests/logging/test_opentracing.py

Lines changed: 172 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,15 @@
1919
#
2020
#
2121

22-
from typing import Awaitable, cast
22+
from typing import Awaitable, Dict, cast
2323

2424
from twisted.internet import defer
2525
from twisted.internet.testing import MemoryReactorClock
2626

2727
from synapse.logging.context import (
2828
LoggingContext,
29+
PreserveLoggingContext,
30+
current_context,
2931
make_deferred_yieldable,
3032
run_in_background,
3133
)
@@ -37,9 +39,16 @@
3739
)
3840
from synapse.util.clock import Clock
3941

42+
from tests.server import (
43+
get_clock,
44+
)
45+
4046
try:
47+
import opentracing
48+
4149
from synapse.logging.scopecontextmanager import LogContextScopeManager
4250
except ImportError:
51+
opentracing = None # type: ignore
4352
LogContextScopeManager = None # type: ignore
4453

4554
try:
@@ -65,7 +74,7 @@ class LogContextScopeManagerTestCase(TestCase):
6574
opentracing backend is Jaeger.
6675
"""
6776

68-
if LogContextScopeManager is None:
77+
if opentracing is None or LogContextScopeManager is None:
6978
skip = "Requires opentracing" # type: ignore[unreachable]
7079
if jaeger_client is None:
7180
skip = "Requires jaeger_client" # type: ignore[unreachable]
@@ -218,6 +227,167 @@ async def root() -> None:
218227
[scopes[1].span, scopes[2].span, scopes[0].span],
219228
)
220229

230+
def test_run_in_background_active_scope_still_available(self) -> None:
231+
"""
232+
Test that tasks running via `run_in_background` still have access to the
233+
active tracing scope.
234+
235+
This is a regression test for a previous Synapse issue where the tracing scope
236+
would `__exit__` and close before the `run_in_background` task completed and our
237+
own previous custom `_LogContextScope.close(...)` would clear
238+
`LoggingContext.scope` preventing further tracing spans from having the correct
239+
parent.
240+
"""
241+
reactor, clock = get_clock()
242+
243+
scope_map: Dict[str, opentracing.Scope] = {}
244+
245+
callback_finished = False
246+
247+
async def async_task() -> None:
248+
nonlocal callback_finished
249+
try:
250+
logger.info("asdf async_task1 - %s", current_context().scope)
251+
root_scope = scope_map["root"]
252+
root_context = cast(jaeger_client.SpanContext, root_scope.span.context)
253+
254+
self.assertEqual(
255+
self._tracer.active_span,
256+
root_scope.span,
257+
"expected to inherit the root tracing scope from where this was run",
258+
)
259+
260+
# Return control back to the reactor thread and wait an arbitrary amount of time
261+
await clock.sleep(4)
262+
263+
logger.info("asdf async_task2 - %s", current_context().scope)
264+
265+
# This is a key part of what we're testing! In a previous version of
266+
# Synapse, we would lose the active span at this point.
267+
self.assertEqual(
268+
self._tracer.active_span,
269+
root_scope.span,
270+
"expected to still have a root tracing scope/span active",
271+
)
272+
273+
# For completeness' sake, let's also trace more sub-tasks here and assert
274+
# they have the correct span parents as well (root)
275+
276+
# Start tracing some other sub-task.
277+
#
278+
# This is a key part of what we're testing! In a previous version of
279+
# Synapse, it would have the incorrect span parents.
280+
scope = start_active_span(
281+
"task1",
282+
tracer=self._tracer,
283+
)
284+
scope_map["task1"] = scope
285+
286+
# Ensure the span parent is pointing to the root scope
287+
context = cast(jaeger_client.SpanContext, scope.span.context)
288+
self.assertEqual(
289+
context.parent_id,
290+
root_context.span_id,
291+
"expected task1 parent to be the root span",
292+
)
293+
294+
# Ensure that the active span is our new sub-task now
295+
self.assertEqual(self._tracer.active_span, scope.span)
296+
# Return control back to the reactor thread and wait an arbitrary amount of time
297+
await clock.sleep(4)
298+
# We should still see the active span as the scope wasn't closed yet
299+
self.assertEqual(self._tracer.active_span, scope.span)
300+
# Close the scope using the context manager lifecycle (`scope.close()`)
301+
with scope:
302+
pass
303+
finally:
304+
# When exceptions happen, we still want to mark the callback as finished
305+
# so that the test can complete and we see the underlying error.
306+
callback_finished = True
307+
308+
async def root() -> None:
309+
with start_active_span(
310+
"root span",
311+
tracer=self._tracer,
312+
# We will close this off later. We're basically just mimicking the same
313+
# pattern for how we handle requests. We pass the span off to the
314+
# request for it to finish.
315+
finish_on_close=False,
316+
) as root_scope:
317+
scope_map["root"] = root_scope
318+
self.assertEqual(self._tracer.active_span, root_scope.span)
319+
320+
logger.info("asdf root1 - %s", current_context().scope)
321+
322+
# Fire-and-forget a task
323+
#
324+
# XXX: The root scope context manager will `__exit__` before this task
325+
# completes.
326+
run_in_background(async_task)
327+
328+
logger.info("asdf root2 - %s", current_context().scope)
329+
330+
# Because we used `run_in_background`, the active span should still be
331+
# the root.
332+
self.assertEqual(self._tracer.active_span, root_scope.span)
333+
334+
logger.info("asdf root3 - %s", current_context().scope)
335+
336+
# We shouldn't see any active spans outside of the scope
337+
self.assertIsNone(self._tracer.active_span)
338+
339+
with LoggingContext(name="root logcontext", server_name="test_server"):
340+
# Start the test off
341+
d_root = defer.ensureDeferred(root())
342+
343+
# Pump the reactor until the tasks complete
344+
#
345+
# We're manually pumping the reactor (and causing any pending callbacks to
346+
# be called) so we need to be in the sentinel logcontext to avoid leaking
347+
# our current logcontext into the reactor (which would then get picked up
348+
# and associated with the next thing the reactor does). `with
349+
# PreserveLoggingContext()` will reset the logcontext to the sentinel while
350+
# we're pumping the reactor in the block and return us back to our current
351+
# logcontext after the block.
352+
with PreserveLoggingContext():
353+
reactor.pump((2,) * 8)
354+
self.successResultOf(d_root)
355+
356+
self.assertTrue(
357+
callback_finished,
358+
"Callback never finished which means the test probably didn't wait long enough",
359+
)
360+
361+
# After we see all of the tasks are done (like a request when it
362+
# `_finished_processing`), let's finish our root span
363+
scope_map["root"].span.finish()
364+
365+
# Sanity check again: We shouldn't see any active spans leftover in this
366+
# context.
367+
self.assertIsNone(self._tracer.active_span)
368+
369+
# The spans should be reported in order of their finishing: task1, then
370+
# root.
371+
#
372+
# We use `assertIncludes` just as an easier way to see if items are missing or
373+
# added. We assert the order just below
374+
self.assertIncludes(
375+
set(self._reporter.get_spans()),
376+
{
377+
scope_map["task1"].span,
378+
scope_map["root"].span,
379+
},
380+
exact=True,
381+
)
382+
# This is where we actually assert the correct order
383+
self.assertEqual(
384+
self._reporter.get_spans(),
385+
[
386+
scope_map["task1"].span,
387+
scope_map["root"].span,
388+
],
389+
)
390+
221391
def test_trace_decorator_sync(self) -> None:
222392
"""
223393
Test whether we can use `@trace_with_opname` (`@trace`) and `@tag_args`

0 commit comments

Comments
 (0)