Skip to content

Commit b1c867f

Browse files
committed
Fix logcontext problems in test_run_in_background_active_scope_still_available
This test is still expected to fail with our current `LogContextScopeManager` implementation but there were some flaws in how we were handling the logcontext before (like pumping the reactor in a non-sentinel logcontext)
1 parent 9c4ea3c commit b1c867f

File tree

1 file changed

+172
-2
lines changed

1 file changed

+172
-2
lines changed

tests/logging/test_opentracing.py

Lines changed: 172 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,15 @@
1919
#
2020
#
2121

22-
from typing import Awaitable, cast
22+
from typing import Awaitable, Dict, cast
2323

2424
from twisted.internet import defer
2525
from twisted.internet.testing import MemoryReactorClock
2626

2727
from synapse.logging.context import (
2828
LoggingContext,
29+
PreserveLoggingContext,
30+
current_context,
2931
make_deferred_yieldable,
3032
run_in_background,
3133
)
@@ -37,9 +39,16 @@
3739
)
3840
from synapse.util.clock import Clock
3941

42+
from tests.server import (
43+
get_clock,
44+
)
45+
4046
try:
47+
import opentracing
48+
4149
from synapse.logging.scopecontextmanager import LogContextScopeManager
4250
except ImportError:
51+
opentracing = None # type: ignore
4352
LogContextScopeManager = None # type: ignore
4453

4554
try:
@@ -65,7 +74,7 @@ class LogContextScopeManagerTestCase(TestCase):
6574
opentracing backend is Jaeger.
6675
"""
6776

68-
if LogContextScopeManager is None:
77+
if opentracing is None or LogContextScopeManager is None:
6978
skip = "Requires opentracing" # type: ignore[unreachable]
7079
if jaeger_client is None:
7180
skip = "Requires jaeger_client" # type: ignore[unreachable]
@@ -218,6 +227,167 @@ async def root() -> None:
218227
[scopes[1].span, scopes[2].span, scopes[0].span],
219228
)
220229

230+
def test_run_in_background_active_scope_still_available(self) -> None:
231+
"""
232+
Test that tasks running via `run_in_background` still have access to the
233+
active tracing scope.
234+
235+
This is a regression test for a previous Synapse issue where the tracing scope
236+
would `__exit__` and close before the `run_in_background` task completed and our
237+
own previous custom `_LogContextScope.close(...)` would clear
238+
`LoggingContext.scope` preventing further tracing spans from having the correct
239+
parent.
240+
"""
241+
reactor, clock = get_clock()
242+
243+
scope_map: Dict[str, opentracing.Scope] = {}
244+
245+
callback_finished = False
246+
247+
async def async_task() -> None:
248+
nonlocal callback_finished
249+
try:
250+
logger.info("asdf async_task1 - %s", current_context().scope)
251+
root_scope = scope_map["root"]
252+
root_context = cast(jaeger_client.SpanContext, root_scope.span.context)
253+
254+
self.assertEqual(
255+
self._tracer.active_span,
256+
root_scope.span,
257+
"expected to inherit the root tracing scope from where this was run",
258+
)
259+
260+
# Return control back to the reactor thread and wait an arbitrary amount of time
261+
await clock.sleep(4)
262+
263+
logger.info("asdf async_task2 - %s", current_context().scope)
264+
265+
# This is a key part of what we're testing! In a previous version of
266+
# Synapse, we would lose the active span at this point.
267+
self.assertEqual(
268+
self._tracer.active_span,
269+
root_scope.span,
270+
"expected to still have a root tracing scope/span active",
271+
)
272+
273+
# For completeness' sake, let's also trace more sub-tasks here and assert
274+
# they have the correct span parents as well (root)
275+
276+
# Start tracing some other sub-task.
277+
#
278+
# This is a key part of what we're testing! In a previous version of
279+
# Synapse, it would have the incorrect span parents.
280+
scope = start_active_span(
281+
"task1",
282+
tracer=self._tracer,
283+
)
284+
scope_map["task1"] = scope
285+
286+
# Ensure the span parent is pointing to the root scope
287+
context = cast(jaeger_client.SpanContext, scope.span.context)
288+
self.assertEqual(
289+
context.parent_id,
290+
root_context.span_id,
291+
"expected task1 parent to be the root span",
292+
)
293+
294+
# Ensure that the active span is our new sub-task now
295+
self.assertEqual(self._tracer.active_span, scope.span)
296+
# Return control back to the reactor thread and wait an arbitrary amount of time
297+
await clock.sleep(4)
298+
# We should still see the active span as the scope wasn't closed yet
299+
self.assertEqual(self._tracer.active_span, scope.span)
300+
# Close the scope using the context manager lifecycle (`scope.close()`)
301+
with scope:
302+
pass
303+
finally:
304+
# When exceptions happen, we still want to mark the callback as finished
305+
# so that the test can complete and we see the underlying error.
306+
callback_finished = True
307+
308+
async def root() -> None:
309+
with start_active_span(
310+
"root span",
311+
tracer=self._tracer,
312+
# We will close this off later. We're basically just mimicking the same
313+
# pattern for how we handle requests. We pass the span off to the
314+
# request for it to finish.
315+
finish_on_close=False,
316+
) as root_scope:
317+
scope_map["root"] = root_scope
318+
self.assertEqual(self._tracer.active_span, root_scope.span)
319+
320+
logger.info("asdf root1 - %s", current_context().scope)
321+
322+
# Fire-and-forget a task
323+
#
324+
# XXX: The root scope context manager will `__exit__` before this task
325+
# completes.
326+
run_in_background(async_task)
327+
328+
logger.info("asdf root2 - %s", current_context().scope)
329+
330+
# Because we used `run_in_background`, the active span should still be
331+
# the root.
332+
self.assertEqual(self._tracer.active_span, root_scope.span)
333+
334+
logger.info("asdf root3 - %s", current_context().scope)
335+
336+
# We shouldn't see any active spans outside of the scope
337+
self.assertIsNone(self._tracer.active_span)
338+
339+
with LoggingContext(name="root logcontext", server_name="test_server"):
340+
# Start the test off
341+
d_root = defer.ensureDeferred(root())
342+
343+
# Pump the reactor until the tasks complete
344+
#
345+
# We're manually pumping the reactor (and causing any pending callbacks to
346+
# be called) so we need to be in the sentinel logcontext to avoid leaking
347+
# our current logcontext into the reactor (which would then get picked up
348+
# and associated with the next thing the reactor does). `with
349+
# PreserveLoggingContext()` will reset the logcontext to the sentinel while
350+
# we're pumping the reactor in the block and return us back to our current
351+
# logcontext after the block.
352+
with PreserveLoggingContext():
353+
reactor.pump((2,) * 8)
354+
self.successResultOf(d_root)
355+
356+
self.assertTrue(
357+
callback_finished,
358+
"Callback never finished which means the test probably didn't wait long enough",
359+
)
360+
361+
# After we see all of the tasks are done (like a request when it
362+
# `_finished_processing`), let's finish our root span
363+
scope_map["root"].span.finish()
364+
365+
# Sanity check again: We shouldn't see any active spans leftover in this
366+
# context.
367+
self.assertIsNone(self._tracer.active_span)
368+
369+
# The spans should be reported in order of their finishing: task1, then
370+
# root.
371+
#
372+
# We use `assertIncludes` just as an easier way to see if items are missing or
373+
# added. We assert the order just below
374+
self.assertIncludes(
375+
set(self._reporter.get_spans()),
376+
{
377+
scope_map["task1"].span,
378+
scope_map["root"].span,
379+
},
380+
exact=True,
381+
)
382+
# This is where we actually assert the correct order
383+
self.assertEqual(
384+
self._reporter.get_spans(),
385+
[
386+
scope_map["task1"].span,
387+
scope_map["root"].span,
388+
],
389+
)
390+
221391
def test_trace_decorator_sync(self) -> None:
222392
"""
223393
Test whether we can use `@trace_with_opname` (`@trace`) and `@tag_args`

0 commit comments

Comments
 (0)