Skip to content

Commit dffe272

Browse files
authored
Merge pull request #1061 from oremanj/notice-cancel-misnesting
Attempt to recover from cancel scope nesting violations
2 parents 3c4c9aa + 00c4d04 commit dffe272

File tree

3 files changed

+219
-4
lines changed

3 files changed

+219
-4
lines changed

newsfragments/882.feature.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Trio now gives a reasonable traceback and error message in most cases
2+
when its invariants surrounding cancel scope nesting have been
3+
violated. (One common source of such violations is an async generator
4+
that yields within a cancel scope.) The previous behavior was an
5+
inscrutable chain of TrioInternalErrors.

trio/_core/_run.py

Lines changed: 143 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,8 @@ class CancelStatus:
173173
# The CancelStatus whose cancellations can propagate to us; we
174174
# become effectively cancelled when they do, unless scope.shield
175175
# is True. May be None (for the outermost CancelStatus in a call
176-
# to trio.run(), or briefly during TaskStatus.started()).
176+
# to trio.run(), briefly during TaskStatus.started(), or during
177+
# recovery from mis-nesting of cancel scopes).
177178
_parent = attr.ib(default=None, repr=False)
178179

179180
# All of the CancelStatuses that have this CancelStatus as their parent.
@@ -185,6 +186,12 @@ class CancelStatus:
185186
# Invariant: all(task._cancel_status is self for task in self._tasks)
186187
_tasks = attr.ib(factory=set, init=False, repr=False)
187188

189+
# Set to True on still-active cancel statuses that are children
190+
# of a cancel status that's been closed. This is used to permit
191+
# recovery from mis-nested cancel scopes (well, at least enough
192+
# recovery to show a useful traceback).
193+
abandoned_by_misnesting = attr.ib(default=False, init=False, repr=False)
194+
188195
def __attrs_post_init__(self):
189196
if self._parent is not None:
190197
self._parent._children.add(self)
@@ -213,9 +220,43 @@ def children(self):
213220
def tasks(self):
214221
return frozenset(self._tasks)
215222

223+
def encloses(self, other):
224+
"""Returns true if this cancel status is a direct or indirect
225+
parent of cancel status *other*, or if *other* is *self*.
226+
"""
227+
while other is not None:
228+
if other is self:
229+
return True
230+
other = other.parent
231+
return False
232+
216233
def close(self):
217-
assert not self._tasks and not self._children
218234
self.parent = None # now we're not a child of self.parent anymore
235+
if self._tasks or self._children:
236+
# Cancel scopes weren't exited in opposite order of being
237+
# entered. CancelScope._close() deals with raising an error
238+
# if appropriate; our job is to leave things in a reasonable
239+
# state for unwinding our dangling children. We choose to leave
240+
# this part of the CancelStatus tree unlinked from everyone
241+
# else, cancelled, and marked so that exiting a CancelScope
242+
# within the abandoned subtree doesn't affect the active
243+
# CancelStatus. Note that it's possible for us to get here
244+
# without CancelScope._close() raising an error, if a
245+
# nursery's cancel scope is closed within the nursery's
246+
# nested child and no other cancel scopes are involved,
247+
# but in that case task_exited() will deal with raising
248+
# the error.
249+
self._mark_abandoned()
250+
251+
# Since our CancelScope is about to forget about us, and we
252+
# have no parent anymore, there's nothing left to call
253+
# recalculate(). So, we can stay cancelled by setting
254+
# effectively_cancelled and updating our children.
255+
self.effectively_cancelled = True
256+
for task in self._tasks:
257+
task._attempt_delivery_of_any_pending_cancel()
258+
for child in self._children:
259+
child.recalculate()
219260

220261
@property
221262
def parent_cancellation_is_visible_to_us(self):
@@ -237,6 +278,11 @@ def recalculate(self):
237278
for child in self._children:
238279
child.recalculate()
239280

281+
def _mark_abandoned(self):
282+
self.abandoned_by_misnesting = True
283+
for child in self._children:
284+
child._mark_abandoned()
285+
240286
def effective_deadline(self):
241287
if self.effectively_cancelled:
242288
return -inf
@@ -245,6 +291,30 @@ def effective_deadline(self):
245291
return min(self._scope.deadline, self._parent.effective_deadline())
246292

247293

294+
MISNESTING_ADVICE = """
295+
This is probably a bug in your code, that has caused Trio's internal state to
296+
become corrupted. We'll do our best to recover, but from now on there are
297+
no guarantees.
298+
299+
Typically this is caused by one of the following:
300+
- yielding within a generator or async generator that's opened a cancel
301+
scope or nursery (unless the generator is a @contextmanager or
302+
@asynccontextmanager); see https://github.com/python-trio/trio/issues/638
303+
- manually calling __enter__ or __exit__ on a trio.CancelScope, or
304+
__aenter__ or __aexit__ on the object returned by trio.open_nursery();
305+
doing so correctly is difficult and you should use @[async]contextmanager
306+
instead, or maybe [Async]ExitStack
307+
- using [Async]ExitStack to interleave the entries/exits of cancel scopes
308+
and/or nurseries in a way that couldn't be achieved by some nesting of
309+
'with' and 'async with' blocks
310+
- using the low-level coroutine object protocol to execute some parts of
311+
an async function in a different cancel scope/nursery context than
312+
other parts
313+
If you don't believe you're doing any of these things, please file a bug:
314+
https://github.com/python-trio/trio/issues/new
315+
"""
316+
317+
248318
@attr.s(cmp=False, repr=False, slots=True)
249319
class CancelScope(metaclass=Final):
250320
"""A *cancellation scope*: the link between a unit of cancellable
@@ -319,13 +389,60 @@ def _exc_filter(self, exc):
319389
return exc
320390

321391
def _close(self, exc):
392+
if self._cancel_status is None:
393+
new_exc = RuntimeError(
394+
"Cancel scope stack corrupted: attempted to exit {!r} "
395+
"which had already been exited".format(self)
396+
)
397+
new_exc.__context__ = exc
398+
return new_exc
322399
scope_task = current_task()
400+
if scope_task._cancel_status is not self._cancel_status:
401+
# Cancel scope mis-nesting: this cancel scope isn't the most
402+
# recently opened by this task (that's still open). That is,
403+
# our assumptions about context managers forming a stack
404+
# have been violated. Try and make the best of it.
405+
if self._cancel_status.abandoned_by_misnesting:
406+
# We are an inner cancel scope that was still active when
407+
# some outer scope was closed. The closure of that outer
408+
# scope threw an error, so we don't need to throw another
409+
# one; it would just confuse the traceback.
410+
pass
411+
elif not self._cancel_status.encloses(scope_task._cancel_status):
412+
# This task isn't even indirectly contained within the
413+
# cancel scope it's trying to close. Raise an error
414+
# without changing any state.
415+
new_exc = RuntimeError(
416+
"Cancel scope stack corrupted: attempted to exit {!r} "
417+
"from unrelated {!r}\n{}".format(
418+
self, scope_task, MISNESTING_ADVICE
419+
)
420+
)
421+
new_exc.__context__ = exc
422+
return new_exc
423+
else:
424+
# Otherwise, there's some inner cancel scope(s) that
425+
# we're abandoning by closing this outer one.
426+
# CancelStatus.close() will take care of the plumbing;
427+
# we just need to make sure we don't let the error
428+
# pass silently.
429+
new_exc = RuntimeError(
430+
"Cancel scope stack corrupted: attempted to exit {!r} "
431+
"in {!r} that's still within its child {!r}\n{}".format(
432+
self, scope_task, scope_task._cancel_status._scope,
433+
MISNESTING_ADVICE
434+
)
435+
)
436+
new_exc.__context__ = exc
437+
exc = new_exc
438+
scope_task._activate_cancel_status(self._cancel_status.parent)
439+
else:
440+
scope_task._activate_cancel_status(self._cancel_status.parent)
323441
if (
324442
exc is not None
325443
and not self._cancel_status.parent_cancellation_is_visible_to_us
326444
):
327445
exc = MultiError.filter(self._exc_filter, exc)
328-
scope_task._activate_cancel_status(self._cancel_status.parent)
329446
self._cancel_status.close()
330447
with self._might_change_registered_deadline():
331448
self._cancel_status = None
@@ -1155,6 +1272,29 @@ def _return_value_looks_like_wrong_library(value):
11551272
return task
11561273

11571274
def task_exited(self, task, outcome):
1275+
if (
1276+
task._cancel_status is not None
1277+
and task._cancel_status.abandoned_by_misnesting
1278+
and task._cancel_status.parent is None
1279+
):
1280+
# The cancel scope surrounding this task's nursery was closed
1281+
# before the task exited. Force the task to exit with an error,
1282+
# since the error might not have been caught elsewhere. See the
1283+
# comments in CancelStatus.close().
1284+
try:
1285+
# Raise this, rather than just constructing it, to get a
1286+
# traceback frame included
1287+
raise RuntimeError(
1288+
"Cancel scope stack corrupted: cancel scope surrounding "
1289+
"{!r} was closed before the task exited\n{}".format(
1290+
task, MISNESTING_ADVICE
1291+
)
1292+
)
1293+
except RuntimeError as new_exc:
1294+
if isinstance(outcome, Error):
1295+
new_exc.__context__ = outcome.error
1296+
outcome = Error(new_exc)
1297+
11581298
task._activate_cancel_status(None)
11591299
self.tasks.remove(task)
11601300
if task is self.main_task:

trio/_core/tests/test_run.py

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import time
77
import types
88
import warnings
9-
from contextlib import contextmanager
9+
from contextlib import contextmanager, ExitStack
1010
from math import inf
1111

1212
import attr
@@ -923,6 +923,76 @@ async def enter_scope():
923923
assert scope.cancel_called # never become un-cancelled
924924

925925

926+
async def test_cancel_scope_misnesting():
927+
outer = _core.CancelScope()
928+
inner = _core.CancelScope()
929+
with ExitStack() as stack:
930+
stack.enter_context(outer)
931+
with inner:
932+
with pytest.raises(RuntimeError, match="still within its child"):
933+
stack.close()
934+
# No further error is raised when exiting the inner context
935+
936+
# If there are other tasks inside the abandoned part of the cancel tree,
937+
# they get cancelled when the misnesting is detected
938+
async def task1():
939+
with pytest.raises(_core.Cancelled):
940+
await sleep_forever()
941+
942+
# Even if inside another cancel scope
943+
async def task2():
944+
with _core.CancelScope():
945+
with pytest.raises(_core.Cancelled):
946+
await sleep_forever()
947+
948+
with ExitStack() as stack:
949+
stack.enter_context(_core.CancelScope())
950+
async with _core.open_nursery() as nursery:
951+
nursery.start_soon(task1)
952+
nursery.start_soon(task2)
953+
await wait_all_tasks_blocked()
954+
with pytest.raises(RuntimeError, match="still within its child"):
955+
stack.close()
956+
957+
# Variant that makes the child tasks direct children of the scope
958+
# that noticed the misnesting:
959+
nursery_mgr = _core.open_nursery()
960+
nursery = await nursery_mgr.__aenter__()
961+
try:
962+
nursery.start_soon(task1)
963+
nursery.start_soon(task2)
964+
nursery.start_soon(sleep_forever)
965+
await wait_all_tasks_blocked()
966+
nursery.cancel_scope.__exit__(None, None, None)
967+
finally:
968+
with pytest.raises(RuntimeError) as exc_info:
969+
await nursery_mgr.__aexit__(*sys.exc_info())
970+
assert "which had already been exited" in str(exc_info.value)
971+
assert type(exc_info.value.__context__) is _core.MultiError
972+
assert len(exc_info.value.__context__.exceptions) == 3
973+
cancelled_in_context = False
974+
for exc in exc_info.value.__context__.exceptions:
975+
assert isinstance(exc, RuntimeError)
976+
assert "closed before the task exited" in str(exc)
977+
cancelled_in_context |= isinstance(
978+
exc.__context__, _core.Cancelled
979+
)
980+
assert cancelled_in_context # for the sleep_forever
981+
982+
# Trying to exit a cancel scope from an unrelated task raises an error
983+
# without affecting any state
984+
async def task3(task_status):
985+
with _core.CancelScope() as scope:
986+
task_status.started(scope)
987+
await sleep_forever()
988+
989+
async with _core.open_nursery() as nursery:
990+
scope = await nursery.start(task3)
991+
with pytest.raises(RuntimeError, match="from unrelated"):
992+
scope.__exit__(None, None, None)
993+
scope.cancel()
994+
995+
926996
async def test_timekeeping():
927997
# probably a good idea to use a real clock for *one* test anyway...
928998
TARGET = 0.1

0 commit comments

Comments
 (0)