Skip to content

check for nursery misnesting on task exit #3307

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion src/trio/_core/_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2013,7 +2013,29 @@ def task_exited(self, task: Task, outcome: Outcome[object]) -> None:
lot.break_lot(task)
del GLOBAL_PARKING_LOT_BREAKER[task]

if (
if task._child_nurseries:
# Forcefully abort any tasks spawned by the misnested nursery to
# avoid internal errors.
runner = GLOBAL_RUN_CONTEXT.runner
for nursery in task._child_nurseries:
nursery.cancel_scope.cancel()
for child in nursery._children:
if child in runner.runq:
runner.runq.remove(child)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very cool method to abort tasks but to me having nice behavior in this failure mode isn't important. If it's simple to just cancel the nursery then that's fine by me (if it isn't simple then oh well, this is fine. I just don't know what invariants this is relying on/wouldn't be able to spot bugs in this)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is if I don't do this cleanup then a lot of assumptions are broken elsewhere and the resulting TrioInternalError masks the RuntimeError we want to surface. But yeah, this seems ripe for bugs.

I think it's when the child tasks are finished they want to yield back to their caller, which now doesn't exist. So we can't even rely on Cancellation semantics either.

There might be a cleaner solution where we postpone the exiting of the current task after requesting cancellation on the nursery... hmm gonna go try that

runner.tasks.remove(child)
nursery._children.clear()
try:
# Raise this, rather than just constructing it, to get a
# traceback frame included
raise RuntimeError(
"Nursery stack corrupted: nurseries spawned by "
f"{task!r} was still live when the task exited\n{MISNESTING_ADVICE}",
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd be inclined to make this some kind of BaseException, to make it really clear that this is not a recoverable error.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the cancel scope misnesting error is RuntimeError, so I think this should be the same. With cleaning up aborted tasks I think this is technically recoverable, the runtime loop is not corrupted beyond redemption or anything.

So unless we want to do a thorough replacing of a lot of RuntimeError I think we're better off staying consistent.

except RuntimeError as new_exc:
if isinstance(outcome, Error):
new_exc.__context__ = outcome.error
outcome = Error(new_exc)
elif (
task._cancel_status is not None
and task._cancel_status.abandoned_by_misnesting
and task._cancel_status.parent is None
Expand Down
67 changes: 65 additions & 2 deletions src/trio/_core/_tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
import time
import types
import weakref
from contextlib import ExitStack, contextmanager, suppress
from contextlib import (
AsyncExitStack,
ExitStack,
asynccontextmanager,
contextmanager,
suppress,
)
from math import inf, nan
from typing import TYPE_CHECKING, NoReturn, TypeVar
from unittest import mock
Expand Down Expand Up @@ -761,7 +767,7 @@ async def enter_scope() -> None:
assert scope.cancel_called # never become un-cancelled


async def test_cancel_scope_misnesting() -> None:
async def test_cancel_scope_misnesting_1() -> None:
outer = _core.CancelScope()
inner = _core.CancelScope()
with ExitStack() as stack:
Expand All @@ -771,6 +777,8 @@ async def test_cancel_scope_misnesting() -> None:
stack.close()
# No further error is raised when exiting the inner context


async def test_cancel_scope_misnesting_2() -> None:
# If there are other tasks inside the abandoned part of the cancel tree,
# they get cancelled when the misnesting is detected
async def task1() -> None:
Expand Down Expand Up @@ -828,6 +836,8 @@ def no_context(exc: RuntimeError) -> bool:
)
assert group.matches(exc_info.value.__context__)


async def test_cancel_scope_misnesting_3() -> None:
# Trying to exit a cancel scope from an unrelated task raises an error
# without affecting any state
async def task3(task_status: _core.TaskStatus[_core.CancelScope]) -> None:
Expand All @@ -844,6 +854,59 @@ async def task3(task_status: _core.TaskStatus[_core.CancelScope]) -> None:
scope.cancel()


async def test_nursery_misnest() -> None:
# See https://github.com/python-trio/trio/issues/3298
async def inner_func() -> None:
inner_nursery = await inner_cm.__aenter__()
inner_nursery.start_soon(sleep, 1)

with pytest.RaisesGroup(
pytest.RaisesExc(RuntimeError, match="Nursery stack corrupted")
):
async with _core.open_nursery() as outer_nursery:
inner_cm = _core.open_nursery()
outer_nursery.start_soon(inner_func)


async def test_asyncexitstack_nursery_misnest() -> None:
@asynccontextmanager
async def asynccontextmanager_that_creates_a_nursery_internally() -> (
AsyncGenerator[None]
):
async with _core.open_nursery() as nursery:
await nursery.start(started_sleeper)
nursery.start_soon(unstarted_task)
yield

async def started_sleeper(task_status: _core.TaskStatus[None]) -> None:
task_status.started()
await sleep_forever()

async def unstarted_task() -> None:
raise AssertionError("this should not even get a chance to run")

async with AsyncExitStack() as stack:
manager = _core.open_nursery()
nursery = await manager.__aenter__()
# The asynccontextmanager is going to create a nursery that outlives this nursery!
nursery.start_soon(
stack.enter_async_context,
asynccontextmanager_that_creates_a_nursery_internally(),
)
with pytest.RaisesGroup(
pytest.RaisesExc(RuntimeError, match="Nursery stack corrupted")
):
await manager.__aexit__(None, None, None)

# The outer nursery forcefully aborts the inner nursery and stops `unstarted_task`
# from ever being started.
with pytest.warns(
RuntimeWarning,
match="^coroutine 'test_asyncexitstack_nursery_misnest.<locals>.unstarted_task' was never awaited$",
):
gc_collect_harder()


@slow
async def test_timekeeping() -> None:
# probably a good idea to use a real clock for *one* test anyway...
Expand Down
2 changes: 1 addition & 1 deletion test-requirements.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# For tests
pytest >= 5.0 # for faulthandler in core
pytest >= 8.4 # for pytest.RaisesGroup
coverage >= 7.2.5
async_generator >= 1.9
pyright
Expand Down
Loading