From 87b9ca2fa5ba26a1b1d928b7fc0f11699473764e Mon Sep 17 00:00:00 2001 From: Konstantin Kolesnyak Date: Thu, 23 Apr 2026 19:08:59 +0200 Subject: [PATCH 1/3] Patch anyio _deliver_cancellation to prevent 100% CPU hot-loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit anyio 4.13.0's CancelScope._deliver_cancellation sets should_retry=True unconditionally for every task in self._tasks, then reschedules itself via call_soon(). When every task in the scope is the *current* task, nothing gets cancelled but the callback re-queues on every event-loop tick — pinning one CPU core at 100% with ~45k epoll_pwait syscalls/sec. Observed on April 22 and again on April 23 (24h+ of 97% CPU, no work done). The existing _safe_disconnect() workaround in agent.engine only clears the stuck scope during client.disconnect(), so spins triggered by telegram polling / cron / live SDK requests weren't covered. The patch sets should_retry=True only when we actually delivered a cancel or the task is still waiting pickup (_must_cancel). Semantics otherwise match upstream byte-for-byte. Applied via nerve/__init__.py so any import path picks it up. Includes a regression test that exercises the exact pathological shape (scope whose only task is the current task) and asserts the scope stops rescheduling itself. --- nerve/__init__.py | 5 ++ nerve/_anyio_patch.py | 146 ++++++++++++++++++++++++++++++++++++++ tests/test_anyio_patch.py | 116 ++++++++++++++++++++++++++++++ 3 files changed, 267 insertions(+) create mode 100644 nerve/_anyio_patch.py create mode 100644 tests/test_anyio_patch.py diff --git a/nerve/__init__.py b/nerve/__init__.py index d87ae80..c08ac23 100644 --- a/nerve/__init__.py +++ b/nerve/__init__.py @@ -1,3 +1,8 @@ """Nerve — Personal AI Assistant.""" +# Apply anyio hot-loop patch as early as possible — monkey-patches +# CancelScope._deliver_cancellation to prevent 100% CPU spins on +# unrecoverable cancellations. See nerve/_anyio_patch.py for details. +from nerve import _anyio_patch as _anyio_patch # noqa: F401 + __version__ = "0.1.0" diff --git a/nerve/_anyio_patch.py b/nerve/_anyio_patch.py new file mode 100644 index 0000000..4fe5e7e --- /dev/null +++ b/nerve/_anyio_patch.py @@ -0,0 +1,146 @@ +"""Monkey-patch for anyio 4.13.0 _deliver_cancellation hot-loop bug. + +Upstream bug (anyio 4.13.0, _backends/_asyncio.py:582-616): + + for task in self._tasks: + should_retry = True # ← set unconditionally + if task._must_cancel: + continue + if task is not current and (task is self._host_task or _task_started(task)): + waiter = task._fut_waiter + if not isinstance(waiter, asyncio.Future) or not waiter.done(): + task.cancel(origin._cancel_reason) + ... + ... + if origin is self and should_retry: + self._cancel_handle = get_running_loop().call_soon( + self._deliver_cancellation, origin + ) + +``should_retry`` is set to True simply because there is *any* task in the +scope's ``_tasks`` set, regardless of whether cancellation could actually be +delivered. When every task in the scope is the *current* task (a scope that +contains only itself, or a TaskGroup whose only live member is running the +cancel) nothing gets ``task.cancel()``-ed, but the scheduler reschedules +``_deliver_cancellation`` via ``call_soon`` on every event-loop tick. Result: +one CPU core pinned at 100%, tens of thousands of ``epoll_pwait`` syscalls per +second, and no forward progress. + +We have seen this trigger at least twice (April 22 and April 23, 2026). The +SDK-side mitigation in ``nerve.agent.engine._safe_disconnect`` only runs +during ``client.disconnect()``, so spins originating elsewhere (telegram +polling, cron, an active SDK request hitting a broken pipe) are not covered. + +The fix below sets ``should_retry = True`` only when we *actually* did +something — marked a task for cancellation, or found one already in +``_must_cancel`` state waiting to be processed. The rest of the semantics +match upstream byte-for-byte. + +Import this module once, before anyio is used (i.e. very early in the +process entry point — see ``nerve/__main__.py``). +""" + +from __future__ import annotations + +import asyncio +import logging +from asyncio import current_task +from asyncio import get_running_loop + +from anyio._backends import _asyncio as _anyio_asyncio + +logger = logging.getLogger(__name__) + +_APPLIED = False + + +def _patched_deliver_cancellation(self, origin): # type: ignore[no-untyped-def] + """Drop-in replacement for ``CancelScope._deliver_cancellation``. + + Behaves identically to upstream *except* that ``should_retry`` is only + set when the pass actually produced work (a ``task.cancel()`` call or a + task still in ``_must_cancel`` awaiting pickup). + """ + should_retry = False + current = current_task() + for task in self._tasks: + if task._must_cancel: # type: ignore[attr-defined] + # Already flagged; re-check next tick to see if it cleared. + should_retry = True + continue + + # The task is eligible for cancellation if it has started. + if task is not current and ( + task is self._host_task or _anyio_asyncio._task_started(task) + ): + waiter = task._fut_waiter # type: ignore[attr-defined] + if not isinstance(waiter, asyncio.Future) or not waiter.done(): + task.cancel(origin._cancel_reason) + # We actually delivered a cancel — re-check next tick. + should_retry = True + if ( + task is origin._host_task + and origin._pending_uncancellations is not None + ): + origin._pending_uncancellations += 1 + + # Deliver cancellation to child scopes that aren't shielded or running + # their own cancellation callbacks. + for scope in self._child_scopes: + if not scope._shield and not scope.cancel_called: + should_retry = scope._deliver_cancellation(origin) or should_retry + + # Schedule another callback if there are still tasks left. + if origin is self: + if should_retry: + self._cancel_handle = get_running_loop().call_soon( + self._deliver_cancellation, origin + ) + else: + self._cancel_handle = None + + return should_retry + + +def apply() -> bool: + """Install the patch. Safe to call multiple times. + + Returns True if the patch was applied in this call, False if it was + already applied (or the target symbol is missing). + """ + global _APPLIED + if _APPLIED: + return False + + cancel_scope_cls = getattr(_anyio_asyncio, "CancelScope", None) + if cancel_scope_cls is None: + logger.warning( + "anyio patch: CancelScope not found in %s; patch NOT applied", + _anyio_asyncio.__name__, + ) + return False + + if not hasattr(cancel_scope_cls, "_deliver_cancellation"): + logger.warning( + "anyio patch: _deliver_cancellation not found on CancelScope; " + "patch NOT applied (anyio API changed?)" + ) + return False + + if not hasattr(_anyio_asyncio, "_task_started"): + logger.warning( + "anyio patch: _task_started helper not found; patch NOT applied" + ) + return False + + cancel_scope_cls._deliver_cancellation = _patched_deliver_cancellation + _APPLIED = True + logger.info( + "anyio patch applied: CancelScope._deliver_cancellation fixed " + "(prevents 100%% CPU spin on unrecoverable cancel)" + ) + return True + + +# Apply on import so anyone who imports this module gets the patch. +apply() diff --git a/tests/test_anyio_patch.py b/tests/test_anyio_patch.py new file mode 100644 index 0000000..6da4a5f --- /dev/null +++ b/tests/test_anyio_patch.py @@ -0,0 +1,116 @@ +"""Regression tests for the anyio _deliver_cancellation hot-loop patch. + +See nerve/_anyio_patch.py for the root-cause explanation. +""" + +from __future__ import annotations + +import asyncio + +import anyio +import pytest + +# Importing nerve applies the patch as a side effect of nerve/__init__.py. +import nerve # noqa: F401 +from nerve import _anyio_patch + + +def test_patch_is_applied(): + """The patch must be active after ``import nerve``.""" + from anyio._backends import _asyncio as anyio_asyncio + + assert _anyio_patch._APPLIED is True + assert ( + anyio_asyncio.CancelScope._deliver_cancellation.__module__ + == "nerve._anyio_patch" + ) + + +def test_apply_is_idempotent(): + """Calling apply() twice must not raise and must return False the 2nd time.""" + # Already applied from module import, so this should be a no-op. + assert _anyio_patch.apply() is False + + +@pytest.mark.asyncio +async def test_fail_after_still_works(): + """Cancellation via fail_after must still raise TimeoutError.""" + with pytest.raises(TimeoutError): + with anyio.fail_after(0.01): + await anyio.sleep(1) + + +@pytest.mark.asyncio +async def test_move_on_after_still_works(): + """Cancellation via move_on_after must exit cleanly.""" + with anyio.move_on_after(0.01) as scope: + await anyio.sleep(1) + assert scope.cancelled_caught + + +@pytest.mark.asyncio +async def test_task_group_cancellation_still_works(): + """Cancelling a task group must cancel its children.""" + cancelled = [] + + async def child(index): + try: + await anyio.sleep(10) + except asyncio.CancelledError: + cancelled.append(index) + raise + + async with anyio.create_task_group() as tg: + tg.start_soon(child, 0) + tg.start_soon(child, 1) + await anyio.sleep(0.01) + tg.cancel_scope.cancel() + + assert sorted(cancelled) == [0, 1] + + +@pytest.mark.asyncio +async def test_no_hot_loop_when_only_task_is_current(): + """Regression: a CancelScope whose only task is the current task must not + re-schedule ``_deliver_cancellation`` forever. + + This is the exact shape of the bug: ``should_retry`` used to be set to + True simply because ``self._tasks`` was non-empty, even when no task + could actually be cancelled. The scheduler then re-queued itself via + ``call_soon()`` on every event-loop tick, pinning one CPU core. + + After the patch, the first pass must leave ``_cancel_handle = None`` + because nothing was delivered and nothing is awaiting pickup. + """ + from anyio._backends._asyncio import CancelScope + + scope = CancelScope() + # Enter/exit manually so we have a host_task bound to the running task + # without letting anyio's normal finalization run cancellation for us. + current = asyncio.current_task() + scope._host_task = current + scope._cancel_called = True + scope._cancel_reason = "regression test" + # Force the "pathological" shape: only task in scope is the current task, + # so the upstream loop would spin forever without the patch. + scope._tasks = {current} + + loop = asyncio.get_running_loop() + # Drop any stray handle so we can assert cleanly against the post-state. + scope._cancel_handle = None + + should_retry = scope._deliver_cancellation(scope) + + assert should_retry is False, ( + "Patched _deliver_cancellation must not retry when the only task " + "in the scope is the current task (nothing to cancel)." + ) + assert scope._cancel_handle is None, ( + "Patched _deliver_cancellation must clear _cancel_handle when no " + "retry is needed — otherwise the hot loop returns." + ) + + # Sanity: event loop must not have a queued _deliver_cancellation + # callback parked in it. We can't easily introspect the ready queue, but + # we *can* confirm we didn't stash a handle on the scope ourselves. + assert not loop.is_closed() From cd0cf4e6f234bc45d158128c1c8c7684f16deb9b Mon Sep 17 00:00:00 2001 From: Konstantin Kolesnyak Date: Thu, 23 Apr 2026 21:30:01 +0200 Subject: [PATCH 2/3] Fix anyio patch: skip current task and _must_cancel retries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The April 23 patch (ec0f8f7) fixed the 96%-CPU hot loop where should_retry was set unconditionally, but left a narrower version of the same bug: the `_must_cancel` branch still set should_retry=True. When the current task itself sits with _must_cancel=True while running the cancel callback (observed in production today, nerve/SDK path), this re-queues _deliver_cancellation on every event-loop tick: before fix: 20% CPU, ~61k epoll_pwait/sec after fix: should be idle (~5% CPU range) Changes: - Skip the current task entirely — it cannot cancel itself from inside the callback it is running. - Drop should_retry=True in the _must_cancel branch — asyncio's Task.__step raises CancelledError when the task resumes, no retry needed from us. - should_retry is now True only when we actually called task.cancel() in this pass. - Add regression test that poisons the _must_cancel branch with a fake task. The existing "current-task-only" test passed without reproducing this variant because it didn't set _must_cancel. --- nerve/_anyio_patch.py | 46 +++++++++++++++++++++-------- tests/test_anyio_patch.py | 62 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 13 deletions(-) diff --git a/nerve/_anyio_patch.py b/nerve/_anyio_patch.py index 4fe5e7e..ec6b6e8 100644 --- a/nerve/_anyio_patch.py +++ b/nerve/_anyio_patch.py @@ -31,10 +31,17 @@ during ``client.disconnect()``, so spins originating elsewhere (telegram polling, cron, an active SDK request hitting a broken pipe) are not covered. -The fix below sets ``should_retry = True`` only when we *actually* did -something — marked a task for cancellation, or found one already in -``_must_cancel`` state waiting to be processed. The rest of the semantics -match upstream byte-for-byte. +The fix below sets ``should_retry = True`` only when we *actually* called +``task.cancel()`` in this pass. Tasks already flagged with ``_must_cancel`` +do **not** justify a retry — asyncio's ``Task.__step`` raises +``CancelledError`` when such a task next resumes, without help from us. +Retrying on ``_must_cancel`` was the second iteration of this bug +(April 23, 2026 evening): ~20% CPU / 61k epoll_pwait/sec because the +current task itself was sitting with ``_must_cancel=True`` while running +the cancel callback, causing the callback to re-queue forever. + +We also skip the *current task* explicitly — it is running this very +callback and cannot be cancelled from inside it. Import this module once, before anyio is used (i.e. very early in the process entry point — see ``nerve/__main__.py``). @@ -57,22 +64,35 @@ def _patched_deliver_cancellation(self, origin): # type: ignore[no-untyped-def] """Drop-in replacement for ``CancelScope._deliver_cancellation``. - Behaves identically to upstream *except* that ``should_retry`` is only - set when the pass actually produced work (a ``task.cancel()`` call or a - task still in ``_must_cancel`` awaiting pickup). + Semantics: ``should_retry`` is True **only when we actually called + ``task.cancel()`` on some task in this pass**. A task already flagged + with ``_must_cancel`` does not justify a retry — asyncio's + ``Task.__step`` checks the flag when the task next resumes and will + raise ``CancelledError`` without our help. Retrying in that case means + we re-queue ourselves on every event-loop tick while the task is + blocked (shielded, awaiting I/O, or — critically — is the *current + task* running this very callback), which is the hot loop we're trying + to kill. """ should_retry = False current = current_task() for task in self._tasks: + # The current task is running this callback; it can't cancel + # itself from inside it. Whatever state it's in (including + # ``_must_cancel``) will be handled once we return and control + # flows back to ``Task.__step``. + if task is current: + continue + + # Already flagged for cancellation. asyncio will deliver the + # exception when the task resumes; no retry needed from us. + # (Upstream anyio 4.13.0 set should_retry=True here — that's the + # root-cause bug.) if task._must_cancel: # type: ignore[attr-defined] - # Already flagged; re-check next tick to see if it cleared. - should_retry = True continue # The task is eligible for cancellation if it has started. - if task is not current and ( - task is self._host_task or _anyio_asyncio._task_started(task) - ): + if task is self._host_task or _anyio_asyncio._task_started(task): waiter = task._fut_waiter # type: ignore[attr-defined] if not isinstance(waiter, asyncio.Future) or not waiter.done(): task.cancel(origin._cancel_reason) @@ -90,7 +110,7 @@ def _patched_deliver_cancellation(self, origin): # type: ignore[no-untyped-def] if not scope._shield and not scope.cancel_called: should_retry = scope._deliver_cancellation(origin) or should_retry - # Schedule another callback if there are still tasks left. + # Schedule another callback only if we actually did work this pass. if origin is self: if should_retry: self._cancel_handle = get_running_loop().call_soon( diff --git a/tests/test_anyio_patch.py b/tests/test_anyio_patch.py index 6da4a5f..a3f9034 100644 --- a/tests/test_anyio_patch.py +++ b/tests/test_anyio_patch.py @@ -114,3 +114,65 @@ async def test_no_hot_loop_when_only_task_is_current(): # callback parked in it. We can't easily introspect the ready queue, but # we *can* confirm we didn't stash a handle on the scope ourselves. assert not loop.is_closed() + + +@pytest.mark.asyncio +async def test_no_hot_loop_when_current_task_has_must_cancel(): + """Regression (April 23, 2026 evening): the first version of the patch + still spun when the *current task* already had ``_must_cancel=True``. + + In production this happens when a task that's about to be cancelled is + itself running the cancel callback (via a CancelScope it owns). The + previous patch unconditionally set ``should_retry = True`` whenever it + saw ``_must_cancel``, re-queuing ``_deliver_cancellation`` on every + event-loop tick. Result: ~20% CPU / ~61k epoll_pwait/sec, nerve kept + crowning the fan. + + The fix: skip the current task entirely, and only retry when we + actually called ``task.cancel()``. + """ + from anyio._backends._asyncio import CancelScope + + scope = CancelScope() + current = asyncio.current_task() + scope._host_task = current + scope._cancel_called = True + scope._cancel_reason = "regression test (must_cancel)" + scope._tasks = {current} + scope._cancel_handle = None + + # Simulate the production state: a task already flagged as + # "must cancel" is sitting in the scope. asyncio will deliver the + # CancelledError when the task next resumes — we must NOT re-queue + # ourselves in the meantime. + # + # The real ``_asyncio.Task._must_cancel`` attribute is read-only from + # Python, so we use a stub that mimics the shape the patch reads. + class _FakeTask: + def __init__(self): + self._must_cancel = True + self._fut_waiter = None + self._cancel_calls = 0 + + def cancel(self, *_args, **_kwargs): + self._cancel_calls += 1 + return True + + fake = _FakeTask() + scope._tasks = {fake} + + should_retry = scope._deliver_cancellation(scope) + + assert fake._cancel_calls == 0, ( + "Must not re-cancel a task already flagged with _must_cancel — " + "asyncio will deliver on resume." + ) + assert should_retry is False, ( + "Patched _deliver_cancellation must not retry on _must_cancel " + "alone. That was the April 23 regression: the callback re-queued " + "itself every tick while the task sat with _must_cancel=True, " + "burning ~20% CPU on 61k epoll_pwait/sec." + ) + assert scope._cancel_handle is None, ( + "No retry → no pending handle." + ) From 8dcb726dd21e2041f45bd5706a939b0dfaa48dea Mon Sep 17 00:00:00 2001 From: Konstantin Kolesnyak Date: Fri, 24 Apr 2026 10:40:51 +0200 Subject: [PATCH 3/3] Fix anyio patch: skip done tasks to prevent zombie-scope hot-loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Third iteration of the anyio _deliver_cancellation spin. This time a CancelScope retained already-finished tasks in self._tasks (anyio doesn't always prune them before the cancel callback fires). For a done task _must_cancel=False (cleared on final step), _task_started is True, _fut_waiter is None — so the previous patch fell into the "waiter not done → cancel()" branch. task.cancel() is a no-op on done tasks, but should_retry was flagged anyway, so call_soon kept re-queuing forever. Observed live: three zombie-scopes in one process producing ~55k epoll_pwait/sec combined, 100% CPU on MainThread, load 1.6, 60°C. Confirmed via py-spy (stack parked on lines 91/95/97/98 of the patch) and a gc-scan dump of CancelScope objects (three active _cancel_handle scopes, each with a single done=True task). Fix: add `if task.done(): continue` at the top of the loop. Also add regression test that reproduces the zombie-scope shape with a stubbed done task and asserts no cancel() call, no retry, and no pending _cancel_handle. --- nerve/_anyio_patch.py | 47 ++++++++++++++++++++------- tests/test_anyio_patch.py | 67 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 12 deletions(-) diff --git a/nerve/_anyio_patch.py b/nerve/_anyio_patch.py index ec6b6e8..7ee5091 100644 --- a/nerve/_anyio_patch.py +++ b/nerve/_anyio_patch.py @@ -26,22 +26,37 @@ one CPU core pinned at 100%, tens of thousands of ``epoll_pwait`` syscalls per second, and no forward progress. -We have seen this trigger at least twice (April 22 and April 23, 2026). The -SDK-side mitigation in ``nerve.agent.engine._safe_disconnect`` only runs +We have seen this trigger at least three times (April 22, 23, and 24, 2026). +The SDK-side mitigation in ``nerve.agent.engine._safe_disconnect`` only runs during ``client.disconnect()``, so spins originating elsewhere (telegram polling, cron, an active SDK request hitting a broken pipe) are not covered. The fix below sets ``should_retry = True`` only when we *actually* called -``task.cancel()`` in this pass. Tasks already flagged with ``_must_cancel`` -do **not** justify a retry — asyncio's ``Task.__step`` raises -``CancelledError`` when such a task next resumes, without help from us. -Retrying on ``_must_cancel`` was the second iteration of this bug -(April 23, 2026 evening): ~20% CPU / 61k epoll_pwait/sec because the -current task itself was sitting with ``_must_cancel=True`` while running -the cancel callback, causing the callback to re-queue forever. - -We also skip the *current task* explicitly — it is running this very -callback and cannot be cancelled from inside it. +``task.cancel()`` on a task that could still receive a cancellation. We +skip three categories of tasks that upstream blindly retries on: + +1. **Done tasks** (``task.done() is True``). Root cause of the April 24 + regression: if a task completes while the scope has ``cancel_called`` + and the scope never pops it out of ``_tasks``, every subsequent pass + sees ``_must_cancel=False`` (cleared when the task finished), + ``_task_started()=True``, ``_fut_waiter=None``. Our "waiter not done" + branch fires, ``task.cancel()`` is a no-op on a done task, and yet we + flagged ``should_retry=True`` anyway. Result: infinite ``call_soon`` + on a scope whose only inhabitant is already a corpse. We observed + three such zombie-scopes running simultaneously, all spinning at + ~55k epoll_pwait/sec combined (100% CPU, load 1.6, 60°C). + +2. **Tasks already flagged with ``_must_cancel``**. asyncio's + ``Task.__step`` checks the flag when the task next resumes and will + raise ``CancelledError`` without our help. Retrying in that case means + we re-queue ourselves on every event-loop tick while the task is + blocked (shielded, awaiting I/O). This was the second iteration of + the bug (April 23 evening). + +3. **The current task**, because a task cannot cancel itself from inside + a callback. (``current_task()`` is often ``None`` here because the + callback runs from ``call_soon`` outside any task context — the check + is a belt-and-suspenders guard for the other case.) Import this module once, before anyio is used (i.e. very early in the process entry point — see ``nerve/__main__.py``). @@ -77,6 +92,14 @@ def _patched_deliver_cancellation(self, origin): # type: ignore[no-untyped-def] should_retry = False current = current_task() for task in self._tasks: + # Already finished. anyio doesn't always remove tasks from + # ``_tasks`` before the scope's cancel callback fires (done + # tasks can linger until the task group unwinds). ``task.cancel()`` + # is a no-op on a done task, so retrying is pure busy-loop. + # This is the root cause of the April 24, 2026 spin. + if task.done(): + continue + # The current task is running this callback; it can't cancel # itself from inside it. Whatever state it's in (including # ``_must_cancel``) will be handled once we return and control diff --git a/tests/test_anyio_patch.py b/tests/test_anyio_patch.py index a3f9034..45da9d7 100644 --- a/tests/test_anyio_patch.py +++ b/tests/test_anyio_patch.py @@ -154,6 +154,9 @@ def __init__(self): self._fut_waiter = None self._cancel_calls = 0 + def done(self): + return False + def cancel(self, *_args, **_kwargs): self._cancel_calls += 1 return True @@ -176,3 +179,67 @@ def cancel(self, *_args, **_kwargs): assert scope._cancel_handle is None, ( "No retry → no pending handle." ) + + +@pytest.mark.asyncio +async def test_no_hot_loop_when_only_task_is_done(): + """Regression (April 24, 2026): a scope whose only task has already + ``done()==True`` must not re-queue ``_deliver_cancellation`` forever. + + Observed in production: anyio 4.13.0 does not always remove tasks from + ``CancelScope._tasks`` the instant they finish — a done task can linger + until the task group unwinds. For a done task ``_must_cancel`` is False + (cleared during final step), ``_task_started()`` is True, and + ``_fut_waiter`` is None. The previous patch therefore fell into the + "waiter not done → cancel()" branch, called ``task.cancel()`` (which is + a no-op on a done task), and still set ``should_retry=True``. Result: + a zombie-scope spinning ~55k epoll_pwait/sec per scope, pinning one + CPU core until the process was restarted. + + Three such zombies were found in a live process dump (scope IDs + 0x7ffec1774f50, 0x7ffec17ae090, 0x7ffec17ad6d0) — the live fix was to + ``discard()`` the done task and ``handle.cancel()`` the pending + ``call_soon``. This test guarantees the code-level fix holds. + """ + from anyio._backends._asyncio import CancelScope + + scope = CancelScope() + current = asyncio.current_task() + scope._host_task = current + scope._cancel_called = True + scope._cancel_reason = "regression test (done)" + scope._cancel_handle = None + + class _DoneTask: + """Mimics an asyncio.Task that has already completed.""" + + def __init__(self): + self._must_cancel = False # cleared when task finished + self._fut_waiter = None # done tasks have no waiter + self._cancel_calls = 0 + + def done(self): + return True + + def cancel(self, *_args, **_kwargs): + self._cancel_calls += 1 + return False # cancel() returns False on done tasks + + done_task = _DoneTask() + scope._tasks = {done_task} + + should_retry = scope._deliver_cancellation(scope) + + assert should_retry is False, ( + "Patched _deliver_cancellation must not retry on done tasks. " + "Retrying on a done task is the April 24 hot-loop: cancel() is a " + "no-op, but the callback keeps re-queuing itself every tick." + ) + assert done_task._cancel_calls == 0, ( + "Must not call task.cancel() on a done task — it's a no-op that " + "only exists to flag should_retry=True." + ) + assert scope._cancel_handle is None, ( + "No retry → no pending handle. Otherwise the zombie-scope spin " + "returns on the next rebuild." + )