shloimy-wiesel · shloimy-wiesel · May 26, 2026
diff --git a/example/backend/routes/chat.py b/example/backend/routes/chat.py
@@ -7,7 +7,8 @@
 
 Pattern:
 1. Create a ``StreamContext`` at the start of the request.
-2. Spin up a background asyncio task that calls llm_service.chat().
+2. Call ``await ctx.run(work)``, where ``work`` accepts the context, to
+   schedule the background work with automatic finish/error handling.
 3. Return ``StreamingResponse(ctx.stream(), ...)`` immediately so the
    client starts receiving SSE events as they are produced.
 
@@ -18,8 +19,6 @@
 
 from __future__ import annotations
 
-import asyncio
-
 from fastapi import APIRouter
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
@@ -29,7 +28,6 @@
 from ..services import llm_service
 
 router = APIRouter()
-_background_tasks: set[asyncio.Task] = set()
 
 
 class ChatMessage(BaseModel):
@@ -50,20 +48,8 @@ async def chat(request: ChatRequest) -> StreamingResponse:
     are contextually aware of previous turns.
     """
     ctx = StreamContext()
-
-    async def _work() -> None:
-        try:
-            messages = [m.model_dump() for m in request.messages]
-            await llm_service.chat(messages, ctx=ctx)
-        except Exception as exc:
-            await ctx.write_text(f"\n\n_(Error: {exc})_")
-        finally:
-            await ctx.finish()
-
-    _task = asyncio.create_task(_work())
-    _background_tasks.add(_task)
-    _task.add_done_callback(_background_tasks.discard)
-
+    messages = [m.model_dump() for m in request.messages]
+    await ctx.run(lambda c: llm_service.chat(messages, ctx=c))
     return StreamingResponse(
         ctx.stream(),
         media_type="text/event-stream",

diff --git a/src/ai_sdk_stream_python/__init__.py b/src/ai_sdk_stream_python/__init__.py
@@ -18,7 +18,6 @@
     from fastapi import FastAPI
     from fastapi.responses import StreamingResponse
     from ai_sdk_stream_python import StreamContext
-    import asyncio
 
     app = FastAPI()
 
@@ -27,14 +26,11 @@
     async def chat():
         ctx = StreamContext()
 
-        async def _work():
-            try:
-                await ctx.write_text("Hello ")
-                await ctx.write_text("world!")
-            finally:
-                await ctx.finish()
+        async def _work(c: StreamContext) -> None:
+            await c.write_text("Hello ")
+            await c.write_text("world!")
 
-        asyncio.create_task(_work())
+        await ctx.run(_work)
         return StreamingResponse(
             ctx.stream(),
             media_type="text/event-stream",

diff --git a/src/ai_sdk_stream_python/context.py b/src/ai_sdk_stream_python/context.py
@@ -34,15 +34,12 @@
     async def chat(request: ChatRequest):
         ctx = StreamContext()
 
-        async def _work():
-            try:
-                await ctx.store.set("query", request.message)
-                async for chunk in my_llm.stream(request.message):
-                    await ctx.write_text(chunk)
-            finally:
-                await ctx.finish()
+        async def _work(c: StreamContext) -> None:
+            await c.store.set("query", request.message)
+            async for chunk in my_llm.stream(request.message):
+                await c.write_text(chunk)
 
-        asyncio.create_task(_work())
+        await ctx.run(_work)
         return StreamingResponse(
             ctx.stream(),
             media_type="text/event-stream",