Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .claude/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"extraKnownMarketplaces": {
"reboot-plugin": {
"source": {
"source": "github",
"repo": "reboot-dev/reboot-plugin"
}
}
},
"enabledPlugins": {
"reboot@reboot-plugin": true
}
}
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# locally before `docker build` so that `web/dist/` contains the
# bundled UIs. This image copies that prebuilt bundle rather
# than installing Node and rebuilding it here.
FROM ghcr.io/reboot-dev/reboot-base:1.0.4
FROM ghcr.io/reboot-dev/reboot-base:1.1.0

WORKDIR /app

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ pytest backend/
`mcp_servers.json` is pre-configured. In another terminal:

```bash
npx @mcpjam/inspector@v2.4.0 --config mcp_servers.json --server agent-wiki
npx @mcpjam/inspector@2.9.3 --config mcp_servers.json --server agent-wiki
```

Try these prompts to exercise each capability. The librarian
Expand Down
13 changes: 11 additions & 2 deletions backend/src/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import asyncio
import logging
from reboot.aio.applications import Application
from reboot.aio.auth.oauth_providers import Anonymous
from reboot.aio.auth.oauth_providers import (
Development,
OAuthProviderByEnvironment,
)
from servicers.wiki import (
PageServicer,
TranscriptServicer,
Expand All @@ -25,7 +28,13 @@ async def main() -> None:
],
# `User` is an auto-constructed state type, so Reboot
# needs an OAuth provider to identify the caller.
oauth=Anonymous(),
oauth=OAuthProviderByEnvironment(
dev=Development(),
# TODO: set a real provider (e.g. `Google(...)`) before
# production; `prod=None` makes a production deployment fail
# to start until one is chosen.
prod=None,
),
)
await application.run()

Expand Down
126 changes: 73 additions & 53 deletions backend/tests/wiki_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
)
from pydantic_ai.models.function import AgentInfo, FunctionModel
from reboot.aio.applications import Application
from reboot.aio.tests import Reboot
from reboot.aio.auth.oauth_providers import Anonymous
from reboot.aio.tests import OAuthProviderForTest, Reboot
from servicers import wiki as wiki_module
from servicers.wiki import (
PageServicer,
Expand Down Expand Up @@ -54,21 +55,42 @@ def _refuse(
return FunctionModel(_refuse)


class ServicerTest(unittest.IsolatedAsyncioTestCase):
"""Unit tests for each servicer's CRUD methods. These
tests never add a transcript, so the librarian workflow
never actually runs — but we still replace the agent's
model as a belt-and-braces guard against accidental
Anthropic calls from this suite."""
def _simple_librarian_model() -> FunctionModel:
    """Build a stand-in librarian model with a canned reply.

    Intended for tests that may cause the librarian to run but
    make no assertions about what it does: every invocation
    yields the same single text part.
    """

    def _respond(
        messages: list[ModelMessage],
        info: AgentInfo,
    ) -> ModelResponse:
        # Both arguments are deliberately ignored; the reply is
        # constant regardless of the conversation so far.
        reply = TextPart(content="Librarian response")
        return ModelResponse(parts=[reply])

    model = FunctionModel(_respond)
    return model


class _WikiTestBase(unittest.IsolatedAsyncioTestCase):
"""Base class that wires up Reboot, creates an `alice` user
context, and swaps the librarian model for the duration of
each test. Subclasses override `_make_librarian_model` to
choose which stand-in model to install."""

def _make_librarian_model(self) -> FunctionModel:
raise NotImplementedError

async def asyncSetUp(self) -> None:
self._original_model = wiki_module.librarian.wrapped.model
wiki_module.librarian.wrapped.model = _null_librarian_model()
# Overwrite the librarian's model within the test, so any calls
# to the LLM become deterministic.
wiki_module.librarian.wrapped.model = self._make_librarian_model()

self.rbt = Reboot()
await self.rbt.start()
await self.rbt.up(
Application(servicers=APPLICATION_SERVICERS),
Application(
servicers=APPLICATION_SERVICERS,
oauth=OAuthProviderForTest(Anonymous()),
),
)
self.user_id = "alice"
self.context = self.rbt.create_external_context(
Expand All @@ -90,6 +112,18 @@ async def asyncTearDown(self) -> None:
await self.rbt.stop()
wiki_module.librarian.wrapped.model = self._original_model


class ServicerTest(_WikiTestBase):
"""Unit tests for each servicer's CRUD methods. These
tests never add a transcript, so the librarian workflow
never actually runs — but we still replace the agent's
model as a belt-and-braces guard against accidental
Anthropic calls from this suite."""

def _make_librarian_model(self) -> FunctionModel:
# The tests should never trigger the librarian.
return _null_librarian_model()

async def test_user_create_and_list_wikis(self) -> None:
"""A user can create a wiki and then see it in their
list, keyed by the user-supplied name."""
Expand Down Expand Up @@ -180,6 +214,14 @@ async def test_transcript_crud(self) -> None:
self.assertEqual(len(got.messages), 1)
self.assertEqual(got.messages[0].content, "Goodbye")


class ServicerWithSimpleLibrarianTest(_WikiTestBase):

def _make_librarian_model(self) -> FunctionModel:
# Depending on the timing, this test might trigger the librarian
# when the transcript is added and consumed by `until`.
return _simple_librarian_model()

async def test_add_transcript_creates_transcript(
self,
) -> None:
Expand Down Expand Up @@ -228,8 +270,9 @@ class ScriptedLibrarian:

def __init__(self) -> None:
self.page_id: str | None = None
self.done = asyncio.Event()

def step(
async def step(
self,
messages: list[ModelMessage],
info: AgentInfo,
Expand Down Expand Up @@ -286,42 +329,25 @@ def step(
),
]
)

# Signal done the moment we emit the final response, which means
# the librarian has already executed `update_wiki` and the
# wiki's content is updated by the time any test code waiting on
# `done` wakes up.
self.done.set()
return ModelResponse(parts=[TextPart(content="Done.")])


class IngestWorkflowTest(unittest.IsolatedAsyncioTestCase):
class IngestWorkflowTest(_WikiTestBase):
"""End-to-end test of the `Wiki.ingest` librarian
workflow with the LLM replaced by a `FunctionModel`."""

async def asyncSetUp(self) -> None:
self.script = ScriptedLibrarian()
self._original_model = wiki_module.librarian.wrapped.model
wiki_module.librarian.wrapped.model = FunctionModel(self.script.step)

self.rbt = Reboot()
await self.rbt.start()
await self.rbt.up(
Application(servicers=APPLICATION_SERVICERS),
)
self.user_id = "alice"
self.context = self.rbt.create_external_context(
name=f"test-{self.id()}",
bearer_token=self.rbt.make_valid_oauth_access_token(
user_id=self.user_id,
),
)
# `User` is an auto-constructed state type: in
# production the MCP session's "new session" hook
# calls `_auto_construct` for the authenticated user.
# Tests don't go through that hook, so we do it here.
await UserServicer._auto_construct(
self.context,
state_id=self.user_id,
)
script = ScriptedLibrarian()

async def asyncTearDown(self) -> None:
await self.rbt.stop()
wiki_module.librarian.wrapped.model = self._original_model
def _make_librarian_model(self) -> FunctionModel:
# Scripted model that drives the librarian through a fixed
# sequence of tool calls.
return FunctionModel(self.script.step)

async def test_ingest_creates_page_and_updates_wiki(
self,
Expand Down Expand Up @@ -351,20 +377,14 @@ async def test_ingest_creates_page_and_updates_wiki(
],
)

# Poll the wiki's markdown body until the scripted
# `update_wiki` call lands. `Wiki.get` is the only
# externally observable signal — `transcripts` lives
# on the internal state, not on the `get` response.
for _ in range(100): # 10 s at 100 ms steps.
state = await wiki.get(self.context)
if state.content.startswith("# Table of contents"):
break
await asyncio.sleep(0.1)
else:
self.fail(
"Timed out waiting for librarian to rewrite "
"Wiki.content"
)
# Block until the scripted librarian signals it is
# done. `done` is set the moment `step()` emits its
# final `TextPart("Done.")`, at which point
# `update_wiki` has already executed and
# `Wiki.content` is already updated.
await self.script.done.wait()

state = await wiki.get(self.context)

# The scripted librarian should have created exactly
# one page and referenced it from the wiki's
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ dependencies = [
"uuid7>=0.1.0",
"anyio>=4.0.0",
"pydantic-ai-slim[anthropic]>=1.0.0",
"reboot==1.0.4",
"reboot==1.1.0",
]

[tool.rye]
dev-dependencies = [
"pytest>=7.4",
"reboot==1.0.4",
"reboot==1.1.0",
]

# This project only uses `rye` to provide `python` and its dependencies.
Expand Down
5 changes: 3 additions & 2 deletions requirements-dev.lock
Original file line number Diff line number Diff line change
Expand Up @@ -220,16 +220,17 @@ pyprctl==0.1.3
pytest==9.0.3
python-dateutil==2.9.0.post0
# via kubernetes-asyncio
python-dotenv==1.2.2
python-dotenv==1.2.1
# via pydantic-settings
# via reboot
python-multipart==0.0.27
# via mcp
python-ulid==3.1.0
# via reboot
pyyaml==6.0.2
# via kubernetes-asyncio
# via reboot
reboot==1.0.4
reboot==1.1.0
referencing==0.37.0
# via jsonschema
# via jsonschema-specifications
Expand Down
5 changes: 3 additions & 2 deletions requirements.lock
Original file line number Diff line number Diff line change
Expand Up @@ -211,16 +211,17 @@ pyprctl==0.1.3
# via reboot
python-dateutil==2.9.0.post0
# via kubernetes-asyncio
python-dotenv==1.2.2
python-dotenv==1.2.1
# via pydantic-settings
# via reboot
python-multipart==0.0.27
# via mcp
python-ulid==3.1.0
# via reboot
pyyaml==6.0.2
# via kubernetes-asyncio
# via reboot
reboot==1.0.4
reboot==1.1.0
referencing==0.37.0
# via jsonschema
# via jsonschema-specifications
Expand Down
28 changes: 14 additions & 14 deletions web/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
"dependencies": {
"@modelcontextprotocol/ext-apps": "1.5.0",
"@modelcontextprotocol/sdk": "1.29.0",
"@reboot-dev/reboot-api": "1.0.4",
"@reboot-dev/reboot-react": "1.0.4",
"@reboot-dev/reboot-api": "1.1.0",
"@reboot-dev/reboot-react": "1.1.0",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-markdown": "^10.1.0",
Expand Down
Loading